author    | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:44:14 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:44:14 +0000
commit    | 2b6b257f4e5503a7a2675bdb8735693db769f75c (patch)
tree      | e85e046ae7003fe3bcc8b5454cd0fa3f7407b470 /lib/CodeGen
parent    | b4348ed0b7e90c0831b925fbee00b5f179a99796 (diff)
download  | src-2b6b257f4e5503a7a2675bdb8735693db769f75c.tar.gz, src-2b6b257f4e5503a7a2675bdb8735693db769f75c.zip
Vendor import of clang release_39 branch r276489 (tag: vendor/clang/clang-release_39-r276489)
Notes:
svn path=/vendor/clang/dist/; revision=303233
svn path=/vendor/clang/clang-release_39-r276489/; revision=303234; tag=vendor/clang/clang-release_39-r276489
Diffstat (limited to 'lib/CodeGen')
68 files changed, 14120 insertions, 4952 deletions
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h index a65f27085616..530a7ef560c5 100644 --- a/lib/CodeGen/ABIInfo.h +++ b/lib/CodeGen/ABIInfo.h @@ -18,20 +18,25 @@ namespace llvm { class Value; class LLVMContext; class DataLayout; + class Type; } namespace clang { class ASTContext; class TargetInfo; - namespace CodeGen { - class ABIArgInfo; - class Address; - class CGCXXABI; - class CGFunctionInfo; - class CodeGenFunction; - class CodeGenTypes; - } +namespace CodeGen { + class ABIArgInfo; + class Address; + class CGCXXABI; + class CGFunctionInfo; + class CodeGenFunction; + class CodeGenTypes; + class SwiftABIInfo; + +namespace swiftcall { + class SwiftAggLowering; +} // FIXME: All of this stuff should be part of the target interface // somehow. It is currently here because it is not clear how to factor @@ -55,6 +60,8 @@ namespace clang { virtual ~ABIInfo(); + virtual bool supportsSwift() const { return false; } + CodeGen::CGCXXABI &getCXXABI() const; ASTContext &getContext() const; llvm::LLVMContext &getVMContext() const; @@ -85,6 +92,8 @@ namespace clang { CodeGen::Address VAListAddr, QualType Ty) const = 0; + bool isAndroid() const; + /// Emit the target dependent code to load a value of /// \arg Ty from the \c __builtin_ms_va_list pointed to by \arg VAListAddr. virtual CodeGen::Address EmitMSVAArg(CodeGen::CodeGenFunction &CGF, @@ -110,7 +119,35 @@ namespace clang { CodeGen::ABIArgInfo getNaturalAlignIndirectInReg(QualType Ty, bool Realign = false) const; + + + }; + + /// A refining implementation of ABIInfo for targets that support swiftcall. + /// + /// If we find ourselves wanting multiple such refinements, they'll probably + /// be independent refinements, and we should probably find another way + /// to do it than simple inheritance. 
+ class SwiftABIInfo : public ABIInfo { + public: + SwiftABIInfo(CodeGen::CodeGenTypes &cgt) : ABIInfo(cgt) {} + + bool supportsSwift() const final override { return true; } + + virtual bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> types, + bool asReturnValue) const = 0; + + virtual bool isLegalVectorTypeForSwift(CharUnits totalSize, + llvm::Type *eltTy, + unsigned elts) const; + + static bool classof(const ABIInfo *info) { + return info->supportsSwift(); + } }; + +} // end namespace CodeGen } // end namespace clang #endif diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index 6d746c25eed1..165b6dd55c9b 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -16,19 +16,21 @@ #include "clang/Frontend/Utils.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeWriterPass.h" +#include "llvm/Bitcode/ReaderWriter.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/FunctionInfo.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/Object/FunctionIndexObjectFile.h" +#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/TargetRegistry.h" @@ -42,6 +44,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> using namespace clang; @@ -58,9 +61,7 @@ class EmitAssemblyHelper { Timer CodeGenerationTime; - mutable legacy::PassManager *CodeGenPasses; - mutable legacy::PassManager *PerModulePasses; - mutable legacy::FunctionPassManager *PerFunctionPasses; + std::unique_ptr<raw_pwrite_stream> OS; private: TargetIRAnalysis getTargetIRAnalysis() const { @@ -70,70 +71,44 @@ private: return TargetIRAnalysis(); } - legacy::PassManager *getCodeGenPasses() const { - if (!CodeGenPasses) { - CodeGenPasses = new legacy::PassManager(); - CodeGenPasses->add( - createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - } - return CodeGenPasses; - } - - legacy::PassManager *getPerModulePasses() const { - if (!PerModulePasses) { - PerModulePasses = new legacy::PassManager(); - PerModulePasses->add( - createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - } - return PerModulePasses; - } - - legacy::FunctionPassManager *getPerFunctionPasses() const { - if (!PerFunctionPasses) { - PerFunctionPasses = new legacy::FunctionPassManager(TheModule); - PerFunctionPasses->add( - createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - } - return PerFunctionPasses; - } + /// Set LLVM command line options passed through -backend-option. + void setCommandLineOpts(); - void CreatePasses(FunctionInfoIndex *FunctionIndex); + void CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM, + ModuleSummaryIndex *ModuleSummary); /// Generates the TargetMachine. - /// Returns Null if it is unable to create the target machine. + /// Leaves TM unchanged if it is unable to create the target machine. 
/// Some of our clang tests specify triples which are not built /// into clang. This is okay because these tests check the generated /// IR, and they require DataLayout which depends on the triple. /// In this case, we allow this method to fail and not report an error. /// When MustCreateTM is used, we print an error if we are unable to load /// the requested target. - TargetMachine *CreateTargetMachine(bool MustCreateTM); + void CreateTargetMachine(bool MustCreateTM); /// Add passes necessary to emit assembly or LLVM IR. /// /// \return True on success. - bool AddEmitPasses(BackendAction Action, raw_pwrite_stream &OS); + bool AddEmitPasses(legacy::PassManager &CodeGenPasses, BackendAction Action, + raw_pwrite_stream &OS); public: EmitAssemblyHelper(DiagnosticsEngine &_Diags, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, const LangOptions &LOpts, Module *M) : Diags(_Diags), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts), - TheModule(M), CodeGenerationTime("Code Generation Time"), - CodeGenPasses(nullptr), PerModulePasses(nullptr), - PerFunctionPasses(nullptr) {} + TheModule(M), CodeGenerationTime("Code Generation Time") {} ~EmitAssemblyHelper() { - delete CodeGenPasses; - delete PerModulePasses; - delete PerFunctionPasses; if (CodeGenOpts.DisableFree) BuryPointer(std::move(TM)); } std::unique_ptr<TargetMachine> TM; - void EmitAssembly(BackendAction Action, raw_pwrite_stream *OS); + void EmitAssembly(BackendAction Action, + std::unique_ptr<raw_pwrite_stream> OS); }; // We need this wrapper to access LangOpts and CGOpts from extension functions @@ -172,8 +147,19 @@ static void addAddDiscriminatorsPass(const PassManagerBuilder &Builder, PM.add(createAddDiscriminatorsPass()); } +static void addCleanupPassesForSampleProfiler( + const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { + // instcombine is needed before sample profile annotation because it converts + // certain function calls to be inlinable. simplifycfg and sroa are needed + // before instcombine for necessary preparation. E.g. load store is eliminated + // properly so that instcombine will not introduce unecessary liverange. 
+ PM.add(createCFGSimplificationPass()); + PM.add(createSROAPass()); + PM.add(createInstructionCombiningPass()); +} + static void addBoundsCheckingPass(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { + legacy::PassManagerBase &PM) { PM.add(createBoundsCheckingPass()); } @@ -189,6 +175,7 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, Opts.TraceBB = CGOpts.SanitizeCoverageTraceBB; Opts.TraceCmp = CGOpts.SanitizeCoverageTraceCmp; Opts.Use8bitCounters = CGOpts.SanitizeCoverage8bitCounters; + Opts.TracePC = CGOpts.SanitizeCoverageTracePC; PM.add(createSanitizerCoverageModulePass(Opts)); } @@ -198,14 +185,17 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, static_cast<const PassManagerBuilderWrapper&>(Builder); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address); - PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/false, Recover)); + bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope; + PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, + UseAfterScope)); PM.add(createAddressSanitizerModulePass(/*CompileKernel*/false, Recover)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { - PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/true, - /*Recover*/true)); + PM.add(createAddressSanitizerFunctionPass( + /*CompileKernel*/ true, + /*Recover*/ true, /*UseAfterScope*/ false)); PM.add(createAddressSanitizerModulePass(/*CompileKernel*/true, /*Recover*/true)); } @@ -243,6 +233,19 @@ static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder, PM.add(createDataFlowSanitizerPass(LangOpts.SanitizerBlacklistFiles)); } +static void addEfficiencySanitizerPass(const PassManagerBuilder &Builder, + legacy::PassManagerBase &PM) { + const PassManagerBuilderWrapper &BuilderWrapper = + static_cast<const PassManagerBuilderWrapper&>(Builder); + const LangOptions &LangOpts = BuilderWrapper.getLangOpts(); + EfficiencySanitizerOptions Opts; + if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyCacheFrag)) + Opts.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag; + else if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyWorkingSet)) + Opts.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet; + PM.add(createEfficiencySanitizerPass(Opts)); +} + static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, const CodeGenOptions &CodeGenOpts) { TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); @@ -277,7 +280,9 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts, MPM->add(createRewriteSymbolsPass(DL)); } -void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { +void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, + legacy::FunctionPassManager &FPM, + ModuleSummaryIndex *ModuleSummary) { if (CodeGenOpts.DisableLLVMPasses) return; @@ -300,7 +305,8 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { switch (Inlining) { case CodeGenOptions::NoInlining: break; - case CodeGenOptions::NormalInlining: { + case CodeGenOptions::NormalInlining: + case CodeGenOptions::OnlyHintInlining: { PMBuilder.Inliner = createFunctionInliningPass(OptLevel, CodeGenOpts.OptimizeSize); break; @@ -321,22 +327,28 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; - 
PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime; PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; + PMBuilder.PrepareForThinLTO = CodeGenOpts.EmitSummaryIndex; PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; - legacy::PassManager *MPM = getPerModulePasses(); - // If we are performing a ThinLTO importing compile, invoke the LTO - // pipeline and pass down the in-memory function index. - if (FunctionIndex) { - PMBuilder.FunctionIndex = FunctionIndex; - PMBuilder.populateLTOPassManager(*MPM); + // pipeline and pass down the in-memory module summary index. + if (ModuleSummary) { + PMBuilder.ModuleSummary = ModuleSummary; + PMBuilder.populateThinLTOPassManager(MPM); return; } + // Add target-specific passes that need to run as early as possible. + if (TM) + PMBuilder.addExtension( + PassManagerBuilder::EP_EarlyAsPossible, + [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) { + TM->addEarlyAsPossiblePasses(PM); + }); + PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, addAddDiscriminatorsPass); @@ -401,15 +413,20 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { addDataFlowSanitizerPass); } + if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) { + PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, + addEfficiencySanitizerPass); + PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, + addEfficiencySanitizerPass); + } + // Set up the per-function pass manager. - legacy::FunctionPassManager *FPM = getPerFunctionPasses(); if (CodeGenOpts.VerifyModule) - FPM->add(createVerifierPass()); - PMBuilder.populateFunctionPassManager(*FPM); + FPM.add(createVerifierPass()); // Set up the per-module pass manager. 
if (!CodeGenOpts.RewriteMapFiles.empty()) - addSymbolRewriterPass(CodeGenOpts, MPM); + addSymbolRewriterPass(CodeGenOpts, &MPM); if (!CodeGenOpts.DisableGCov && (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) { @@ -424,25 +441,56 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData; Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody; - MPM->add(createGCOVProfilerPass(Options)); - if (CodeGenOpts.getDebugInfo() == CodeGenOptions::NoDebugInfo) - MPM->add(createStripSymbolsPass(true)); + MPM.add(createGCOVProfilerPass(Options)); + if (CodeGenOpts.getDebugInfo() == codegenoptions::NoDebugInfo) + MPM.add(createStripSymbolsPass(true)); } - if (CodeGenOpts.ProfileInstrGenerate) { + if (CodeGenOpts.hasProfileClangInstr()) { InstrProfOptions Options; Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; - MPM->add(createInstrProfilingPass(Options)); + MPM.add(createInstrProfilingLegacyPass(Options)); + } + if (CodeGenOpts.hasProfileIRInstr()) { + if (!CodeGenOpts.InstrProfileOutput.empty()) + PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput; + else + PMBuilder.PGOInstrGen = "default.profraw"; + } + if (CodeGenOpts.hasProfileIRUse()) + PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath; + + if (!CodeGenOpts.SampleProfileFile.empty()) { + MPM.add(createPruneEHPass()); + MPM.add(createSampleProfileLoaderPass(CodeGenOpts.SampleProfileFile)); + PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, + addCleanupPassesForSampleProfiler); } - if (!CodeGenOpts.SampleProfileFile.empty()) - MPM->add(createSampleProfileLoaderPass(CodeGenOpts.SampleProfileFile)); + PMBuilder.populateFunctionPassManager(FPM); + PMBuilder.populateModulePassManager(MPM); +} - PMBuilder.populateModulePassManager(*MPM); +void EmitAssemblyHelper::setCommandLineOpts() { + SmallVector<const char *, 16> BackendArgs; + BackendArgs.push_back("clang"); // Fake program name. + if (!CodeGenOpts.DebugPass.empty()) { + BackendArgs.push_back("-debug-pass"); + BackendArgs.push_back(CodeGenOpts.DebugPass.c_str()); + } + if (!CodeGenOpts.LimitFloatPrecision.empty()) { + BackendArgs.push_back("-limit-float-precision"); + BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); + } + for (const std::string &BackendOption : CodeGenOpts.BackendOptions) + BackendArgs.push_back(BackendOption.c_str()); + BackendArgs.push_back(nullptr); + llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, + BackendArgs.data()); } -TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { +void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { // Create the TargetMachine for generating code. std::string Error; std::string Triple = TheModule->getTargetTriple(); @@ -450,7 +498,7 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { if (!TheTarget) { if (MustCreateTM) Diags.Report(diag::err_fe_unable_to_create_target) << Error; - return nullptr; + return; } unsigned CodeModel = @@ -464,27 +512,11 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { assert(CodeModel != ~0u && "invalid code model!"); llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel); - SmallVector<const char *, 16> BackendArgs; - BackendArgs.push_back("clang"); // Fake program name. 
- if (!CodeGenOpts.DebugPass.empty()) { - BackendArgs.push_back("-debug-pass"); - BackendArgs.push_back(CodeGenOpts.DebugPass.c_str()); - } - if (!CodeGenOpts.LimitFloatPrecision.empty()) { - BackendArgs.push_back("-limit-float-precision"); - BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); - } - for (const std::string &BackendOption : CodeGenOpts.BackendOptions) - BackendArgs.push_back(BackendOption.c_str()); - BackendArgs.push_back(nullptr); - llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, - BackendArgs.data()); - std::string FeaturesStr = llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ","); // Keep this synced with the equivalent code in tools/driver/cc1as_main.cpp. - llvm::Reloc::Model RM = llvm::Reloc::Default; + llvm::Optional<llvm::Reloc::Model> RM; if (CodeGenOpts.RelocationModel == "static") { RM = llvm::Reloc::Static; } else if (CodeGenOpts.RelocationModel == "pic") { @@ -539,38 +571,29 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { Options.UseInitArray = CodeGenOpts.UseInitArray; Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS; Options.CompressDebugSections = CodeGenOpts.CompressDebugSections; + Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations; // Set EABI version. - Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(CodeGenOpts.EABIVersion) + Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion) .Case("4", llvm::EABI::EABI4) .Case("5", llvm::EABI::EABI5) .Case("gnu", llvm::EABI::GNU) .Default(llvm::EABI::Default); + if (LangOpts.SjLjExceptions) + Options.ExceptionModel = llvm::ExceptionHandling::SjLj; + Options.LessPreciseFPMADOption = CodeGenOpts.LessPreciseFPMAD; Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; - Options.PositionIndependentExecutable = LangOpts.PIELevel != 0; Options.FunctionSections = CodeGenOpts.FunctionSections; Options.DataSections = CodeGenOpts.DataSections; Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; - switch (CodeGenOpts.getDebuggerTuning()) { - case CodeGenOptions::DebuggerKindGDB: - Options.DebuggerTuning = llvm::DebuggerKind::GDB; - break; - case CodeGenOptions::DebuggerKindLLDB: - Options.DebuggerTuning = llvm::DebuggerKind::LLDB; - break; - case CodeGenOptions::DebuggerKindSCE: - Options.DebuggerTuning = llvm::DebuggerKind::SCE; - break; - default: - break; - } + Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; @@ -582,24 +605,18 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; Options.MCOptions.ABIName = TargetOpts.ABI; - TargetMachine *TM = TheTarget->createTargetMachine(Triple, TargetOpts.CPU, - FeaturesStr, Options, - RM, CM, OptLevel); - - return TM; + TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, + Options, RM, CM, OptLevel)); } -bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, +bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, + BackendAction Action, raw_pwrite_stream &OS) { - - // Create the code generator passes. 
- legacy::PassManager *PM = getCodeGenPasses(); - // Add LibraryInfo. llvm::Triple TargetTriple(TheModule->getTargetTriple()); std::unique_ptr<TargetLibraryInfoImpl> TLII( createTLII(TargetTriple, CodeGenOpts)); - PM->add(new TargetLibraryInfoWrapperPass(*TLII)); + CodeGenPasses.add(new TargetLibraryInfoWrapperPass(*TLII)); // Normal mode, emit a .s or .o file by running the code generator. Note, // this also adds codegenerator level optimization passes. @@ -615,9 +632,9 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, // "codegen" passes so that it isn't run multiple times when there is // inlining happening. if (CodeGenOpts.OptimizationLevel > 0) - PM->add(createObjCARCContractPass()); + CodeGenPasses.add(createObjCARCContractPass()); - if (TM->addPassesToEmitFile(*PM, OS, CGFT, + if (TM->addPassesToEmitFile(CodeGenPasses, OS, CGFT, /*DisableVerify=*/!CodeGenOpts.VerifyModule)) { Diags.Report(diag::err_fe_unable_to_interface_with_target); return false; @@ -627,14 +644,15 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, } void EmitAssemblyHelper::EmitAssembly(BackendAction Action, - raw_pwrite_stream *OS) { + std::unique_ptr<raw_pwrite_stream> OS) { TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr); + setCommandLineOpts(); + bool UsesCodeGen = (Action != Backend_EmitNothing && Action != Backend_EmitBC && Action != Backend_EmitLL); - if (!TM) - TM.reset(CreateTargetMachine(UsesCodeGen)); + CreateTargetMachine(UsesCodeGen); if (UsesCodeGen && !TM) return; @@ -644,41 +662,54 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, // If we are performing a ThinLTO importing compile, load the function // index into memory and pass it into CreatePasses, which will add it // to the PassManagerBuilder and invoke LTO passes. 
- std::unique_ptr<FunctionInfoIndex> FunctionIndex; + std::unique_ptr<ModuleSummaryIndex> ModuleSummary; if (!CodeGenOpts.ThinLTOIndexFile.empty()) { - ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr = - llvm::getFunctionIndexForFile(CodeGenOpts.ThinLTOIndexFile, - [&](const DiagnosticInfo &DI) { - TheModule->getContext().diagnose(DI); - }); + ErrorOr<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr = + llvm::getModuleSummaryIndexForFile( + CodeGenOpts.ThinLTOIndexFile, [&](const DiagnosticInfo &DI) { + TheModule->getContext().diagnose(DI); + }); if (std::error_code EC = IndexOrErr.getError()) { std::string Error = EC.message(); errs() << "Error loading index file '" << CodeGenOpts.ThinLTOIndexFile << "': " << Error << "\n"; return; } - FunctionIndex = std::move(IndexOrErr.get()); - assert(FunctionIndex && "Expected non-empty function index"); + ModuleSummary = std::move(IndexOrErr.get()); + assert(ModuleSummary && "Expected non-empty module summary index"); } - CreatePasses(FunctionIndex.get()); + legacy::PassManager PerModulePasses; + PerModulePasses.add( + createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); + + legacy::FunctionPassManager PerFunctionPasses(TheModule); + PerFunctionPasses.add( + createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); + + CreatePasses(PerModulePasses, PerFunctionPasses, ModuleSummary.get()); + + legacy::PassManager CodeGenPasses; + CodeGenPasses.add( + createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - getPerModulePasses()->add(createBitcodeWriterPass( - *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitFunctionSummary)); + PerModulePasses.add(createBitcodeWriterPass( + *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitSummaryIndex, + CodeGenOpts.EmitSummaryIndex)); break; case Backend_EmitLL: - getPerModulePasses()->add( + PerModulePasses.add( createPrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists)); break; default: - if (!AddEmitPasses(Action, *OS)) + if (!AddEmitPasses(CodeGenPasses, Action, *OS)) return; } @@ -688,46 +719,165 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, // Run passes. For now we do all passes at once, but eventually we // would like to have the option of streaming code generation. 
- if (PerFunctionPasses) { + { PrettyStackTraceString CrashInfo("Per-function optimization"); - PerFunctionPasses->doInitialization(); + PerFunctionPasses.doInitialization(); for (Function &F : *TheModule) if (!F.isDeclaration()) - PerFunctionPasses->run(F); - PerFunctionPasses->doFinalization(); + PerFunctionPasses.run(F); + PerFunctionPasses.doFinalization(); } - if (PerModulePasses) { + { PrettyStackTraceString CrashInfo("Per-module optimization passes"); - PerModulePasses->run(*TheModule); + PerModulePasses.run(*TheModule); } - if (CodeGenPasses) { + { PrettyStackTraceString CrashInfo("Code generation"); - CodeGenPasses->run(*TheModule); + CodeGenPasses.run(*TheModule); } } void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, StringRef TDesc, + const LangOptions &LOpts, const llvm::DataLayout &TDesc, Module *M, BackendAction Action, - raw_pwrite_stream *OS) { + std::unique_ptr<raw_pwrite_stream> OS) { EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M); - AsmHelper.EmitAssembly(Action, OS); + AsmHelper.EmitAssembly(Action, std::move(OS)); - // If an optional clang TargetInfo description string was passed in, use it to - // verify the LLVM TargetMachine's DataLayout. - if (AsmHelper.TM && !TDesc.empty()) { + // Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's + // DataLayout. + if (AsmHelper.TM) { std::string DLDesc = M->getDataLayout().getStringRepresentation(); - if (DLDesc != TDesc) { + if (DLDesc != TDesc.getStringRepresentation()) { unsigned DiagID = Diags.getCustomDiagID( DiagnosticsEngine::Error, "backend data layout '%0' does not match " "expected target description '%1'"); - Diags.Report(DiagID) << DLDesc << TDesc; + Diags.Report(DiagID) << DLDesc << TDesc.getStringRepresentation(); + } + } +} + +static const char* getSectionNameForBitcode(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + return "__LLVM,__bitcode"; + case Triple::COFF: + case Triple::ELF: + case Triple::UnknownObjectFormat: + return ".llvmbc"; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + +static const char* getSectionNameForCommandline(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + return "__LLVM,__cmdline"; + case Triple::COFF: + case Triple::ELF: + case Triple::UnknownObjectFormat: + return ".llvmcmd"; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + +// With -fembed-bitcode, save a copy of the llvm IR as data in the +// __LLVM,__bitcode section. +void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, + llvm::MemoryBufferRef Buf) { + if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off) + return; + + // Save llvm.compiler.used and remote it. + SmallVector<Constant*, 2> UsedArray; + SmallSet<GlobalValue*, 4> UsedGlobals; + Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0); + GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true); + for (auto *GV : UsedGlobals) { + if (GV->getName() != "llvm.embedded.module" && + GV->getName() != "llvm.cmdline") + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + } + if (Used) + Used->eraseFromParent(); + + // Embed the bitcode for the llvm module. + std::string Data; + ArrayRef<uint8_t> ModuleData; + Triple T(M->getTargetTriple()); + // Create a constant that contains the bitcode. 
+ // In case of embedding a marker, ignore the input Buf and use the empty + // ArrayRef. It is also legal to create a bitcode marker even Buf is empty. + if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker) { + if (!isBitcode((const unsigned char *)Buf.getBufferStart(), + (const unsigned char *)Buf.getBufferEnd())) { + // If the input is LLVM Assembly, bitcode is produced by serializing + // the module. Use-lists order need to be perserved in this case. + llvm::raw_string_ostream OS(Data); + llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true); + ModuleData = + ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size()); + } else + // If the input is LLVM bitcode, write the input byte stream directly. + ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(), + Buf.getBufferSize()); + } + llvm::Constant *ModuleConstant = + llvm::ConstantDataArray::get(M->getContext(), ModuleData); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + *M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, + ModuleConstant); + GV->setSection(getSectionNameForBitcode(T)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = + M->getGlobalVariable("llvm.embedded.module", true)) { + assert(Old->hasOneUse() && + "llvm.embedded.module can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.embedded.module"); + } + + // Skip if only bitcode needs to be embedded. + if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode) { + // Embed command-line options. + ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CGOpts.CmdArgs.data()), + CGOpts.CmdArgs.size()); + llvm::Constant *CmdConstant = + llvm::ConstantDataArray::get(M->getContext(), CmdData); + GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true, + llvm::GlobalValue::PrivateLinkage, + CmdConstant); + GV->setSection(getSectionNameForCommandline(T)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = + M->getGlobalVariable("llvm.cmdline", true)) { + assert(Old->hasOneUse() && + "llvm.cmdline can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.cmdline"); } } + + if (UsedArray.empty()) + return; + + // Recreate llvm.compiler.used. + ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size()); + auto *NewUsed = new GlobalVariable( + *M, ATy, false, llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); + NewUsed->setSection("llvm.metadata"); } diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 24de30b0b862..7b747c138303 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -79,7 +79,7 @@ namespace { auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment()); AtomicSizeInBits = C.toBits( C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1) - .RoundUpToAlignment(lvalue.getAlignment())); + .alignTo(lvalue.getAlignment())); auto VoidPtrAddr = CGF.EmitCastToVoidPtr(lvalue.getBitFieldPointer()); auto OffsetInChars = (C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) * @@ -221,11 +221,13 @@ namespace { /// \param IsWeak true if atomic operation is weak, false otherwise. 
/// \returns Pair of values: previous value from storage (value type) and /// boolean flag (i1 type) with true if success and false otherwise. - std::pair<RValue, llvm::Value *> EmitAtomicCompareExchange( - RValue Expected, RValue Desired, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent, - bool IsWeak = false); + std::pair<RValue, llvm::Value *> + EmitAtomicCompareExchange(RValue Expected, RValue Desired, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent, + bool IsWeak = false); /// \brief Emits atomic update. /// \param AO Atomic ordering. @@ -241,11 +243,6 @@ namespace { /// Materialize an atomic r-value in atomic-layout memory. Address materializeRValue(RValue rvalue) const; - /// \brief Translates LLVM atomic ordering to GNU atomic ordering for - /// libcalls. - static AtomicExpr::AtomicOrderingKind - translateAtomicOrdering(const llvm::AtomicOrdering AO); - /// \brief Creates temp alloca for intermediate operations on atomic value. Address CreateTempAlloca() const; private: @@ -260,13 +257,17 @@ namespace { /// \brief Emits atomic compare-and-exchange op as a libcall. llvm::Value *EmitAtomicCompareExchangeLibcall( llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent); + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent); /// \brief Emits atomic compare-and-exchange op as LLVM instruction. std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp( llvm::Value *ExpectedVal, llvm::Value *DesiredVal, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak = false); /// \brief Emit atomic update as libcalls. void @@ -286,25 +287,6 @@ namespace { }; } -AtomicExpr::AtomicOrderingKind -AtomicInfo::translateAtomicOrdering(const llvm::AtomicOrdering AO) { - switch (AO) { - case llvm::Unordered: - case llvm::NotAtomic: - case llvm::Monotonic: - return AtomicExpr::AO_ABI_memory_order_relaxed; - case llvm::Acquire: - return AtomicExpr::AO_ABI_memory_order_acquire; - case llvm::Release: - return AtomicExpr::AO_ABI_memory_order_release; - case llvm::AcquireRelease: - return AtomicExpr::AO_ABI_memory_order_acq_rel; - case llvm::SequentiallyConsistent: - return AtomicExpr::AO_ABI_memory_order_seq_cst; - } - llvm_unreachable("Unhandled AtomicOrdering"); -} - Address AtomicInfo::CreateTempAlloca() const { Address TempAlloca = CGF.CreateMemTemp( (LVal.isBitField() && ValueSizeInBits > AtomicSizeInBits) ? 
ValueTy @@ -323,8 +305,7 @@ static RValue emitAtomicLibcall(CodeGenFunction &CGF, QualType resultType, CallArgList &args) { const CGFunctionInfo &fnInfo = - CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args, - FunctionType::ExtInfo(), RequiredArgs::All); + CGF.CGM.getTypes().arrangeBuiltinFunctionCall(resultType, args); llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo); llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args); @@ -422,33 +403,39 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak, /// instructions to cope with the provided (but possibly only dynamically known) /// FailureOrder. static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, - bool IsWeak, Address Dest, - Address Ptr, Address Val1, - Address Val2, + bool IsWeak, Address Dest, Address Ptr, + Address Val1, Address Val2, llvm::Value *FailureOrderVal, uint64_t Size, llvm::AtomicOrdering SuccessOrder) { llvm::AtomicOrdering FailureOrder; if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) { - switch (FO->getSExtValue()) { - default: - FailureOrder = llvm::Monotonic; - break; - case AtomicExpr::AO_ABI_memory_order_consume: - case AtomicExpr::AO_ABI_memory_order_acquire: - FailureOrder = llvm::Acquire; - break; - case AtomicExpr::AO_ABI_memory_order_seq_cst: - FailureOrder = llvm::SequentiallyConsistent; - break; - } - if (FailureOrder >= SuccessOrder) { - // Don't assert on undefined behaviour. + auto FOS = FO->getSExtValue(); + if (!llvm::isValidAtomicOrderingCABI(FOS)) + FailureOrder = llvm::AtomicOrdering::Monotonic; + else + switch ((llvm::AtomicOrderingCABI)FOS) { + case llvm::AtomicOrderingCABI::relaxed: + case llvm::AtomicOrderingCABI::release: + case llvm::AtomicOrderingCABI::acq_rel: + FailureOrder = llvm::AtomicOrdering::Monotonic; + break; + case llvm::AtomicOrderingCABI::consume: + case llvm::AtomicOrderingCABI::acquire: + FailureOrder = llvm::AtomicOrdering::Acquire; + break; + case llvm::AtomicOrderingCABI::seq_cst: + FailureOrder = llvm::AtomicOrdering::SequentiallyConsistent; + break; + } + if (isStrongerThan(FailureOrder, SuccessOrder)) { + // Don't assert on undefined behavior "failure argument shall be no + // stronger than the success argument". 
FailureOrder = - llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder); + llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder); } - emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, - SuccessOrder, FailureOrder); + emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder, + FailureOrder); return; } @@ -456,9 +443,10 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr, *SeqCstBB = nullptr; MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn); - if (SuccessOrder != llvm::Monotonic && SuccessOrder != llvm::Release) + if (SuccessOrder != llvm::AtomicOrdering::Monotonic && + SuccessOrder != llvm::AtomicOrdering::Release) AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn); - if (SuccessOrder == llvm::SequentiallyConsistent) + if (SuccessOrder == llvm::AtomicOrdering::SequentiallyConsistent) SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn); llvm::BasicBlock *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn); @@ -472,25 +460,25 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, // doesn't fold to a constant for the ordering. CGF.Builder.SetInsertPoint(MonotonicBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::Monotonic); + Size, SuccessOrder, llvm::AtomicOrdering::Monotonic); CGF.Builder.CreateBr(ContBB); if (AcquireBB) { CGF.Builder.SetInsertPoint(AcquireBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::Acquire); + Size, SuccessOrder, llvm::AtomicOrdering::Acquire); CGF.Builder.CreateBr(ContBB); - SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume), + SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume), AcquireBB); - SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire), + SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire), AcquireBB); } if (SeqCstBB) { CGF.Builder.SetInsertPoint(SeqCstBB); - emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::SequentiallyConsistent); + emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder, + llvm::AtomicOrdering::SequentiallyConsistent); CGF.Builder.CreateBr(ContBB); - SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst), + SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst), SeqCstBB); } @@ -1037,40 +1025,39 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { E->getOp() == AtomicExpr::AO__atomic_load_n; if (isa<llvm::ConstantInt>(Order)) { - int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); - switch (ord) { - case AtomicExpr::AO_ABI_memory_order_relaxed: - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Monotonic); - break; - case AtomicExpr::AO_ABI_memory_order_consume: - case AtomicExpr::AO_ABI_memory_order_acquire: - if (IsStore) - break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Acquire); - break; - case AtomicExpr::AO_ABI_memory_order_release: - if (IsLoad) - break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Release); - break; - case AtomicExpr::AO_ABI_memory_order_acq_rel: - if (IsLoad || IsStore) - break; // Avoid crashing on code with undefined behavior - 
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AcquireRelease); - break; - case AtomicExpr::AO_ABI_memory_order_seq_cst: - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::SequentiallyConsistent); - break; - default: // invalid order - // We should not ever get here normally, but it's hard to - // enforce that in general. - break; - } + auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); + // We should not ever get to a case where the ordering isn't a valid C ABI + // value, but it's hard to enforce that in general. + if (llvm::isValidAtomicOrderingCABI(ord)) + switch ((llvm::AtomicOrderingCABI)ord) { + case llvm::AtomicOrderingCABI::relaxed: + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Monotonic); + break; + case llvm::AtomicOrderingCABI::consume: + case llvm::AtomicOrderingCABI::acquire: + if (IsStore) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Acquire); + break; + case llvm::AtomicOrderingCABI::release: + if (IsLoad) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrderingCABI::acq_rel: + if (IsLoad || IsStore) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::AcquireRelease); + break; + case llvm::AtomicOrderingCABI::seq_cst: + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::SequentiallyConsistent); + break; + } if (RValTy->isVoidType()) return RValue::get(nullptr); @@ -1105,39 +1092,39 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // Emit all the different atomics Builder.SetInsertPoint(MonotonicBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Monotonic); + Size, llvm::AtomicOrdering::Monotonic); Builder.CreateBr(ContBB); if (!IsStore) { Builder.SetInsertPoint(AcquireBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Acquire); + Size, llvm::AtomicOrdering::Acquire); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume), AcquireBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire), AcquireBB); } if (!IsLoad) { Builder.SetInsertPoint(ReleaseBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Release); + Size, llvm::AtomicOrdering::Release); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_release), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release), ReleaseBB); } if (!IsLoad && !IsStore) { Builder.SetInsertPoint(AcqRelBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AcquireRelease); + Size, llvm::AtomicOrdering::AcquireRelease); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acq_rel), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel), AcqRelBB); } Builder.SetInsertPoint(SeqCstBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::SequentiallyConsistent); + Size, 
llvm::AtomicOrdering::SequentiallyConsistent); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst), SeqCstBB); // Cleanup and return @@ -1257,9 +1244,9 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded, CGF.getContext().VoidPtrTy); Args.add(RValue::get(CGF.EmitCastToVoidPtr(AddForLoaded)), CGF.getContext().VoidPtrTy); - Args.add(RValue::get( - llvm::ConstantInt::get(CGF.IntTy, translateAtomicOrdering(AO))), - CGF.getContext().IntTy); + Args.add( + RValue::get(llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(AO))), + CGF.getContext().IntTy); emitAtomicLibcall(CGF, "__atomic_load", CGF.getContext().VoidTy, Args); } @@ -1287,28 +1274,21 @@ bool CodeGenFunction::LValueIsSuitableForInlineAtomic(LValue LV) { bool IsVolatile = LV.isVolatile() || hasVolatileMember(LV.getType()); // An atomic is inline if we don't need to use a libcall. bool AtomicIsInline = !AI.shouldUseLibcall(); + // MSVC doesn't seem to do this for types wider than a pointer. + if (getContext().getTypeSize(LV.getType()) > + getContext().getTypeSize(getContext().getIntPtrType())) + return false; return IsVolatile && AtomicIsInline; } -/// An type is a candidate for having its loads and stores be made atomic if -/// we are operating under /volatile:ms *and* we know the access is volatile and -/// performing such an operation can be performed without a libcall. -bool CodeGenFunction::typeIsSuitableForInlineAtomic(QualType Ty, - bool IsVolatile) const { - // An atomic is inline if we don't need to use a libcall (e.g. it is builtin). - bool AtomicIsInline = getContext().getTargetInfo().hasBuiltinAtomic( - getContext().getTypeSize(Ty), getContext().getTypeAlign(Ty)); - return CGM.getCodeGenOpts().MSVolatile && IsVolatile && AtomicIsInline; -} - RValue CodeGenFunction::EmitAtomicLoad(LValue LV, SourceLocation SL, AggValueSlot Slot) { llvm::AtomicOrdering AO; bool IsVolatile = LV.isVolatileQualified(); if (LV.getType()->isAtomicType()) { - AO = llvm::SequentiallyConsistent; + AO = llvm::AtomicOrdering::SequentiallyConsistent; } else { - AO = llvm::Acquire; + AO = llvm::AtomicOrdering::Acquire; IsVolatile = true; } return EmitAtomicLoad(LV, SL, AO, IsVolatile, Slot); @@ -1462,11 +1442,11 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr, CGF.getContext().VoidPtrTy); Args.add(RValue::get(CGF.EmitCastToVoidPtr(DesiredAddr)), CGF.getContext().VoidPtrTy); - Args.add(RValue::get(llvm::ConstantInt::get( - CGF.IntTy, translateAtomicOrdering(Success))), + Args.add(RValue::get( + llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Success))), CGF.getContext().IntTy); - Args.add(RValue::get(llvm::ConstantInt::get( - CGF.IntTy, translateAtomicOrdering(Failure))), + Args.add(RValue::get( + llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Failure))), CGF.getContext().IntTy); auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange", CGF.getContext().BoolTy, Args); @@ -1477,8 +1457,9 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr, std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange( RValue Expected, RValue Desired, llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) { - if (Failure >= Success) - // Don't assert on undefined behavior. + if (isStrongerThan(Failure, Success)) + // Don't assert on undefined behavior "failure argument shall be no stronger + // than the success argument". 
Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success); // Check whether we should use a library call. @@ -1727,9 +1708,9 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue, bool IsVolatile = lvalue.isVolatileQualified(); llvm::AtomicOrdering AO; if (lvalue.getType()->isAtomicType()) { - AO = llvm::SequentiallyConsistent; + AO = llvm::AtomicOrdering::SequentiallyConsistent; } else { - AO = llvm::Release; + AO = llvm::AtomicOrdering::Release; IsVolatile = true; } return EmitAtomicStore(rvalue, lvalue, AO, IsVolatile, isInit); @@ -1772,9 +1753,9 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, getContext().VoidPtrTy); args.add(RValue::get(EmitCastToVoidPtr(srcAddr.getPointer())), getContext().VoidPtrTy); - args.add(RValue::get(llvm::ConstantInt::get( - IntTy, AtomicInfo::translateAtomicOrdering(AO))), - getContext().IntTy); + args.add( + RValue::get(llvm::ConstantInt::get(IntTy, (int)llvm::toCABI(AO))), + getContext().IntTy); emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args); return; } diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index ba2941e9df4a..5a41f361d9ac 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -262,6 +262,11 @@ static bool isSafeForCXXConstantCapture(QualType type) { static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM, CodeGenFunction *CGF, const VarDecl *var) { + // Return if this is a function paramter. We shouldn't try to + // rematerialize default arguments of function parameters. + if (isa<ParmVarDecl>(var)) + return nullptr; + QualType type = var->getType(); // We can only do this if the variable is const. @@ -508,7 +513,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, // At this point, we just have to add padding if the end align still // isn't aligned right. if (endAlign < maxFieldAlign) { - CharUnits newBlockSize = blockSize.RoundUpToAlignment(maxFieldAlign); + CharUnits newBlockSize = blockSize.alignTo(maxFieldAlign); CharUnits padding = newBlockSize - blockSize; // If we haven't yet added any fields, remember that there was an @@ -775,35 +780,34 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // Compute the address of the thing we're going to move into the // block literal. Address src = Address::invalid(); - if (BlockInfo && CI.isNested()) { - // We need to use the capture from the enclosing block. - const CGBlockInfo::Capture &enclosingCapture = - BlockInfo->getCapture(variable); - - // This is a [[type]]*, except that a byref entry wil just be an i8**. - src = Builder.CreateStructGEP(LoadBlockStruct(), - enclosingCapture.getIndex(), - enclosingCapture.getOffset(), - "block.capture.addr"); - } else if (blockDecl->isConversionFromLambda()) { + + if (blockDecl->isConversionFromLambda()) { // The lambda capture in a lambda's conversion-to-block-pointer is // special; we'll simply emit it directly. src = Address::invalid(); - } else { - // Just look it up in the locals map, which will give us back a - // [[type]]*. If that doesn't work, do the more elaborate DRE - // emission. - auto it = LocalDeclMap.find(variable); - if (it != LocalDeclMap.end()) { - src = it->second; + } else if (CI.isByRef()) { + if (BlockInfo && CI.isNested()) { + // We need to use the capture from the enclosing block. + const CGBlockInfo::Capture &enclosingCapture = + BlockInfo->getCapture(variable); + + // This is a [[type]]*, except that a byref entry wil just be an i8**. 
+ src = Builder.CreateStructGEP(LoadBlockStruct(), + enclosingCapture.getIndex(), + enclosingCapture.getOffset(), + "block.capture.addr"); } else { - DeclRefExpr declRef( - const_cast<VarDecl *>(variable), - /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type, - VK_LValue, SourceLocation()); - src = EmitDeclRefLValue(&declRef).getAddress(); + auto I = LocalDeclMap.find(variable); + assert(I != LocalDeclMap.end()); + src = I->second; } - } + } else { + DeclRefExpr declRef(const_cast<VarDecl *>(variable), + /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), + type.getNonReferenceType(), VK_LValue, + SourceLocation()); + src = EmitDeclRefLValue(&declRef).getAddress(); + }; // For byrefs, we just write the pointer to the byref struct into // the block field. There's no need to chase the forwarding @@ -837,8 +841,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // If it's a reference variable, copy the reference into the block field. } else if (type->isReferenceType()) { - llvm::Value *ref = Builder.CreateLoad(src, "ref.val"); - Builder.CreateStore(ref, blockField); + Builder.CreateStore(src.getPointer(), blockField); // If this is an ARC __strong block-pointer variable, don't do a // block copy. @@ -1109,8 +1112,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, } if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { DI->setLocation(D->getLocation()); DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, arg, argNum, localAddr, Builder); @@ -1174,9 +1177,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // Create the function declaration. const FunctionProtoType *fnType = blockInfo.getBlockExpr()->getFunctionType(); - const CGFunctionInfo &fnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration( - fnType->getReturnType(), args, fnType->getExtInfo(), - fnType->isVariadic()); + const CGFunctionInfo &fnInfo = + CGM.getTypes().arrangeBlockFunctionDeclaration(fnType, args); if (CGM.ReturnSlotInterferesWithArgs(fnInfo)) blockInfo.UsesStret = true; @@ -1260,8 +1262,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, const VarDecl *variable = CI.getVariable(); DI->EmitLocation(Builder, variable->getLocation()); - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (capture.isConstant()) { auto addr = LocalDeclMap.find(variable)->second; @@ -1329,8 +1331,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { C.VoidPtrTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); // FIXME: it would be nice if these were mergeable with things with // identical semantics. 
@@ -1505,8 +1507,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { C.VoidPtrTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); // FIXME: We'd like to put these into a mergable by content, with // internal linkage. @@ -1791,8 +1793,8 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, Context.VoidPtrTy); args.push_back(&src); - const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration( - R, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI); @@ -1864,8 +1866,8 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, Context.VoidPtrTy); args.push_back(&src); - const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration( - R, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI); @@ -2108,7 +2110,7 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { bool packed = false; CharUnits varAlign = getContext().getDeclAlign(D); - CharUnits varOffset = size.RoundUpToAlignment(varAlign); + CharUnits varOffset = size.alignTo(varAlign); // We may have to insert padding. if (varOffset != size) { @@ -2285,9 +2287,36 @@ void CodeGenFunction::enterByrefCleanup(const AutoVarEmission &emission) { /// Adjust the declaration of something from the blocks API. 
static void configureBlocksRuntimeObject(CodeGenModule &CGM, llvm::Constant *C) { - if (!CGM.getLangOpts().BlocksRuntimeOptional) return; - auto *GV = cast<llvm::GlobalValue>(C->stripPointerCasts()); + + if (CGM.getTarget().getTriple().isOSBinFormatCOFF()) { + IdentifierInfo &II = CGM.getContext().Idents.get(C->getName()); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + assert((isa<llvm::Function>(C->stripPointerCasts()) || + isa<llvm::GlobalVariable>(C->stripPointerCasts())) && + "expected Function or GlobalVariable"); + + const NamedDecl *ND = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((ND = dyn_cast<FunctionDecl>(Result)) || + (ND = dyn_cast<VarDecl>(Result))) + break; + + // TODO: support static blocks runtime + if (GV->isDeclaration() && (!ND || !ND->hasAttr<DLLExportAttr>())) { + GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } else { + GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } + } + + if (!CGM.getLangOpts().BlocksRuntimeOptional) + return; + if (GV->isDeclaration() && GV->hasExternalLinkage()) GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); } @@ -2335,5 +2364,5 @@ llvm::Constant *CodeGenModule::getNSConcreteStackBlock() { Int8PtrTy->getPointerTo(), nullptr); configureBlocksRuntimeObject(*this, NSConcreteStackBlock); - return NSConcreteStackBlock; + return NSConcreteStackBlock; } diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h index 489f3413d4b8..027435d7c599 100644 --- a/lib/CodeGen/CGBuilder.h +++ b/lib/CodeGen/CGBuilder.h @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGBUILDER_H #define LLVM_CLANG_LIB_CODEGEN_CGBUILDER_H +#include "llvm/IR/DataLayout.h" #include "llvm/IR/IRBuilder.h" #include "Address.h" #include "CodeGenTypeCache.h" @@ -22,9 +23,7 @@ class CodeGenFunction; /// \brief This is an IRBuilder insertion helper that forwards to /// CodeGenFunction::InsertHelper, which adds necessary metadata to /// instructions. -template <bool PreserveNames> -class CGBuilderInserter - : protected llvm::IRBuilderDefaultInserter<PreserveNames> { +class CGBuilderInserter : protected llvm::IRBuilderDefaultInserter { public: CGBuilderInserter() = default; explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {} @@ -38,17 +37,10 @@ private: CodeGenFunction *CGF = nullptr; }; -// Don't preserve names on values in an optimized build. -#ifdef NDEBUG -#define PreserveNames false -#else -#define PreserveNames true -#endif - -typedef CGBuilderInserter<PreserveNames> CGBuilderInserterTy; +typedef CGBuilderInserter CGBuilderInserterTy; -typedef llvm::IRBuilder<PreserveNames, llvm::ConstantFolder, - CGBuilderInserterTy> CGBuilderBaseTy; +typedef llvm::IRBuilder<llvm::ConstantFolder, CGBuilderInserterTy> + CGBuilderBaseTy; class CGBuilderTy : public CGBuilderBaseTy { /// Storing a reference to the type cache here makes it a lot easier @@ -194,6 +186,12 @@ public: Addr.getPointer(), Index, Name), Addr.getAlignment().alignmentAtOffset(Offset)); } + Address CreateStructGEP(Address Addr, unsigned Index, + const llvm::StructLayout *Layout, + const llvm::Twine &Name = "") { + auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); + return CreateStructGEP(Addr, Index, Offset, Name); + } /// Given /// %addr = [n x T]* ... 
@@ -298,8 +296,6 @@ public: } }; -#undef PreserveNames - } // end namespace CodeGen } // end namespace clang diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 787ac5361bbb..c74e53ea84e0 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include <sstream> using namespace clang; @@ -105,9 +106,8 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::Type *ValueType = Args[1]->getType(); Args[1] = EmitToInt(CGF, Args[1], T, IntType); - llvm::Value *Result = - CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], - llvm::SequentiallyConsistent); + llvm::Value *Result = CGF.Builder.CreateAtomicRMW( + Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); return EmitFromInt(CGF, Result, T, ValueType); } @@ -167,9 +167,8 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, Args[1] = EmitToInt(CGF, Args[1], T, IntType); Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); - llvm::Value *Result = - CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], - llvm::SequentiallyConsistent); + llvm::Value *Result = CGF.Builder.CreateAtomicRMW( + Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); if (Invert) Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, @@ -206,9 +205,9 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, Args[1] = EmitToInt(CGF, Args[1], T, IntType); Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); - Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], - llvm::SequentiallyConsistent, - llvm::SequentiallyConsistent); + Value *Pair = CGF.Builder.CreateAtomicCmpXchg( + Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering::SequentiallyConsistent); if (ReturnBool) // Extract boolean success flag and zext it to int. return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), @@ -219,6 +218,51 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, ValueType); } +// Emit a simple mangled intrinsic that has 1 argument and a return type +// matching the argument type. +static Value *emitUnaryBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, Src0); +} + +// Emit an intrinsic that has 2 operands of the same type as its result. +static Value *emitBinaryBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, { Src0, Src1 }); +} + +// Emit an intrinsic that has 3 operands of the same type as its result. 
+static Value *emitTernaryBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); +} + +// Emit an intrinsic that has 1 float or double operand, and 1 integer. +static Value *emitFPIntBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, {Src0, Src1}); +} + /// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); @@ -248,8 +292,8 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { if (CGF.getTarget().isBigEndian()) { Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); V = CGF.Builder.CreateLShr(V, ShiftCst); - } - // We are truncating value in order to extract the higher-order + } + // We are truncating value in order to extract the higher-order // double, which we will be using to extract the sign from. IntTy = llvm::IntegerType::get(C, Width); V = CGF.Builder.CreateTrunc(V, IntTy); @@ -288,6 +332,17 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, return CGF.Builder.CreateExtractValue(Tmp, 0); } +static Value *emitRangedBuiltin(CodeGenFunction &CGF, + unsigned IntrinsicID, + int low, int high) { + llvm::MDBuilder MDHelper(CGF.getLLVMContext()); + llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); + llvm::Instruction *Call = CGF.Builder.CreateCall(F); + Call->setMetadata(llvm::LLVMContext::MD_range, RNode); + return Call; +} + namespace { struct WidthAndSignedness { unsigned Width; @@ -465,9 +520,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_fabs: case Builtin::BI__builtin_fabsf: case Builtin::BI__builtin_fabsl: { - Value *Arg1 = EmitScalarExpr(E->getArg(0)); - Value *Result = EmitFAbs(*this, Arg1); - return RValue::get(Result); + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); } case Builtin::BI__builtin_fmod: case Builtin::BI__builtin_fmodf: @@ -477,7 +530,51 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); return RValue::get(Result); } - + case Builtin::BI__builtin_copysign: + case Builtin::BI__builtin_copysignf: + case Builtin::BI__builtin_copysignl: { + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); + } + case Builtin::BI__builtin_ceil: + case Builtin::BI__builtin_ceilf: + case Builtin::BI__builtin_ceill: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); + } + case Builtin::BI__builtin_floor: + case Builtin::BI__builtin_floorf: + case Builtin::BI__builtin_floorl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); + } + case Builtin::BI__builtin_trunc: + case Builtin::BI__builtin_truncf: + case Builtin::BI__builtin_truncl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); + } + case Builtin::BI__builtin_rint: + case Builtin::BI__builtin_rintf: + case Builtin::BI__builtin_rintl: { + return 
RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); + } + case Builtin::BI__builtin_nearbyint: + case Builtin::BI__builtin_nearbyintf: + case Builtin::BI__builtin_nearbyintl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); + } + case Builtin::BI__builtin_round: + case Builtin::BI__builtin_roundf: + case Builtin::BI__builtin_roundl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); + } + case Builtin::BI__builtin_fmin: + case Builtin::BI__builtin_fminf: + case Builtin::BI__builtin_fminl: { + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); + } + case Builtin::BI__builtin_fmax: + case Builtin::BI__builtin_fmaxf: + case Builtin::BI__builtin_fmaxl: { + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); + } case Builtin::BI__builtin_conj: case Builtin::BI__builtin_conjf: case Builtin::BI__builtin_conjl: { @@ -645,10 +742,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: { - Value *ArgValue = EmitScalarExpr(E->getArg(0)); - llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType); - return RValue::get(Builder.CreateCall(F, ArgValue)); + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); + } + case Builtin::BI__builtin_bitreverse8: + case Builtin::BI__builtin_bitreverse16: + case Builtin::BI__builtin_bitreverse32: + case Builtin::BI__builtin_bitreverse64: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); } case Builtin::BI__builtin_object_size: { unsigned Type = @@ -751,13 +851,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } - case Builtin::BI__builtin_isinf: { - // isinf(x) --> fabs(x) == infinity + case Builtin::BI__builtin_isinf: + case Builtin::BI__builtin_isfinite: { + // isinf(x) --> fabs(x) == infinity + // isfinite(x) --> fabs(x) != infinity + // x != NaN via the ordered compare in either case. Value *V = EmitScalarExpr(E->getArg(0)); - V = EmitFAbs(*this, V); - - V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); - return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); + Value *Fabs = EmitFAbs(*this, V); + Constant *Infinity = ConstantFP::getInfinity(V->getType()); + CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) + ? 
CmpInst::FCMP_OEQ + : CmpInst::FCMP_ONE; + Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); + return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); } case Builtin::BI__builtin_isinf_sign: { @@ -795,19 +901,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } - case Builtin::BI__builtin_isfinite: { - // isfinite(x) --> x == x && fabs(x) != infinity; - Value *V = EmitScalarExpr(E->getArg(0)); - Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); - - Value *Abs = EmitFAbs(*this, V); - Value *IsNotInf = - Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); - - V = Builder.CreateAnd(Eq, IsNotInf, "and"); - return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); - } - case Builtin::BI__builtin_fpclassify: { Value *V = EmitScalarExpr(E->getArg(5)); llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); @@ -1258,7 +1351,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::StoreInst *Store = Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, StoreSize); - Store->setAtomic(llvm::Release); + Store->setAtomic(llvm::AtomicOrdering::Release); return RValue::get(nullptr); } @@ -1270,7 +1363,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // any way to safely use it... but in practice, it mostly works // to use it with non-atomic loads and stores to get acquire/release // semantics. - Builder.CreateFence(llvm::SequentiallyConsistent); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); return RValue::get(nullptr); } @@ -1294,9 +1387,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), getContext().VoidPtrTy); const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args, - FunctionType::ExtInfo(), - RequiredArgs::All); + CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); @@ -1320,30 +1411,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, switch (ord) { case 0: // memory_order_relaxed default: // invalid order - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::Monotonic); + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::Monotonic); break; - case 1: // memory_order_consume - case 2: // memory_order_acquire - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::Acquire); + case 1: // memory_order_consume + case 2: // memory_order_acquire + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::Acquire); break; - case 3: // memory_order_release - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::Release); + case 3: // memory_order_release + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::Release); break; - case 4: // memory_order_acq_rel - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::AcquireRelease); + case 4: // memory_order_acq_rel + + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::AcquireRelease); break; - case 5: // 
memory_order_seq_cst - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::SequentiallyConsistent); + case 5: // memory_order_seq_cst + Result = Builder.CreateAtomicRMW( + llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::SequentiallyConsistent); break; } Result->setVolatile(Volatile); @@ -1360,9 +1448,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, createBasicBlock("seqcst", CurFn) }; llvm::AtomicOrdering Orders[5] = { - llvm::Monotonic, llvm::Acquire, llvm::Release, - llvm::AcquireRelease, llvm::SequentiallyConsistent - }; + llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, + llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, + llvm::AtomicOrdering::SequentiallyConsistent}; Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); @@ -1406,13 +1494,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, switch (ord) { case 0: // memory_order_relaxed default: // invalid order - Store->setOrdering(llvm::Monotonic); + Store->setOrdering(llvm::AtomicOrdering::Monotonic); break; case 3: // memory_order_release - Store->setOrdering(llvm::Release); + Store->setOrdering(llvm::AtomicOrdering::Release); break; case 5: // memory_order_seq_cst - Store->setOrdering(llvm::SequentiallyConsistent); + Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); break; } return RValue::get(nullptr); @@ -1426,8 +1514,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, createBasicBlock("seqcst", CurFn) }; llvm::AtomicOrdering Orders[3] = { - llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent - }; + llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, + llvm::AtomicOrdering::SequentiallyConsistent}; Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); @@ -1466,16 +1554,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; case 1: // memory_order_consume case 2: // memory_order_acquire - Builder.CreateFence(llvm::Acquire, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); break; case 3: // memory_order_release - Builder.CreateFence(llvm::Release, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); break; case 4: // memory_order_acq_rel - Builder.CreateFence(llvm::AcquireRelease, Scope); + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); break; case 5: // memory_order_seq_cst - Builder.CreateFence(llvm::SequentiallyConsistent, Scope); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, + Scope); break; } return RValue::get(nullptr); @@ -1492,23 +1581,23 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); Builder.SetInsertPoint(AcquireBB); - Builder.CreateFence(llvm::Acquire, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(1), AcquireBB); SI->addCase(Builder.getInt32(2), AcquireBB); Builder.SetInsertPoint(ReleaseBB); - Builder.CreateFence(llvm::Release, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(3), ReleaseBB); Builder.SetInsertPoint(AcqRelBB); - Builder.CreateFence(llvm::AcquireRelease, Scope); + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); Builder.CreateBr(ContBB); 
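The switches in this hunk all encode the same mapping from the memory_order argument (an integer 0 through 5 at the call site) to an LLVM atomic ordering. A minimal sketch of that mapping, using a local enum as a stand-in for llvm::AtomicOrdering and folding memory_order_consume into acquire exactly as the code above does:

// Stand-in for llvm::AtomicOrdering; only the orderings used above appear.
enum class Ordering { Monotonic, Acquire, Release, AcquireRelease,
                      SequentiallyConsistent };

static Ordering mapMemoryOrder(int MemOrder) {
  switch (MemOrder) {
  case 1: // memory_order_consume (treated as acquire)
  case 2: // memory_order_acquire
    return Ordering::Acquire;
  case 3: // memory_order_release
    return Ordering::Release;
  case 4: // memory_order_acq_rel
    return Ordering::AcquireRelease;
  case 5: // memory_order_seq_cst
    return Ordering::SequentiallyConsistent;
  case 0: // memory_order_relaxed
  default: // invalid orders are lowered as relaxed
    return Ordering::Monotonic;
  }
}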
SI->addCase(Builder.getInt32(4), AcqRelBB); Builder.SetInsertPoint(SeqCstBB); - Builder.CreateFence(llvm::SequentiallyConsistent, Scope); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(5), SeqCstBB); @@ -1794,7 +1883,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; } - + llvm::Value *Carry; llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); Builder.CreateStore(Sum, SumOutPtr); @@ -1839,9 +1928,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *Comparand = Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); - auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, - SequentiallyConsistent, - SequentiallyConsistent); + auto Result = + Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + AtomicOrdering::SequentiallyConsistent, + AtomicOrdering::SequentiallyConsistent); Result->setVolatile(true); return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, @@ -1853,44 +1943,47 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(1)), - SequentiallyConsistent, - SequentiallyConsistent); + AtomicOrdering::SequentiallyConsistent, + AtomicOrdering::SequentiallyConsistent); CXI->setVolatile(true); return RValue::get(Builder.CreateExtractValue(CXI, 0)); } case Builtin::BI_InterlockedIncrement: { + llvm::Type *IntTy = ConvertType(E->getType()); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( AtomicRMWInst::Add, EmitScalarExpr(E->getArg(0)), - ConstantInt::get(Int32Ty, 1), - llvm::SequentiallyConsistent); + ConstantInt::get(IntTy, 1), + llvm::AtomicOrdering::SequentiallyConsistent); RMWI->setVolatile(true); - return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1))); + return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1))); } case Builtin::BI_InterlockedDecrement: { + llvm::Type *IntTy = ConvertType(E->getType()); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( AtomicRMWInst::Sub, EmitScalarExpr(E->getArg(0)), - ConstantInt::get(Int32Ty, 1), - llvm::SequentiallyConsistent); + ConstantInt::get(IntTy, 1), + llvm::AtomicOrdering::SequentiallyConsistent); RMWI->setVolatile(true); - return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1))); + return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1))); } case Builtin::BI_InterlockedExchangeAdd: { AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( AtomicRMWInst::Add, EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), - llvm::SequentiallyConsistent); + llvm::AtomicOrdering::SequentiallyConsistent); RMWI->setVolatile(true); return RValue::get(RMWI); } case Builtin::BI__readfsdword: { + llvm::Type *IntTy = ConvertType(E->getType()); Value *IntToPtr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), - llvm::PointerType::get(CGM.Int32Ty, 257)); + llvm::PointerType::get(IntTy, 257)); LoadInst *Load = - Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true); + Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true); return RValue::get(Load); } @@ -1963,6 +2056,322 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); break; } + + // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions + case Builtin::BIread_pipe: + case Builtin::BIwrite_pipe: { + Value *Arg0 = 
EmitScalarExpr(E->getArg(0)), + *Arg1 = EmitScalarExpr(E->getArg(1)); + + // Type of the generic packet parameter. + unsigned GenericAS = + getContext().getTargetAddressSpace(LangAS::opencl_generic); + llvm::Type *I8PTy = llvm::PointerType::get( + llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); + + // Testing which overloaded version we should generate the call for. + if (2U == E->getNumArgs()) { + const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" + : "__write_pipe_2"; + // Creating a generic function type to be able to call with any builtin or + // user defined type. + llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast})); + } else { + assert(4 == E->getNumArgs() && + "Illegal number of parameters to pipe function"); + const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" + : "__write_pipe_4"; + + llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy}; + Value *Arg2 = EmitScalarExpr(E->getArg(2)), + *Arg3 = EmitScalarExpr(E->getArg(3)); + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); + // We know the third argument is an integer type, but we may need to cast + // it to i32. + if (Arg2->getType() != Int32Ty) + Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast})); + } + } + // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write + // functions + case Builtin::BIreserve_read_pipe: + case Builtin::BIreserve_write_pipe: + case Builtin::BIwork_group_reserve_read_pipe: + case Builtin::BIwork_group_reserve_write_pipe: + case Builtin::BIsub_group_reserve_read_pipe: + case Builtin::BIsub_group_reserve_write_pipe: { + // Composing the mangled name for the function. + const char *Name; + if (BuiltinID == Builtin::BIreserve_read_pipe) + Name = "__reserve_read_pipe"; + else if (BuiltinID == Builtin::BIreserve_write_pipe) + Name = "__reserve_write_pipe"; + else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) + Name = "__work_group_reserve_read_pipe"; + else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) + Name = "__work_group_reserve_write_pipe"; + else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) + Name = "__sub_group_reserve_read_pipe"; + else + Name = "__sub_group_reserve_write_pipe"; + + Value *Arg0 = EmitScalarExpr(E->getArg(0)), + *Arg1 = EmitScalarExpr(E->getArg(1)); + llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); + + // Building the generic function prototype. + llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + // We know the second argument is an integer type, but we may need to cast + // it to i32. 
+ if (Arg1->getType() != Int32Ty) + Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); + } + // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write + // functions + case Builtin::BIcommit_read_pipe: + case Builtin::BIcommit_write_pipe: + case Builtin::BIwork_group_commit_read_pipe: + case Builtin::BIwork_group_commit_write_pipe: + case Builtin::BIsub_group_commit_read_pipe: + case Builtin::BIsub_group_commit_write_pipe: { + const char *Name; + if (BuiltinID == Builtin::BIcommit_read_pipe) + Name = "__commit_read_pipe"; + else if (BuiltinID == Builtin::BIcommit_write_pipe) + Name = "__commit_write_pipe"; + else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) + Name = "__work_group_commit_read_pipe"; + else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) + Name = "__work_group_commit_write_pipe"; + else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) + Name = "__sub_group_commit_read_pipe"; + else + Name = "__sub_group_commit_write_pipe"; + + Value *Arg0 = EmitScalarExpr(E->getArg(0)), + *Arg1 = EmitScalarExpr(E->getArg(1)); + + // Building the generic function prototype. + llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()}; + llvm::FunctionType *FTy = + llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), + llvm::ArrayRef<llvm::Type *>(ArgTys), false); + + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); + } + // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions + case Builtin::BIget_pipe_num_packets: + case Builtin::BIget_pipe_max_packets: { + const char *Name; + if (BuiltinID == Builtin::BIget_pipe_num_packets) + Name = "__get_pipe_num_packets"; + else + Name = "__get_pipe_max_packets"; + + // Building the generic function prototype. + Value *Arg0 = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgTys[] = {Arg0->getType()}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0})); + } + + // OpenCL v2.0 s6.13.9 - Address space qualifier functions. + case Builtin::BIto_global: + case Builtin::BIto_local: + case Builtin::BIto_private: { + auto Arg0 = EmitScalarExpr(E->getArg(0)); + auto NewArgT = llvm::PointerType::get(Int8Ty, + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); + auto NewRetT = llvm::PointerType::get(Int8Ty, + CGM.getContext().getTargetAddressSpace( + E->getType()->getPointeeType().getAddressSpace())); + auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); + llvm::Value *NewArg; + if (Arg0->getType()->getPointerAddressSpace() != + NewArgT->getPointerAddressSpace()) + NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); + else + NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); + auto NewCall = Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, + E->getDirectCallee()->getName()), {NewArg}); + return RValue::get(Builder.CreateBitOrPointerCast(NewCall, + ConvertType(E->getType()))); + } + + // OpenCL v2.0, s6.13.17 - Enqueue kernel function. + // It contains four different overload formats specified in Table 6.13.17.1. 
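Before the enqueue_kernel case that follows, a compact sketch of how its four overload forms are told apart; the helper name and enum are hypothetical, while the dispatch conditions mirror the NumArgs and block-pointer checks in the code below.

enum class EnqueueForm { Basic, VarArgs, BasicEvents, EventsVarArgs };

// NumArgs is the number of call arguments; Arg3IsBlock says whether the
// fourth argument is already the invoke block (no event arguments passed).
static EnqueueForm classifyEnqueueKernel(unsigned NumArgs, bool Arg3IsBlock) {
  if (NumArgs == 4)
    return EnqueueForm::Basic;          // queue, flags, ndrange, block
  if (Arg3IsBlock)
    return EnqueueForm::VarArgs;        // block followed by per-argument sizes
  if (NumArgs == 7)
    return EnqueueForm::BasicEvents;    // + num_events, wait list, return event
  return EnqueueForm::EventsVarArgs;    // events plus trailing size arguments
}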
+ case Builtin::BIenqueue_kernel: { + StringRef Name; // Generated function call name + unsigned NumArgs = E->getNumArgs(); + + llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); + llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy); + + llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); + llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); + llvm::Value *Range = EmitScalarExpr(E->getArg(2)); + + if (NumArgs == 4) { + // The most basic form of the call with parameters: + // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) + Name = "__enqueue_kernel_basic"; + llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); + + llvm::Value *Block = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block})); + } + assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); + + // Could have events and/or vaargs. + if (E->getArg(3)->getType()->isBlockPointerType()) { + // No events passed, but has variadic arguments. + Name = "__enqueue_kernel_vaargs"; + llvm::Value *Block = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + // Create a vector of the arguments, as well as a constant value to + // express to the runtime the number of variadic arguments. + std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, + ConstantInt::get(IntTy, NumArgs - 4)}; + std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, + IntTy}; + + // Add the variadics. + for (unsigned I = 4; I < NumArgs; ++I) { + llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); + unsigned TypeSizeInBytes = + getContext() + .getTypeSizeInChars(E->getArg(I)->getType()) + .getQuantity(); + Args.push_back(TypeSizeInBytes < 4 + ? Builder.CreateZExt(ArgSize, Int32Ty) + : ArgSize); + } + + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + } + // Any calls now have event arguments passed. + if (NumArgs >= 7) { + llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); + unsigned AS4 = + E->getArg(4)->getType()->isArrayType() + ? E->getArg(4)->getType().getAddressSpace() + : E->getArg(4)->getType()->getPointeeType().getAddressSpace(); + llvm::Type *EventPtrAS4Ty = + EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4)); + unsigned AS5 = + E->getArg(5)->getType()->getPointeeType().getAddressSpace(); + llvm::Type *EventPtrAS5Ty = + EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5)); + + llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3)); + llvm::Value *EventList = + E->getArg(4)->getType()->isArrayType() + ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() + : EmitScalarExpr(E->getArg(4)); + llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); + llvm::Value *Block = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); + + std::vector<llvm::Type *> ArgTys = { + QueueTy, Int32Ty, RangeTy, Int32Ty, + EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy}; + std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, + EventList, ClkEvent, Block}; + + if (NumArgs == 7) { + // Has events but no variadics. 
+ Name = "__enqueue_kernel_basic_events"; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + } + // Has event info and variadics + // Pass the number of variadics to the runtime function too. + Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); + ArgTys.push_back(Int32Ty); + Name = "__enqueue_kernel_events_vaargs"; + + // Add the variadics. + for (unsigned I = 7; I < NumArgs; ++I) { + llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); + unsigned TypeSizeInBytes = + getContext() + .getTypeSizeInChars(E->getArg(I)->getType()) + .getQuantity(); + Args.push_back(TypeSizeInBytes < 4 + ? Builder.CreateZExt(ArgSize, Int32Ty) + : ArgSize); + } + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + } + } + // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block + // parameter. + case Builtin::BIget_kernel_work_group_size: { + Value *Arg = EmitScalarExpr(E->getArg(0)); + Arg = Builder.CreateBitCast(Arg, Int8PtrTy); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, Int8PtrTy, false), + "__get_kernel_work_group_size_impl"), + Arg)); + } + case Builtin::BIget_kernel_preferred_work_group_size_multiple: { + Value *Arg = EmitScalarExpr(E->getArg(0)); + Arg = Builder.CreateBitCast(Arg, Int8PtrTy); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, Int8PtrTy, false), + "__get_kernel_preferred_work_group_multiple_impl"), + Arg)); + } + case Builtin::BIprintf: + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) + return EmitCUDADevicePrintfCallExpr(E, ReturnValue); + break; + case Builtin::BI__builtin_canonicalize: + case Builtin::BI__builtin_canonicalizef: + case Builtin::BI__builtin_canonicalizel: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); + + case Builtin::BI__builtin_thread_pointer: { + if (!getContext().getTargetInfo().isTLSSupported()) + CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); + // Fall through - it's already mapped to the intrinsic by GCCBuiltin. + break; + } } // If this is an alias for a lib function (e.g. 
__builtin_sin), emit @@ -2155,7 +2564,7 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, } Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { - unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); + unsigned nElts = V->getType()->getVectorNumElements(); Value* SV = llvm::ConstantVector::getSplat(nElts, C); return Builder.CreateShuffleVector(V, V, SV, "lane"); } @@ -3073,14 +3482,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vext_v: case NEON::BI__builtin_neon_vextq_v: { int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) - Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); + Indices.push_back(i+CV); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Value *SV = llvm::ConstantVector::get(Indices); - return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); + return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext"); } case NEON::BI__builtin_neon_vfma_v: case NEON::BI__builtin_neon_vfmaq_v: { @@ -3278,14 +3686,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(Builder.getInt32(i+vi)); - Indices.push_back(Builder.getInt32(i+e+vi)); + Indices.push_back(i+vi); + Indices.push_back(i+e+vi); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -3307,13 +3714,12 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) - Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); + Indices.push_back(2*i+vi); Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -3326,14 +3732,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); - Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); + Indices.push_back((i + vi*e) >> 1); + Indices.push_back(((i + vi*e) >> 1)+e); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -3381,19 +3786,19 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, if (ExtOp) TblOps.push_back(ExtOp); - // Build a vector containing sequential 
number like (0, 1, 2, ..., 15) - SmallVector<Constant*, 16> Indices; + // Build a vector containing sequential number like (0, 1, 2, ..., 15) + SmallVector<uint32_t, 16> Indices; llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { - Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); - Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); + Indices.push_back(2*i); + Indices.push_back(2*i+1); } - Value *SV = llvm::ConstantVector::get(Indices); int PairPos = 0, End = Ops.size() - 1; while (PairPos < End) { TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], - Ops[PairPos+1], SV, Name)); + Ops[PairPos+1], Indices, + Name)); PairPos += 2; } @@ -3402,13 +3807,13 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, if (PairPos == End) { Value *ZeroTbl = ConstantAggregateZero::get(TblTy); TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], - ZeroTbl, SV, Name)); + ZeroTbl, Indices, Name)); } Function *TblF; TblOps.push_back(IndexOp); TblF = CGF.CGM.getIntrinsic(IntID, ResTy); - + return CGF.EmitNeonCall(TblF, TblOps, Name); } @@ -3452,7 +3857,9 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType, bool IsRead) { + llvm::Type *ValueType, + bool IsRead, + StringRef SysReg = "") { // write and register intrinsics only support 32 and 64 bit operations. assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) && "Unsupported size for register."); @@ -3461,8 +3868,10 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, CodeGen::CodeGenModule &CGM = CGF.CGM; LLVMContext &Context = CGM.getLLVMContext(); - const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); - StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); + if (SysReg.empty()) { + const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); + SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); + } llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); @@ -3602,6 +4011,74 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); } + if (BuiltinID == ARM::BI__builtin_arm_mcrr || + BuiltinID == ARM::BI__builtin_arm_mcrr2) { + Function *F; + + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin"); + case ARM::BI__builtin_arm_mcrr: + F = CGM.getIntrinsic(Intrinsic::arm_mcrr); + break; + case ARM::BI__builtin_arm_mcrr2: + F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); + break; + } + + // MCRR{2} instruction has 5 operands but + // the intrinsic has 4 because Rt and Rt2 + // are represented as a single unsigned 64 + // bit integer in the intrinsic definition + // but internally it's represented as 2 32 + // bit integers. 
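As the comment above says, the __builtin_arm_mcrr builtins take Rt and Rt2 packed into one unsigned 64-bit value; the IR that follows splits it with a truncate and a 32-bit logical shift. The same split in plain C++, as a small stand-alone sketch (splitRtPair is a hypothetical name):

#include <cstdint>

// Low half goes to Rt, high half to Rt2, matching the trunc / lshr-by-32 pair
// emitted below.
static void splitRtPair(uint64_t RtAndRt2, uint32_t &Rt, uint32_t &Rt2) {
  Rt  = static_cast<uint32_t>(RtAndRt2);        // bits [31:0]
  Rt2 = static_cast<uint32_t>(RtAndRt2 >> 32);  // bits [63:32]
}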
+ + Value *Coproc = EmitScalarExpr(E->getArg(0)); + Value *Opc1 = EmitScalarExpr(E->getArg(1)); + Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); + Value *CRm = EmitScalarExpr(E->getArg(3)); + + Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); + Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); + Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); + Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); + + return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); + } + + if (BuiltinID == ARM::BI__builtin_arm_mrrc || + BuiltinID == ARM::BI__builtin_arm_mrrc2) { + Function *F; + + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin"); + case ARM::BI__builtin_arm_mrrc: + F = CGM.getIntrinsic(Intrinsic::arm_mrrc); + break; + case ARM::BI__builtin_arm_mrrc2: + F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); + break; + } + + Value *Coproc = EmitScalarExpr(E->getArg(0)); + Value *Opc1 = EmitScalarExpr(E->getArg(1)); + Value *CRm = EmitScalarExpr(E->getArg(2)); + Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); + + // Returns an unsigned 64 bit integer, represented + // as two 32 bit integers. + + Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); + Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); + Rt = Builder.CreateZExt(Rt, Int64Ty); + Rt1 = Builder.CreateZExt(Rt1, Int64Ty); + + Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); + RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); + RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); + + return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); + } + if (BuiltinID == ARM::BI__builtin_arm_ldrexd || ((BuiltinID == ARM::BI__builtin_arm_ldrex || BuiltinID == ARM::BI__builtin_arm_ldaex) && @@ -3914,7 +4391,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // the first argument, but the LLVM intrinsic expects it as the third one. case ARM::BI_MoveToCoprocessor: case ARM::BI_MoveToCoprocessor2: { - Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? + Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? Intrinsic::arm_mcr : Intrinsic::arm_mcr2); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], Ops[3], Ops[4], Ops[5]}); @@ -4478,11 +4955,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F); } - if (BuiltinID == AArch64::BI__builtin_thread_pointer) { - Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer); - return Builder.CreateCall(F); - } - // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { @@ -5150,22 +5622,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); } - case NEON::BI__builtin_neon_vfms_v: - case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types - // FIXME: probably remove when we no longer support aarch64_simd.h - // (arm_neon.h delegates to vfma). - - // The ARM builtins (and instructions) have the addend as the first - // operand, but the 'fma' intrinsics have it last. Swap it around here. - Value *Subtrahend = Ops[0]; - Value *Multiplicand = Ops[2]; - Ops[0] = Multiplicand; - Ops[2] = Subtrahend; - Ops[1] = Builder.CreateBitCast(Ops[1], VTy); - Ops[1] = Builder.CreateFNeg(Ops[1]); - Int = Intrinsic::fma; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls"); - } case NEON::BI__builtin_neon_vmull_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 
Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; @@ -5988,14 +6444,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(ConstantInt::get(Int32Ty, i+vi)); - Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi)); + Indices.push_back(i+vi); + Indices.push_back(i+e+vi); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -6008,13 +6463,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) - Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); + Indices.push_back(2*i+vi); Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -6027,14 +6481,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); - Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); + Indices.push_back((i + vi*e) >> 1); + Indices.push_back(((i + vi*e) >> 1)+e); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -6110,6 +6563,118 @@ BuildVector(ArrayRef<llvm::Value*> Ops) { return Result; } +// Convert the mask from an integer type to a vector of i1. +static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, + unsigned NumElts) { + + llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), + cast<IntegerType>(Mask->getType())->getBitWidth()); + Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); + + // If we have less than 8 elements, then the starting mask was an i8 and + // we need to extract down to the right number of elements. + if (NumElts < 8) { + uint32_t Indices[4]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, + makeArrayRef(Indices, NumElts), + "extract"); + } + return MaskVec; +} + +static Value *EmitX86MaskedStore(CodeGenFunction &CGF, + SmallVectorImpl<Value *> &Ops, + unsigned Align) { + // Cast the pointer to right type. + Ops[0] = CGF.Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + + // If the mask is all ones just emit a regular store. 
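getMaskVecValue above turns an AVX-512 integer mask into a vector of i1, where bit i of the mask selects lane i (narrowed by a shuffle when fewer than 8 elements are needed). A scalar reference model of that correspondence, with a hypothetical helper name:

#include <cstdint>

// Bit i of an AVX-512 mask enables lane i; this is what the bitcast to
// <N x i1> (plus the narrowing shuffle for N < 8) expresses in IR.
static void expandMaskToLanes(uint64_t Mask, bool Lanes[], unsigned NumElts) {
  for (unsigned i = 0; i != NumElts; ++i)
    Lanes[i] = (Mask >> i) & 1;
}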
+ if (const auto *C = dyn_cast<Constant>(Ops[2])) + if (C->isAllOnesValue()) + return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + Ops[1]->getType()->getVectorNumElements()); + + return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); +} + +static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, + SmallVectorImpl<Value *> &Ops, unsigned Align) { + // Cast the pointer to right type. + Ops[0] = CGF.Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + + // If the mask is all ones just emit a regular store. + if (const auto *C = dyn_cast<Constant>(Ops[2])) + if (C->isAllOnesValue()) + return CGF.Builder.CreateAlignedLoad(Ops[0], Align); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + Ops[1]->getType()->getVectorNumElements()); + + return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); +} + +static Value *EmitX86Select(CodeGenFunction &CGF, + Value *Mask, Value *Op0, Value *Op1) { + + // If the mask is all ones just return first argument. + if (const auto *C = dyn_cast<Constant>(Mask)) + if (C->isAllOnesValue()) + return Op0; + + Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); + + return CGF.Builder.CreateSelect(Mask, Op0, Op1); +} + +static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, + bool Signed, SmallVectorImpl<Value *> &Ops) { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + Value *Cmp; + + if (CC == 3) { + Cmp = Constant::getNullValue( + llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); + } else if (CC == 7) { + Cmp = Constant::getAllOnesValue( + llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); + } else { + ICmpInst::Predicate Pred; + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case 0: Pred = ICmpInst::ICMP_EQ; break; + case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 4: Pred = ICmpInst::ICMP_NE; break; + case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + } + Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); + } + + const auto *C = dyn_cast<Constant>(Ops.back()); + if (!C || !C->isAllOnesValue()) + Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); + + if (NumElts < 8) { + uint32_t Indices[8]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + for (unsigned i = NumElts; i != 8; ++i) + Indices[i] = i % NumElts + NumElts; + Cmp = CGF.Builder.CreateShuffleVector( + Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); + } + return CGF.Builder.CreateBitCast(Cmp, + IntegerType::get(CGF.getLLVMContext(), + std::max(NumElts, 8U))); +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == X86::BI__builtin_ms_va_start || @@ -6160,6 +6725,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); } + // These exist so that the builtin that takes an immediate can be bounds + // checked by clang to avoid passing bad immediates to the backend. Since + // AVX has a larger immediate than SSE we would need separate builtins to + // do the different bounds checking. Rather than create a clang specific + // SSE only builtin, this implements eight separate builtins to match gcc + // implementation. 
+ auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { + Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops); + }; + + // For the vector forms of FP comparisons, translate the builtins directly to + // IR. + // TODO: The builtins could be removed if the SSE header files used vector + // extension comparisons directly (vector ordered/unordered may need + // additional support via __builtin_isnan()). + auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { + Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); + llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); + llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); + Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); + return Builder.CreateBitCast(Sext, FPVecTy); + }; + switch (BuiltinID) { default: return nullptr; case X86::BI__builtin_cpu_supports: { @@ -6188,6 +6778,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, AVX512F, BMI, BMI2, + AES, + PCLMUL, + AVX512VL, + AVX512BW, + AVX512DQ, + AVX512CD, + AVX512ER, + AVX512PF, + AVX512VBMI, + AVX512IFMA, MAX }; @@ -6198,6 +6798,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, .Case("sse", X86Features::SSE) .Case("sse2", X86Features::SSE2) .Case("sse3", X86Features::SSE3) + .Case("ssse3", X86Features::SSSE3) .Case("sse4.1", X86Features::SSE4_1) .Case("sse4.2", X86Features::SSE4_2) .Case("avx", X86Features::AVX) @@ -6209,6 +6810,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, .Case("avx512f", X86Features::AVX512F) .Case("bmi", X86Features::BMI) .Case("bmi2", X86Features::BMI2) + .Case("aes", X86Features::AES) + .Case("pclmul", X86Features::PCLMUL) + .Case("avx512vl", X86Features::AVX512VL) + .Case("avx512bw", X86Features::AVX512BW) + .Case("avx512dq", X86Features::AVX512DQ) + .Case("avx512cd", X86Features::AVX512CD) + .Case("avx512er", X86Features::AVX512ER) + .Case("avx512pf", X86Features::AVX512PF) + .Case("avx512vbmi", X86Features::AVX512VBMI) + .Case("avx512ifma", X86Features::AVX512IFMA) .Default(X86Features::MAX); assert(Feature != X86Features::MAX && "Invalid feature!"); @@ -6237,7 +6848,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Check the value of the bit corresponding to the feature requested. 
Value *Bitset = Builder.CreateAnd( - Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature)); + Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); } case X86::BI_mm_prefetch: { @@ -6312,6 +6923,78 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops.push_back(Mlo); return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); } + case X86::BI__builtin_ia32_storedqudi128_mask: + case X86::BI__builtin_ia32_storedqusi128_mask: + case X86::BI__builtin_ia32_storedquhi128_mask: + case X86::BI__builtin_ia32_storedquqi128_mask: + case X86::BI__builtin_ia32_storeupd128_mask: + case X86::BI__builtin_ia32_storeups128_mask: + case X86::BI__builtin_ia32_storedqudi256_mask: + case X86::BI__builtin_ia32_storedqusi256_mask: + case X86::BI__builtin_ia32_storedquhi256_mask: + case X86::BI__builtin_ia32_storedquqi256_mask: + case X86::BI__builtin_ia32_storeupd256_mask: + case X86::BI__builtin_ia32_storeups256_mask: + case X86::BI__builtin_ia32_storedqudi512_mask: + case X86::BI__builtin_ia32_storedqusi512_mask: + case X86::BI__builtin_ia32_storedquhi512_mask: + case X86::BI__builtin_ia32_storedquqi512_mask: + case X86::BI__builtin_ia32_storeupd512_mask: + case X86::BI__builtin_ia32_storeups512_mask: + return EmitX86MaskedStore(*this, Ops, 1); + + case X86::BI__builtin_ia32_movdqa32store128_mask: + case X86::BI__builtin_ia32_movdqa64store128_mask: + case X86::BI__builtin_ia32_storeaps128_mask: + case X86::BI__builtin_ia32_storeapd128_mask: + case X86::BI__builtin_ia32_movdqa32store256_mask: + case X86::BI__builtin_ia32_movdqa64store256_mask: + case X86::BI__builtin_ia32_storeaps256_mask: + case X86::BI__builtin_ia32_storeapd256_mask: + case X86::BI__builtin_ia32_movdqa32store512_mask: + case X86::BI__builtin_ia32_movdqa64store512_mask: + case X86::BI__builtin_ia32_storeaps512_mask: + case X86::BI__builtin_ia32_storeapd512_mask: { + unsigned Align = + getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); + return EmitX86MaskedStore(*this, Ops, Align); + } + case X86::BI__builtin_ia32_loadups128_mask: + case X86::BI__builtin_ia32_loadups256_mask: + case X86::BI__builtin_ia32_loadups512_mask: + case X86::BI__builtin_ia32_loadupd128_mask: + case X86::BI__builtin_ia32_loadupd256_mask: + case X86::BI__builtin_ia32_loadupd512_mask: + case X86::BI__builtin_ia32_loaddquqi128_mask: + case X86::BI__builtin_ia32_loaddquqi256_mask: + case X86::BI__builtin_ia32_loaddquqi512_mask: + case X86::BI__builtin_ia32_loaddquhi128_mask: + case X86::BI__builtin_ia32_loaddquhi256_mask: + case X86::BI__builtin_ia32_loaddquhi512_mask: + case X86::BI__builtin_ia32_loaddqusi128_mask: + case X86::BI__builtin_ia32_loaddqusi256_mask: + case X86::BI__builtin_ia32_loaddqusi512_mask: + case X86::BI__builtin_ia32_loaddqudi128_mask: + case X86::BI__builtin_ia32_loaddqudi256_mask: + case X86::BI__builtin_ia32_loaddqudi512_mask: + return EmitX86MaskedLoad(*this, Ops, 1); + + case X86::BI__builtin_ia32_loadaps128_mask: + case X86::BI__builtin_ia32_loadaps256_mask: + case X86::BI__builtin_ia32_loadaps512_mask: + case X86::BI__builtin_ia32_loadapd128_mask: + case X86::BI__builtin_ia32_loadapd256_mask: + case X86::BI__builtin_ia32_loadapd512_mask: + case X86::BI__builtin_ia32_movdqa32load128_mask: + case X86::BI__builtin_ia32_movdqa32load256_mask: + case X86::BI__builtin_ia32_movdqa32load512_mask: + case X86::BI__builtin_ia32_movdqa64load128_mask: + case X86::BI__builtin_ia32_movdqa64load256_mask: + case 
X86::BI__builtin_ia32_movdqa64load512_mask: { + unsigned Align = + getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); + return EmitX86MaskedLoad(*this, Ops, Align); + } case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); @@ -6330,103 +7013,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case X86::BI__builtin_ia32_palignr128: - case X86::BI__builtin_ia32_palignr256: { + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr128_mask: + case X86::BI__builtin_ia32_palignr256_mask: + case X86::BI__builtin_ia32_palignr512_mask: { unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); - unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); assert(NumElts % 16 == 0); - unsigned NumLanes = NumElts / 16; - unsigned NumLaneElts = NumElts / NumLanes; // If palignr is shifting the pair of vectors more than the size of two // lanes, emit zero. - if (ShiftVal >= (2 * NumLaneElts)) + if (ShiftVal >= 32) return llvm::Constant::getNullValue(ConvertType(E->getType())); // If palignr is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. - if (ShiftVal > NumLaneElts) { - ShiftVal -= NumLaneElts; + if (ShiftVal > 16) { + ShiftVal -= 16; Ops[1] = Ops[0]; Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); } - uint32_t Indices[32]; + uint32_t Indices[64]; // 256-bit palignr operates on 128-bit lanes so we need to handle that - for (unsigned l = 0; l != NumElts; l += NumLaneElts) { - for (unsigned i = 0; i != NumLaneElts; ++i) { + for (unsigned l = 0; l != NumElts; l += 16) { + for (unsigned i = 0; i != 16; ++i) { unsigned Idx = ShiftVal + i; - if (Idx >= NumLaneElts) - Idx += NumElts - NumLaneElts; // End of lane, switch operand. + if (Idx >= 16) + Idx += NumElts - 16; // End of lane, switch operand. Indices[l + i] = Idx + l; } } - Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), - makeArrayRef(Indices, NumElts)); - return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); - } - case X86::BI__builtin_ia32_pslldqi256: { - // Shift value is in bits so divide by 8. - unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3; + Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], + makeArrayRef(Indices, NumElts), + "palignr"); - // If pslldq is shifting the vector more than 15 bytes, emit zero. - if (shiftVal >= 16) - return llvm::Constant::getNullValue(ConvertType(E->getType())); - - uint32_t Indices[32]; - // 256-bit pslldq operates on 128-bit lanes so we need to handle that - for (unsigned l = 0; l != 32; l += 16) { - for (unsigned i = 0; i != 16; ++i) { - unsigned Idx = 32 + i - shiftVal; - if (Idx < 32) Idx -= 16; // end of lane, switch operand. - Indices[l + i] = Idx + l; - } - } - - llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); - Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); - Value *Zero = llvm::Constant::getNullValue(VecTy); + // If this isn't a masked builtin, just return the align operation. 
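The index loop above builds palignr one 128-bit lane at a time; a byte-level reference model of a single lane may make the shuffle indices easier to follow. palignrLane is a hypothetical helper; Lo is the second source operand and Hi the first, matching the operand order of the shuffle.

#include <cstdint>
#include <cstring>

// One 16-byte lane of palignr: take bytes from the 32-byte concatenation
// {Lo, Hi} starting at Shift, filling with zeros once the data runs out.
static void palignrLane(const uint8_t Hi[16], const uint8_t Lo[16],
                        unsigned Shift, uint8_t Out[16]) {
  uint8_t Concat[32];
  std::memcpy(Concat, Lo, 16);       // bytes 0..15
  std::memcpy(Concat + 16, Hi, 16);  // bytes 16..31
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Idx = Shift + i;
    Out[i] = Idx < 32 ? Concat[Idx] : 0;
  }
}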
+ if (Ops.size() == 3) + return Align; - Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq"); - llvm::Type *ResultType = ConvertType(E->getType()); - return Builder.CreateBitCast(SV, ResultType, "cast"); + return EmitX86Select(*this, Ops[4], Align, Ops[3]); } - case X86::BI__builtin_ia32_psrldqi256: { - // Shift value is in bits so divide by 8. - unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3; - - // If psrldq is shifting the vector more than 15 bytes, emit zero. - if (shiftVal >= 16) - return llvm::Constant::getNullValue(ConvertType(E->getType())); - uint32_t Indices[32]; - // 256-bit psrldq operates on 128-bit lanes so we need to handle that - for (unsigned l = 0; l != 32; l += 16) { - for (unsigned i = 0; i != 16; ++i) { - unsigned Idx = i + shiftVal; - if (Idx >= 16) Idx += 16; // end of lane, switch operand. - Indices[l + i] = Idx + l; - } - } - - llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); - Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); - Value *Zero = llvm::Constant::getNullValue(VecTy); - - Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq"); - llvm::Type *ResultType = ConvertType(E->getType()); - return Builder.CreateBitCast(SV, ResultType, "cast"); - } - case X86::BI__builtin_ia32_movntps: - case X86::BI__builtin_ia32_movntps256: - case X86::BI__builtin_ia32_movntpd: - case X86::BI__builtin_ia32_movntpd256: - case X86::BI__builtin_ia32_movntdq: - case X86::BI__builtin_ia32_movntdq256: case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: { llvm::MDNode *Node = llvm::MDNode::get( @@ -6439,17 +7069,156 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC); SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); - // If the operand is an integer, we can't assume alignment. Otherwise, - // assume natural alignment. - QualType ArgTy = E->getArg(1)->getType(); - unsigned Align; - if (ArgTy->isIntegerType()) - Align = 1; - else - Align = getContext().getTypeSizeInChars(ArgTy).getQuantity(); - SI->setAlignment(Align); + // No alignment for scalar intrinsic store. + SI->setAlignment(1); + return SI; + } + case X86::BI__builtin_ia32_movntsd: + case X86::BI__builtin_ia32_movntss: { + llvm::MDNode *Node = llvm::MDNode::get( + getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + + // Extract the 0'th element of the source vector. + Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract"); + + // Convert the type of the pointer to a pointer to the stored type. + Value *BC = Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Scl->getType()), + "cast"); + + // Unaligned nontemporal store of the scalar value. 
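[Editor's aside, not part of the diff, assuming SSE2 and <emmintrin.h>: the scalar streaming-store intrinsic is the kind of call that reaches the movnti case above, and it ends up as an ordinary store tagged with !nontemporal metadata and, after this change, no alignment assumption.

    #include <emmintrin.h>

    void publish(int *slot, int value) {
      _mm_stream_si32(slot, value);   // nontemporal scalar store, bypasses the caches
    }
]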
+ StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC); + SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); + SI->setAlignment(1); return SI; } + + case X86::BI__builtin_ia32_selectb_128: + case X86::BI__builtin_ia32_selectb_256: + case X86::BI__builtin_ia32_selectb_512: + case X86::BI__builtin_ia32_selectw_128: + case X86::BI__builtin_ia32_selectw_256: + case X86::BI__builtin_ia32_selectw_512: + case X86::BI__builtin_ia32_selectd_128: + case X86::BI__builtin_ia32_selectd_256: + case X86::BI__builtin_ia32_selectd_512: + case X86::BI__builtin_ia32_selectq_128: + case X86::BI__builtin_ia32_selectq_256: + case X86::BI__builtin_ia32_selectq_512: + case X86::BI__builtin_ia32_selectps_128: + case X86::BI__builtin_ia32_selectps_256: + case X86::BI__builtin_ia32_selectps_512: + case X86::BI__builtin_ia32_selectpd_128: + case X86::BI__builtin_ia32_selectpd_256: + case X86::BI__builtin_ia32_selectpd_512: + return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); + case X86::BI__builtin_ia32_pcmpeqb128_mask: + case X86::BI__builtin_ia32_pcmpeqb256_mask: + case X86::BI__builtin_ia32_pcmpeqb512_mask: + case X86::BI__builtin_ia32_pcmpeqw128_mask: + case X86::BI__builtin_ia32_pcmpeqw256_mask: + case X86::BI__builtin_ia32_pcmpeqw512_mask: + case X86::BI__builtin_ia32_pcmpeqd128_mask: + case X86::BI__builtin_ia32_pcmpeqd256_mask: + case X86::BI__builtin_ia32_pcmpeqd512_mask: + case X86::BI__builtin_ia32_pcmpeqq128_mask: + case X86::BI__builtin_ia32_pcmpeqq256_mask: + case X86::BI__builtin_ia32_pcmpeqq512_mask: + return EmitX86MaskedCompare(*this, 0, false, Ops); + case X86::BI__builtin_ia32_pcmpgtb128_mask: + case X86::BI__builtin_ia32_pcmpgtb256_mask: + case X86::BI__builtin_ia32_pcmpgtb512_mask: + case X86::BI__builtin_ia32_pcmpgtw128_mask: + case X86::BI__builtin_ia32_pcmpgtw256_mask: + case X86::BI__builtin_ia32_pcmpgtw512_mask: + case X86::BI__builtin_ia32_pcmpgtd128_mask: + case X86::BI__builtin_ia32_pcmpgtd256_mask: + case X86::BI__builtin_ia32_pcmpgtd512_mask: + case X86::BI__builtin_ia32_pcmpgtq128_mask: + case X86::BI__builtin_ia32_pcmpgtq256_mask: + case X86::BI__builtin_ia32_pcmpgtq512_mask: + return EmitX86MaskedCompare(*this, 6, true, Ops); + case X86::BI__builtin_ia32_cmpb128_mask: + case X86::BI__builtin_ia32_cmpb256_mask: + case X86::BI__builtin_ia32_cmpb512_mask: + case X86::BI__builtin_ia32_cmpw128_mask: + case X86::BI__builtin_ia32_cmpw256_mask: + case X86::BI__builtin_ia32_cmpw512_mask: + case X86::BI__builtin_ia32_cmpd128_mask: + case X86::BI__builtin_ia32_cmpd256_mask: + case X86::BI__builtin_ia32_cmpd512_mask: + case X86::BI__builtin_ia32_cmpq128_mask: + case X86::BI__builtin_ia32_cmpq256_mask: + case X86::BI__builtin_ia32_cmpq512_mask: { + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; + return EmitX86MaskedCompare(*this, CC, true, Ops); + } + case X86::BI__builtin_ia32_ucmpb128_mask: + case X86::BI__builtin_ia32_ucmpb256_mask: + case X86::BI__builtin_ia32_ucmpb512_mask: + case X86::BI__builtin_ia32_ucmpw128_mask: + case X86::BI__builtin_ia32_ucmpw256_mask: + case X86::BI__builtin_ia32_ucmpw512_mask: + case X86::BI__builtin_ia32_ucmpd128_mask: + case X86::BI__builtin_ia32_ucmpd256_mask: + case X86::BI__builtin_ia32_ucmpd512_mask: + case X86::BI__builtin_ia32_ucmpq128_mask: + case X86::BI__builtin_ia32_ucmpq256_mask: + case X86::BI__builtin_ia32_ucmpq512_mask: { + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; + return EmitX86MaskedCompare(*this, CC, false, Ops); + } + + case X86::BI__builtin_ia32_vplzcntd_128_mask: + 
case X86::BI__builtin_ia32_vplzcntd_256_mask: + case X86::BI__builtin_ia32_vplzcntd_512_mask: + case X86::BI__builtin_ia32_vplzcntq_128_mask: + case X86::BI__builtin_ia32_vplzcntq_256_mask: + case X86::BI__builtin_ia32_vplzcntq_512_mask: { + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); + return EmitX86Select(*this, Ops[2], + Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), + Ops[1]); + } + + // TODO: Handle 64/512-bit vector widths of min/max. + case X86::BI__builtin_ia32_pmaxsb128: + case X86::BI__builtin_ia32_pmaxsw128: + case X86::BI__builtin_ia32_pmaxsd128: + case X86::BI__builtin_ia32_pmaxsb256: + case X86::BI__builtin_ia32_pmaxsw256: + case X86::BI__builtin_ia32_pmaxsd256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + case X86::BI__builtin_ia32_pmaxub128: + case X86::BI__builtin_ia32_pmaxuw128: + case X86::BI__builtin_ia32_pmaxud128: + case X86::BI__builtin_ia32_pmaxub256: + case X86::BI__builtin_ia32_pmaxuw256: + case X86::BI__builtin_ia32_pmaxud256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + case X86::BI__builtin_ia32_pminsb128: + case X86::BI__builtin_ia32_pminsw128: + case X86::BI__builtin_ia32_pminsd128: + case X86::BI__builtin_ia32_pminsb256: + case X86::BI__builtin_ia32_pminsw256: + case X86::BI__builtin_ia32_pminsd256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + case X86::BI__builtin_ia32_pminub128: + case X86::BI__builtin_ia32_pminuw128: + case X86::BI__builtin_ia32_pminud128: + case X86::BI__builtin_ia32_pminub256: + case X86::BI__builtin_ia32_pminuw256: + case X86::BI__builtin_ia32_pminud256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { @@ -6492,154 +7261,107 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0]); return Builder.CreateExtractValue(Call, 1); } - // SSE comparison intrisics + + // SSE packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpeqpd: + return getVectorFCmpIR(CmpInst::FCMP_OEQ); case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpltpd: + return getVectorFCmpIR(CmpInst::FCMP_OLT); case X86::BI__builtin_ia32_cmpleps: + case X86::BI__builtin_ia32_cmplepd: + return getVectorFCmpIR(CmpInst::FCMP_OLE); case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpunordpd: + return getVectorFCmpIR(CmpInst::FCMP_UNO); case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpneqpd: + return getVectorFCmpIR(CmpInst::FCMP_UNE); case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnltpd: + return getVectorFCmpIR(CmpInst::FCMP_UGE); case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpnlepd: + return getVectorFCmpIR(CmpInst::FCMP_UGT); case X86::BI__builtin_ia32_cmpordps: + case X86::BI__builtin_ia32_cmpordpd: + return getVectorFCmpIR(CmpInst::FCMP_ORD); + case X86::BI__builtin_ia32_cmpps: + case X86::BI__builtin_ia32_cmpps256: + case X86::BI__builtin_ia32_cmppd: + case X86::BI__builtin_ia32_cmppd256: { + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + // If this one of the SSE immediates, we can use native IR. 
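[Editor's aside, not part of the diff, on the integer min/max builtins handled a few cases above: the icmp-plus-select lowering is the familiar element-wise compare-and-pick pattern, sketched here on plain arrays.

    #include <cstdio>

    // Element-wise signed max, the same shape as the ICMP_SGT + select emitted above.
    static void pmax_ref(const int *a, const int *b, int *out, int n) {
      for (int i = 0; i != n; ++i)
        out[i] = (a[i] > b[i]) ? a[i] : b[i];
    }

    int main() {
      int a[4] = {1, 7, -3, 9}, b[4] = {4, 2, 5, 9}, r[4];
      pmax_ref(a, b, r, 4);
      for (int v : r)
        std::printf("%d ", v);   // 4 7 5 9
      std::printf("\n");
    }
]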
+ if (CC < 8) { + FCmpInst::Predicate Pred; + switch (CC) { + case 0: Pred = FCmpInst::FCMP_OEQ; break; + case 1: Pred = FCmpInst::FCMP_OLT; break; + case 2: Pred = FCmpInst::FCMP_OLE; break; + case 3: Pred = FCmpInst::FCMP_UNO; break; + case 4: Pred = FCmpInst::FCMP_UNE; break; + case 5: Pred = FCmpInst::FCMP_UGE; break; + case 6: Pred = FCmpInst::FCMP_UGT; break; + case 7: Pred = FCmpInst::FCMP_ORD; break; + } + return getVectorFCmpIR(Pred); + } + + // We can't handle 8-31 immediates with native IR, use the intrinsic. + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cmpps: + ID = Intrinsic::x86_sse_cmp_ps; + break; + case X86::BI__builtin_ia32_cmpps256: + ID = Intrinsic::x86_avx_cmp_ps_256; + break; + case X86::BI__builtin_ia32_cmppd: + ID = Intrinsic::x86_sse2_cmp_pd; + break; + case X86::BI__builtin_ia32_cmppd256: + ID = Intrinsic::x86_avx_cmp_pd_256; + break; + } + + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + } + + // SSE scalar comparison intrinsics case X86::BI__builtin_ia32_cmpeqss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); case X86::BI__builtin_ia32_cmpltss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); case X86::BI__builtin_ia32_cmpless: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); case X86::BI__builtin_ia32_cmpunordss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); case X86::BI__builtin_ia32_cmpneqss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); case X86::BI__builtin_ia32_cmpnltss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); case X86::BI__builtin_ia32_cmpnless: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); case X86::BI__builtin_ia32_cmpordss: - case X86::BI__builtin_ia32_cmpeqpd: - case X86::BI__builtin_ia32_cmpltpd: - case X86::BI__builtin_ia32_cmplepd: - case X86::BI__builtin_ia32_cmpunordpd: - case X86::BI__builtin_ia32_cmpneqpd: - case X86::BI__builtin_ia32_cmpnltpd: - case X86::BI__builtin_ia32_cmpnlepd: - case X86::BI__builtin_ia32_cmpordpd: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); case X86::BI__builtin_ia32_cmpeqsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); case X86::BI__builtin_ia32_cmpltsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); case X86::BI__builtin_ia32_cmplesd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); case X86::BI__builtin_ia32_cmpunordsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); case X86::BI__builtin_ia32_cmpneqsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); case X86::BI__builtin_ia32_cmpnltsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); case X86::BI__builtin_ia32_cmpnlesd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); case X86::BI__builtin_ia32_cmpordsd: - // These exist so that the builtin that takes an immediate can be bounds - // checked by clang to avoid passing bad immediates to the backend. Since - // AVX has a larger immediate than SSE we would need separate builtins to - // do the different bounds checking. Rather than create a clang specific - // SSE only builtin, this implements eight separate builtins to match gcc - // implementation. - - // Choose the immediate. 
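[Editor's aside, not part of the diff, assuming AVX and <immintrin.h>, where _CMP_LT_OS is immediate 1: with one of the eight SSE-compatible immediates the packed-compare builtin now lowers to a plain IR fcmp using the predicate table above, while immediates 8 through 31 still go through the target intrinsic.

    #include <immintrin.h>

    __m128 less_than(__m128 a, __m128 b) {
      return _mm_cmp_ps(a, b, _CMP_LT_OS);   // immediate 1 -> fcmp olt on <4 x float>
    }
]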
- unsigned Imm; - switch (BuiltinID) { - default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_cmpeqps: - case X86::BI__builtin_ia32_cmpeqss: - case X86::BI__builtin_ia32_cmpeqpd: - case X86::BI__builtin_ia32_cmpeqsd: - Imm = 0; - break; - case X86::BI__builtin_ia32_cmpltps: - case X86::BI__builtin_ia32_cmpltss: - case X86::BI__builtin_ia32_cmpltpd: - case X86::BI__builtin_ia32_cmpltsd: - Imm = 1; - break; - case X86::BI__builtin_ia32_cmpleps: - case X86::BI__builtin_ia32_cmpless: - case X86::BI__builtin_ia32_cmplepd: - case X86::BI__builtin_ia32_cmplesd: - Imm = 2; - break; - case X86::BI__builtin_ia32_cmpunordps: - case X86::BI__builtin_ia32_cmpunordss: - case X86::BI__builtin_ia32_cmpunordpd: - case X86::BI__builtin_ia32_cmpunordsd: - Imm = 3; - break; - case X86::BI__builtin_ia32_cmpneqps: - case X86::BI__builtin_ia32_cmpneqss: - case X86::BI__builtin_ia32_cmpneqpd: - case X86::BI__builtin_ia32_cmpneqsd: - Imm = 4; - break; - case X86::BI__builtin_ia32_cmpnltps: - case X86::BI__builtin_ia32_cmpnltss: - case X86::BI__builtin_ia32_cmpnltpd: - case X86::BI__builtin_ia32_cmpnltsd: - Imm = 5; - break; - case X86::BI__builtin_ia32_cmpnleps: - case X86::BI__builtin_ia32_cmpnless: - case X86::BI__builtin_ia32_cmpnlepd: - case X86::BI__builtin_ia32_cmpnlesd: - Imm = 6; - break; - case X86::BI__builtin_ia32_cmpordps: - case X86::BI__builtin_ia32_cmpordss: - case X86::BI__builtin_ia32_cmpordpd: - case X86::BI__builtin_ia32_cmpordsd: - Imm = 7; - break; - } - - // Choose the intrinsic ID. - const char *name; - Intrinsic::ID ID; - switch (BuiltinID) { - default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_cmpeqps: - case X86::BI__builtin_ia32_cmpltps: - case X86::BI__builtin_ia32_cmpleps: - case X86::BI__builtin_ia32_cmpunordps: - case X86::BI__builtin_ia32_cmpneqps: - case X86::BI__builtin_ia32_cmpnltps: - case X86::BI__builtin_ia32_cmpnleps: - case X86::BI__builtin_ia32_cmpordps: - name = "cmpps"; - ID = Intrinsic::x86_sse_cmp_ps; - break; - case X86::BI__builtin_ia32_cmpeqss: - case X86::BI__builtin_ia32_cmpltss: - case X86::BI__builtin_ia32_cmpless: - case X86::BI__builtin_ia32_cmpunordss: - case X86::BI__builtin_ia32_cmpneqss: - case X86::BI__builtin_ia32_cmpnltss: - case X86::BI__builtin_ia32_cmpnless: - case X86::BI__builtin_ia32_cmpordss: - name = "cmpss"; - ID = Intrinsic::x86_sse_cmp_ss; - break; - case X86::BI__builtin_ia32_cmpeqpd: - case X86::BI__builtin_ia32_cmpltpd: - case X86::BI__builtin_ia32_cmplepd: - case X86::BI__builtin_ia32_cmpunordpd: - case X86::BI__builtin_ia32_cmpneqpd: - case X86::BI__builtin_ia32_cmpnltpd: - case X86::BI__builtin_ia32_cmpnlepd: - case X86::BI__builtin_ia32_cmpordpd: - name = "cmppd"; - ID = Intrinsic::x86_sse2_cmp_pd; - break; - case X86::BI__builtin_ia32_cmpeqsd: - case X86::BI__builtin_ia32_cmpltsd: - case X86::BI__builtin_ia32_cmplesd: - case X86::BI__builtin_ia32_cmpunordsd: - case X86::BI__builtin_ia32_cmpneqsd: - case X86::BI__builtin_ia32_cmpnltsd: - case X86::BI__builtin_ia32_cmpnlesd: - case X86::BI__builtin_ia32_cmpordsd: - name = "cmpsd"; - ID = Intrinsic::x86_sse2_cmp_sd; - break; - } - - Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); - llvm::Function *F = CGM.getIntrinsic(ID); - return Builder.CreateCall(F, Ops, name); + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); } } @@ -6812,6 +7534,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, X); } + + // Absolute value + 
case PPC::BI__builtin_vsx_xvabsdp: + case PPC::BI__builtin_vsx_xvabssp: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); + return Builder.CreateCall(F, X); + } + // FMA variations case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddasp: @@ -6851,44 +7583,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } } -// Emit an intrinsic that has 1 float or double. -static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, Src0); -} - -// Emit an intrinsic that has 3 float or double operands. -static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); - - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, {Src0, Src1, Src2}); -} - -// Emit an intrinsic that has 1 float or double operand, and 1 integer. -static Value *emitFPIntBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, {Src0, Src1}); -} - Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgpu_div_scale: - case AMDGPU::BI__builtin_amdgpu_div_scalef: { + case AMDGPU::BI__builtin_amdgcn_div_scale: + case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out // argument. 
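[Editor's aside, not part of the diff, with simplified stand-in names: the "struct return to out argument" translation described above is the pattern the hunk below implements with extractvalue plus a store. A plain C++ analogue looks like this.

    #include <cstdio>
    #include <utility>

    // Stand-in for the intrinsic's {value, flag} aggregate return.
    static std::pair<double, bool> div_scale_like(double Num, double Den, bool Sel) {
      return {Num / Den, Sel};
    }

    // Builtin-style wrapper: the aggregate becomes a return value plus an out-pointer.
    static double builtin_like(double Num, double Den, bool Sel, bool *FlagOut) {
      auto R = div_scale_like(Num, Den, Sel);
      *FlagOut = R.second;   // extractvalue 1, then store through the out argument
      return R.first;        // extractvalue 0 is the builtin's result
    }

    int main() {
      bool Flag;
      std::printf("%f %d\n", builtin_like(1.0, 3.0, true, &Flag), Flag);
    }
]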
@@ -6898,7 +7597,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Z = EmitScalarExpr(E->getArg(2)); - llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, + llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType()); llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); @@ -6913,40 +7612,85 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Builder.CreateStore(FlagExt, FlagOutPtr); return Result; } - case AMDGPU::BI__builtin_amdgpu_div_fmas: - case AMDGPU::BI__builtin_amdgpu_div_fmasf: { + case AMDGPU::BI__builtin_amdgcn_div_fmas: + case AMDGPU::BI__builtin_amdgcn_div_fmasf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); - llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas, + llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType()); llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); } - case AMDGPU::BI__builtin_amdgpu_div_fixup: - case AMDGPU::BI__builtin_amdgpu_div_fixupf: - return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); - case AMDGPU::BI__builtin_amdgpu_trig_preop: - case AMDGPU::BI__builtin_amdgpu_trig_preopf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop); - case AMDGPU::BI__builtin_amdgpu_rcp: - case AMDGPU::BI__builtin_amdgpu_rcpf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); - case AMDGPU::BI__builtin_amdgpu_rsq: - case AMDGPU::BI__builtin_amdgpu_rsqf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); - case AMDGPU::BI__builtin_amdgpu_rsq_clamped: - case AMDGPU::BI__builtin_amdgpu_rsq_clampedf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); - case AMDGPU::BI__builtin_amdgpu_ldexp: - case AMDGPU::BI__builtin_amdgpu_ldexpf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); - case AMDGPU::BI__builtin_amdgpu_class: - case AMDGPU::BI__builtin_amdgpu_classf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class); - default: + case AMDGPU::BI__builtin_amdgcn_div_fixup: + case AMDGPU::BI__builtin_amdgcn_div_fixupf: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); + case AMDGPU::BI__builtin_amdgcn_trig_preop: + case AMDGPU::BI__builtin_amdgcn_trig_preopf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); + case AMDGPU::BI__builtin_amdgcn_rcp: + case AMDGPU::BI__builtin_amdgcn_rcpf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); + case AMDGPU::BI__builtin_amdgcn_rsq: + case AMDGPU::BI__builtin_amdgcn_rsqf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); + case AMDGPU::BI__builtin_amdgcn_rsq_clamp: + case AMDGPU::BI__builtin_amdgcn_rsq_clampf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); + case AMDGPU::BI__builtin_amdgcn_sinf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); + case AMDGPU::BI__builtin_amdgcn_cosf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); + case AMDGPU::BI__builtin_amdgcn_log_clampf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); + case AMDGPU::BI__builtin_amdgcn_ldexp: + case AMDGPU::BI__builtin_amdgcn_ldexpf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); + case AMDGPU::BI__builtin_amdgcn_frexp_mant: + case 
AMDGPU::BI__builtin_amdgcn_frexp_mantf: { + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); + } + case AMDGPU::BI__builtin_amdgcn_frexp_exp: + case AMDGPU::BI__builtin_amdgcn_frexp_expf: { + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp); + } + case AMDGPU::BI__builtin_amdgcn_fract: + case AMDGPU::BI__builtin_amdgcn_fractf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); + case AMDGPU::BI__builtin_amdgcn_lerp: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); + case AMDGPU::BI__builtin_amdgcn_class: + case AMDGPU::BI__builtin_amdgcn_classf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); + + case AMDGPU::BI__builtin_amdgcn_read_exec: { + CallInst *CI = cast<CallInst>( + EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); + CI->setConvergent(); + return CI; + } + + // amdgcn workitem + case AMDGPU::BI__builtin_amdgcn_workitem_id_x: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); + case AMDGPU::BI__builtin_amdgcn_workitem_id_y: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); + case AMDGPU::BI__builtin_amdgcn_workitem_id_z: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); + + // r600 intrinsics + case AMDGPU::BI__builtin_r600_recipsqrt_ieee: + case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: + return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); + case AMDGPU::BI__builtin_r600_read_tidig_x: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); + case AMDGPU::BI__builtin_r600_read_tidig_y: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); + case AMDGPU::BI__builtin_r600_read_tidig_z: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); + default: return nullptr; } } @@ -7196,6 +7940,17 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { + auto MakeLdg = [&](unsigned IntrinsicID) { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + AlignmentSource AlignSource; + clang::CharUnits Align = + getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource); + return Builder.CreateCall( + CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), + Ptr->getType()}), + {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); + }; + switch (BuiltinID) { case NVPTX::BI__nvvm_atom_add_gen_i: case NVPTX::BI__nvvm_atom_add_gen_l: @@ -7264,6 +8019,56 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(FnALAF32, {Ptr, Val}); } + case NVPTX::BI__nvvm_atom_inc_gen_ui: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + Value *FnALI32 = + CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); + return Builder.CreateCall(FnALI32, {Ptr, Val}); + } + + case NVPTX::BI__nvvm_atom_dec_gen_ui: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + Value *FnALD32 = + CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); + return Builder.CreateCall(FnALD32, {Ptr, Val}); + } + + case NVPTX::BI__nvvm_ldg_c: + case NVPTX::BI__nvvm_ldg_c2: + case NVPTX::BI__nvvm_ldg_c4: + case NVPTX::BI__nvvm_ldg_s: + case NVPTX::BI__nvvm_ldg_s2: + case NVPTX::BI__nvvm_ldg_s4: + case NVPTX::BI__nvvm_ldg_i: + case NVPTX::BI__nvvm_ldg_i2: + case NVPTX::BI__nvvm_ldg_i4: + case NVPTX::BI__nvvm_ldg_l: + case 
NVPTX::BI__nvvm_ldg_ll: + case NVPTX::BI__nvvm_ldg_ll2: + case NVPTX::BI__nvvm_ldg_uc: + case NVPTX::BI__nvvm_ldg_uc2: + case NVPTX::BI__nvvm_ldg_uc4: + case NVPTX::BI__nvvm_ldg_us: + case NVPTX::BI__nvvm_ldg_us2: + case NVPTX::BI__nvvm_ldg_us4: + case NVPTX::BI__nvvm_ldg_ui: + case NVPTX::BI__nvvm_ldg_ui2: + case NVPTX::BI__nvvm_ldg_ui4: + case NVPTX::BI__nvvm_ldg_ul: + case NVPTX::BI__nvvm_ldg_ull: + case NVPTX::BI__nvvm_ldg_ull2: + // PTX Interoperability section 2.2: "For a vector with an even number of + // elements, its alignment is set to number of elements times the alignment + // of its member: n*alignof(t)." + return MakeLdg(Intrinsic::nvvm_ldg_global_i); + case NVPTX::BI__nvvm_ldg_f: + case NVPTX::BI__nvvm_ldg_f2: + case NVPTX::BI__nvvm_ldg_f4: + case NVPTX::BI__nvvm_ldg_d: + case NVPTX::BI__nvvm_ldg_d2: + return MakeLdg(Intrinsic::nvvm_ldg_global_f); default: return nullptr; } @@ -7272,9 +8077,9 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_memory_size: { + case WebAssembly::BI__builtin_wasm_current_memory: { llvm::Type *ResultType = ConvertType(E->getType()); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_grow_memory: { diff --git a/lib/CodeGen/CGCUDABuiltin.cpp b/lib/CodeGen/CGCUDABuiltin.cpp new file mode 100644 index 000000000000..ea3b888635c3 --- /dev/null +++ b/lib/CodeGen/CGCUDABuiltin.cpp @@ -0,0 +1,117 @@ +//===----- CGCUDABuiltin.cpp - Codegen for CUDA builtins ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generates code for built-in CUDA calls which are not runtime-specific. +// (Runtime-specific codegen lives in CGCUDARuntime.) +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "clang/Basic/Builtins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/MathExtras.h" + +using namespace clang; +using namespace CodeGen; + +static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { + llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()), + llvm::Type::getInt8PtrTy(M.getContext())}; + llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( + llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); + + if (auto* F = M.getFunction("vprintf")) { + // Our CUDA system header declares vprintf with the right signature, so + // nobody else should have been able to declare vprintf with a bogus + // signature. + assert(F->getFunctionType() == VprintfFuncType); + return F; + } + + // vprintf doesn't already exist; create a declaration and insert it into the + // module. + return llvm::Function::Create( + VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M); +} + +// Transforms a call to printf into a call to the NVPTX vprintf syscall (which +// isn't particularly special; it's invoked just like a regular function). +// vprintf takes two args: A format string, and a pointer to a buffer containing +// the varargs. 
+// +// For example, the call +// +// printf("format string", arg1, arg2, arg3); +// +// is converted into something resembling +// +// struct Tmp { +// Arg1 a1; +// Arg2 a2; +// Arg3 a3; +// }; +// char* buf = alloca(sizeof(Tmp)); +// *(Tmp*)buf = {a1, a2, a3}; +// vprintf("format string", buf); +// +// buf is aligned to the max of {alignof(Arg1), ...}. Furthermore, each of the +// args is itself aligned to its preferred alignment. +// +// Note that by the time this function runs, E's args have already undergone the +// standard C vararg promotion (short -> int, float -> double, etc.). +RValue +CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue) { + assert(getLangOpts().CUDA); + assert(getLangOpts().CUDAIsDevice); + assert(E->getBuiltinCallee() == Builtin::BIprintf); + assert(E->getNumArgs() >= 1); // printf always has at least one arg. + + const llvm::DataLayout &DL = CGM.getDataLayout(); + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + + CallArgList Args; + EmitCallArgs(Args, + E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), + E->arguments(), E->getDirectCallee(), + /* ParamsToSkip = */ 0); + + // We don't know how to emit non-scalar varargs. + if (std::any_of(Args.begin() + 1, Args.end(), + [](const CallArg &A) { return !A.RV.isScalar(); })) { + CGM.ErrorUnsupported(E, "non-scalar arg to printf"); + return RValue::get(llvm::ConstantInt::get(IntTy, 0)); + } + + // Construct and fill the args buffer that we'll pass to vprintf. + llvm::Value *BufferPtr; + if (Args.size() <= 1) { + // If there are no args, pass a null pointer to vprintf. + BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx)); + } else { + llvm::SmallVector<llvm::Type *, 8> ArgTypes; + for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) + ArgTypes.push_back(Args[I].RV.getScalarVal()->getType()); + llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args"); + llvm::Value *Alloca = CreateTempAlloca(AllocaTy); + + for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) { + llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); + llvm::Value *Arg = Args[I].RV.getScalarVal(); + Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType())); + } + BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); + } + + // Invoke vprintf and return. + llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule()); + return RValue::get( + Builder.CreateCall(VprintfFunc, {Args[0].RV.getScalarVal(), BufferPtr})); +} diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 045e19b189dc..6a04d4eea784 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -38,6 +38,7 @@ private: llvm::Module &TheModule; /// Keeps track of kernel launch stubs emitted in this module llvm::SmallVector<llvm::Function *, 16> EmittedKernels; + llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars; /// Keeps track of variables containing handles of GPU binaries. Populated by /// ModuleCtorFunction() and used to create corresponding cleanup calls in /// ModuleDtorFunction() @@ -47,7 +48,7 @@ private: llvm::Constant *getLaunchFn() const; /// Creates a function to register all kernel stubs generated in this module. - llvm::Function *makeRegisterKernelsFn(); + llvm::Function *makeRegisterGlobalsFn(); /// Helper function that generates a constant string and returns a pointer to /// the start of the string. 
The result of this function can be used anywhere @@ -68,6 +69,10 @@ public: CGNVCUDARuntime(CodeGenModule &CGM); void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; + void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override { + DeviceVars.push_back(std::make_pair(&Var, Flags)); + } + /// Creates module constructor function llvm::Function *makeModuleCtorFunction() override; /// Creates module destructor function @@ -93,10 +98,7 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { // cudaError_t cudaSetupArgument(void *, size_t, size_t) - std::vector<llvm::Type*> Params; - Params.push_back(VoidPtrTy); - Params.push_back(SizeTy); - Params.push_back(SizeTy); + llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy}; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, Params, false), "cudaSetupArgument"); @@ -158,19 +160,28 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, CGF.EmitBlock(EndBlock); } -/// Creates internal function to register all kernel stubs generated in this -/// module with the CUDA runtime. +/// Creates a function that sets up state on the host side for CUDA objects that +/// have a presence on both the host and device sides. Specifically, registers +/// the host side of kernel functions and device global variables with the CUDA +/// runtime. /// \code -/// void __cuda_register_kernels(void** GpuBinaryHandle) { +/// void __cuda_register_globals(void** GpuBinaryHandle) { /// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...); /// ... /// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...); +/// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...); +/// ... +/// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...); /// } /// \endcode -llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { +llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { + // No need to register anything + if (EmittedKernels.empty() && DeviceVars.empty()) + return nullptr; + llvm::Function *RegisterKernelsFunc = llvm::Function::Create( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), - llvm::GlobalValue::InternalLinkage, "__cuda_register_kernels", &TheModule); + llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule); llvm::BasicBlock *EntryBB = llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc); CGBuilderTy Builder(CGM, Context); @@ -178,7 +189,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { // void __cudaRegisterFunction(void **, const char *, char *, const char *, // int, uint3*, uint3*, dim3*, dim3*, int*) - std::vector<llvm::Type *> RegisterFuncParams = { + llvm::Type *RegisterFuncParams[] = { VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()}; llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction( @@ -186,18 +197,44 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { "__cudaRegisterFunction"); // Extract GpuBinaryHandle passed as the first argument passed to - // __cuda_register_kernels() and generate __cudaRegisterFunction() call for + // __cuda_register_globals() and generate __cudaRegisterFunction() call for // each emitted kernel. 
llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin(); for (llvm::Function *Kernel : EmittedKernels) { llvm::Constant *KernelName = makeConstantString(Kernel->getName()); llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); - llvm::Value *args[] = { + llvm::Value *Args[] = { &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy), KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr, NullPtr, NullPtr, NullPtr, llvm::ConstantPointerNull::get(IntTy->getPointerTo())}; - Builder.CreateCall(RegisterFunc, args); + Builder.CreateCall(RegisterFunc, Args); + } + + // void __cudaRegisterVar(void **, char *, char *, const char *, + // int, int, int, int) + llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy, + CharPtrTy, IntTy, IntTy, + IntTy, IntTy}; + llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, RegisterVarParams, false), + "__cudaRegisterVar"); + for (auto &Pair : DeviceVars) { + llvm::GlobalVariable *Var = Pair.first; + unsigned Flags = Pair.second; + llvm::Constant *VarName = makeConstantString(Var->getName()); + uint64_t VarSize = + CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); + llvm::Value *Args[] = { + &GpuBinaryHandlePtr, + Builder.CreateBitCast(Var, VoidPtrTy), + VarName, + VarName, + llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0), + llvm::ConstantInt::get(IntTy, VarSize), + llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0), + llvm::ConstantInt::get(IntTy, 0)}; + Builder.CreateCall(RegisterVar, Args); } Builder.CreateRetVoid(); @@ -208,15 +245,19 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { /// \code /// void __cuda_module_ctor(void*) { /// Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0); -/// __cuda_register_kernels(Handle0); +/// __cuda_register_globals(Handle0); /// ... /// HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN); -/// __cuda_register_kernels(HandleN); +/// __cuda_register_globals(HandleN); /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { - // void __cuda_register_kernels(void* handle); - llvm::Function *RegisterKernelsFunc = makeRegisterKernelsFn(); + // No need to generate ctors/dtors if there are no GPU binaries. + if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty()) + return nullptr; + + // void __cuda_register_globals(void* handle); + llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); // void ** __cudaRegisterFatBinary(void *); llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false), @@ -259,6 +300,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { TheModule, FatbinWrapperTy, true, llvm::GlobalValue::InternalLinkage, llvm::ConstantStruct::get(FatbinWrapperTy, Values), "__cuda_fatbin_wrapper"); + // NVIDIA's cuobjdump looks for fatbins in this section. 
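[Editor's aside, not part of the diff; CUDA source with hypothetical names: globals like these are what registerDeviceVar() records and what the loop above then hands to __cudaRegisterVar, with the extern and __constant__ properties encoded through the ExternDeviceVar and ConstantDeviceVar flags defined in CGCUDARuntime.h further down.

    __device__ int counter;                 // ordinary device global
    __constant__ float coeffs[16];          // carries the ConstantDeviceVar flag
    extern __device__ double shared_state;  // carries the ExternDeviceVar flag
]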
+ FatbinWrapper->setSection(".nvFatBinSegment"); // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper); llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall( @@ -270,8 +313,9 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle, CGM.getPointerAlign()); - // Call __cuda_register_kernels(GpuBinaryHandle); - CtorBuilder.CreateCall(RegisterKernelsFunc, RegisterFatbinCall); + // Call __cuda_register_globals(GpuBinaryHandle); + if (RegisterGlobalsFunc) + CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); // Save GpuBinaryHandle so we can unregister it in destructor. GpuBinaryHandles.push_back(GpuBinaryHandle); @@ -291,6 +335,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { + // No need for destructor if we don't have handles to unregister. + if (GpuBinaryHandles.empty()) + return nullptr; + // void __cudaUnregisterFatBinary(void ** handle); llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), diff --git a/lib/CodeGen/CGCUDARuntime.h b/lib/CodeGen/CGCUDARuntime.h index dcacf9703277..0168f4f9e942 100644 --- a/lib/CodeGen/CGCUDARuntime.h +++ b/lib/CodeGen/CGCUDARuntime.h @@ -18,6 +18,7 @@ namespace llvm { class Function; +class GlobalVariable; } namespace clang { @@ -37,6 +38,12 @@ protected: CodeGenModule &CGM; public: + // Global variable properties that must be passed to CUDA runtime. + enum DeviceVarFlags { + ExternDeviceVar = 0x01, // extern + ConstantDeviceVar = 0x02, // __constant__ + }; + CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {} virtual ~CGCUDARuntime(); @@ -46,6 +53,7 @@ public: /// Emits a kernel launch stub. virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; + virtual void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 6847df9b749b..40f1bc426ff7 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -164,7 +164,7 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, // members with attribute "AlwaysInline" and expect no reference to // be generated. It is desirable to reenable this optimisation after // corresponding LLVM changes. - Replacements[MangledName] = Aliasee; + addReplacement(MangledName, Aliasee); return false; } diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index 3f240b1802b8..9e10ec068e09 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -106,6 +106,16 @@ public: virtual bool hasMostDerivedReturn(GlobalDecl GD) const { return false; } + /// Returns true if the target allows calling a function through a pointer + /// with a different signature than the actual function (or equivalently, + /// bitcasting a function or function pointer to a different function type). + /// In principle in the most general case this could depend on the target, the + /// calling convention, and the actual types of the arguments and return + /// value. Here it just means whether the signature mismatch could *ever* be + /// allowed; in other words, does the target do strict checking of signatures + /// for all calls. 
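[Editor's aside, not part of the diff: a standalone illustration of the situation this new hook describes, a call through a pointer whose type does not match the callee's real signature. The language makes this undefined behaviour, but some ABIs tolerate it in practice, and the hook lets a target say whether CodeGen may rely on that.

    #include <cstdio>

    static void takes_int(int x) { std::printf("%d\n", x); }

    int main() {
      // Deliberately mismatched pointer type: undefined behaviour in C++, but the
      // kind of bitcast-and-call that is only legal to emit when this hook says yes.
      using WrongFn = void (*)(long);
      WrongFn fp = reinterpret_cast<WrongFn>(&takes_int);
      fp(42L);
    }
]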
+ virtual bool canCallMismatchedFunctionType() const { return true; } + /// If the C++ ABI requires the given type be returned in a particular way, /// this method sets RetAI and returns true. virtual bool classifyReturnType(CGFunctionInfo &FI) const = 0; @@ -326,6 +336,12 @@ public: virtual void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy, FunctionArgList &Params) = 0; + /// Get the ABI-specific "this" parameter adjustment to apply in the prologue + /// of a virtual function. + virtual CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) { + return CharUnits::Zero(); + } + /// Perform ABI-specific "this" parameter adjustment in a virtual function /// prologue. virtual llvm::Value *adjustThisParameterInVirtualFunctionPrologue( diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 935985049c01..242b5962070a 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -14,6 +14,7 @@ #include "CGCall.h" #include "ABIInfo.h" +#include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" #include "CodeGenFunction.h" @@ -25,9 +26,11 @@ #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -39,7 +42,7 @@ using namespace CodeGen; /***/ -static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) { +unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { switch (CC) { default: return llvm::CallingConv::C; case CC_X86StdCall: return llvm::CallingConv::X86_StdCall; @@ -55,7 +58,10 @@ static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) { // TODO: Add support for __vectorcall to LLVM. case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; - case CC_SpirKernel: return llvm::CallingConv::SPIR_KERNEL; + case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); + case CC_PreserveMost: return llvm::CallingConv::PreserveMost; + case CC_PreserveAll: return llvm::CallingConv::PreserveAll; + case CC_Swift: return llvm::CallingConv::Swift; } } @@ -90,15 +96,25 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) { return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(), /*instanceMethod=*/false, /*chainCall=*/false, None, - FTNP->getExtInfo(), RequiredArgs(0)); + FTNP->getExtInfo(), {}, RequiredArgs(0)); } /// Adds the formal paramaters in FPT to the given prefix. If any parameter in /// FPT has pass_object_size attrs, then we'll add parameters for those, too. static void appendParameterTypes(const CodeGenTypes &CGT, SmallVectorImpl<CanQualType> &prefix, - const CanQual<FunctionProtoType> &FPT, + SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, + CanQual<FunctionProtoType> FPT, const FunctionDecl *FD) { + // Fill out paramInfos. + if (FPT->hasExtParameterInfos() || !paramInfos.empty()) { + assert(paramInfos.size() <= prefix.size()); + auto protoParamInfos = FPT->getExtParameterInfos(); + paramInfos.reserve(prefix.size() + protoParamInfos.size()); + paramInfos.resize(prefix.size()); + paramInfos.append(protoParamInfos.begin(), protoParamInfos.end()); + } + // Fast path: unknown target. 
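[Editor's aside, not part of the diff, assuming a target that accepts these attributes: the new calling-convention entries above mean declarations like the following now get the matching LLVM conventions; the attribute-to-CC_PreserveMost/CC_PreserveAll translation itself is added to getCallingConventionForDecl() a little further down.

    // Emitted with llvm::CallingConv::PreserveMost per the mapping above.
    __attribute__((preserve_most)) void on_rare_event(int code);

    // Emitted with llvm::CallingConv::PreserveAll.
    __attribute__((preserve_all)) void on_even_rarer_event(int code);
]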
if (FD == nullptr) { prefix.append(FPT->param_type_begin(), FPT->param_type_end()); @@ -125,13 +141,17 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, SmallVectorImpl<CanQualType> &prefix, CanQual<FunctionProtoType> FTP, const FunctionDecl *FD) { - RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, prefix.size()); + SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; + RequiredArgs Required = + RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD); // FIXME: Kill copy. - appendParameterTypes(CGT, prefix, FTP, FD); + appendParameterTypes(CGT, prefix, paramInfos, FTP, FD); CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); + return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod, /*chainCall=*/false, prefix, - FTP->getExtInfo(), required); + FTP->getExtInfo(), paramInfos, + Required); } /// Arrange the argument and result information for a value of the @@ -173,6 +193,12 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { if (D->hasAttr<SysVABIAttr>()) return IsWindows ? CC_X86_64SysV : CC_C; + if (D->hasAttr<PreserveMostAttr>()) + return CC_PreserveMost; + + if (D->hasAttr<PreserveAllAttr>()) + return CC_PreserveAll; + return CC_C; } @@ -219,16 +245,33 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) { return arrangeFreeFunctionType(prototype, MD); } +bool CodeGenTypes::inheritingCtorHasParams( + const InheritedConstructor &Inherited, CXXCtorType Type) { + // Parameters are unnecessary if we're constructing a base class subobject + // and the inherited constructor lives in a virtual base. + return Type == Ctor_Complete || + !Inherited.getShadowDecl()->constructsVirtualBase() || + !Target.getCXXABI().hasConstructorVariants(); + } + const CGFunctionInfo & CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, StructorType Type) { SmallVector<CanQualType, 16> argTypes; + SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; argTypes.push_back(GetThisType(Context, MD->getParent())); + bool PassParams = true; + GlobalDecl GD; if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { GD = GlobalDecl(CD, toCXXCtorType(Type)); + + // A base class inheriting constructor doesn't get forwarded arguments + // needed to construct a virtual base (or base class thereof). + if (auto Inherited = CD->getInheritedConstructor()) + PassParams = inheritingCtorHasParams(Inherited, toCXXCtorType(Type)); } else { auto *DD = dyn_cast<CXXDestructorDecl>(MD); GD = GlobalDecl(DD, toCXXDtorType(Type)); @@ -237,12 +280,14 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, CanQual<FunctionProtoType> FTP = GetFormalType(MD); // Add the formal parameters. - appendParameterTypes(*this, argTypes, FTP, MD); + if (PassParams) + appendParameterTypes(*this, argTypes, paramInfos, FTP, MD); TheCXXABI.buildStructorSignature(MD, Type, argTypes); RequiredArgs required = - (MD->isVariadic() ? RequiredArgs(argTypes.size()) : RequiredArgs::All); + (PassParams && MD->isVariadic() ? 
RequiredArgs(argTypes.size()) + : RequiredArgs::All); FunctionType::ExtInfo extInfo = FTP->getExtInfo(); CanQualType resultType = TheCXXABI.HasThisReturn(GD) @@ -252,7 +297,53 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, : Context.VoidTy; return arrangeLLVMFunctionInfo(resultType, /*instanceMethod=*/true, /*chainCall=*/false, argTypes, extInfo, - required); + paramInfos, required); +} + +static SmallVector<CanQualType, 16> +getArgTypesForCall(ASTContext &ctx, const CallArgList &args) { + SmallVector<CanQualType, 16> argTypes; + for (auto &arg : args) + argTypes.push_back(ctx.getCanonicalParamType(arg.Ty)); + return argTypes; +} + +static SmallVector<CanQualType, 16> +getArgTypesForDeclaration(ASTContext &ctx, const FunctionArgList &args) { + SmallVector<CanQualType, 16> argTypes; + for (auto &arg : args) + argTypes.push_back(ctx.getCanonicalParamType(arg->getType())); + return argTypes; +} + +static void addExtParameterInfosForCall( + llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, + const FunctionProtoType *proto, + unsigned prefixArgs, + unsigned totalArgs) { + assert(proto->hasExtParameterInfos()); + assert(paramInfos.size() <= prefixArgs); + assert(proto->getNumParams() + prefixArgs <= totalArgs); + + // Add default infos for any prefix args that don't already have infos. + paramInfos.resize(prefixArgs); + + // Add infos for the prototype. + auto protoInfos = proto->getExtParameterInfos(); + paramInfos.append(protoInfos.begin(), protoInfos.end()); + + // Add default infos for the variadic arguments. + paramInfos.resize(totalArgs); +} + +static llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> +getExtParameterInfosForCall(const FunctionProtoType *proto, + unsigned prefixArgs, unsigned totalArgs) { + llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> result; + if (proto->hasExtParameterInfos()) { + addExtParameterInfosForCall(result, proto, prefixArgs, totalArgs); + } + return result; } /// Arrange a call to a C++ method, passing the given arguments. @@ -267,7 +358,7 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); CanQual<FunctionProtoType> FPT = GetFormalType(D); - RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs); + RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs, D); GlobalDecl GD(D, CtorKind); CanQualType ResultType = TheCXXABI.HasThisReturn(GD) ? 
ArgTypes.front() @@ -276,9 +367,11 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, : Context.VoidTy; FunctionType::ExtInfo Info = FPT->getExtInfo(); + auto ParamInfos = getExtParameterInfosForCall(FPT.getTypePtr(), 1 + ExtraArgs, + ArgTypes.size()); return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true, /*chainCall=*/false, ArgTypes, Info, - Required); + ParamInfos, Required); } /// Arrange the argument and result information for the declaration or @@ -299,7 +392,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>(); return arrangeLLVMFunctionInfo( noProto->getReturnType(), /*instanceMethod=*/false, - /*chainCall=*/false, None, noProto->getExtInfo(), RequiredArgs::All); + /*chainCall=*/false, None, noProto->getExtInfo(), {},RequiredArgs::All); } assert(isa<FunctionProtoType>(FTy)); @@ -328,7 +421,7 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, argTys.push_back(Context.getCanonicalParamType(receiverType)); argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType())); // FIXME: Kill copy? - for (const auto *I : MD->params()) { + for (const auto *I : MD->parameters()) { argTys.push_back(Context.getCanonicalParamType(I->getType())); } @@ -345,7 +438,18 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, return arrangeLLVMFunctionInfo( GetReturnType(MD->getReturnType()), /*instanceMethod=*/false, - /*chainCall=*/false, argTys, einfo, required); + /*chainCall=*/false, argTys, einfo, {}, required); +} + +const CGFunctionInfo & +CodeGenTypes::arrangeUnprototypedObjCMessageSend(QualType returnType, + const CallArgList &args) { + auto argTypes = getArgTypesForCall(Context, args); + FunctionType::ExtInfo einfo; + + return arrangeLLVMFunctionInfo( + GetReturnType(returnType), /*instanceMethod=*/false, + /*chainCall=*/false, argTypes, einfo, {}, RequiredArgs::All); } const CGFunctionInfo & @@ -374,7 +478,7 @@ CodeGenTypes::arrangeMSMemberPointerThunk(const CXXMethodDecl *MD) { CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) }; return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, ArgTys, - FTP->getExtInfo(), RequiredArgs(1)); + FTP->getExtInfo(), {}, RequiredArgs(1)); } const CGFunctionInfo & @@ -394,7 +498,8 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD, /*IsVariadic=*/false, /*IsCXXMethod=*/true); return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/true, /*chainCall=*/false, ArgTys, - FunctionType::ExtInfo(CC), RequiredArgs::All); + FunctionType::ExtInfo(CC), {}, + RequiredArgs::All); } /// Arrange a call as unto a free function, except possibly with an @@ -408,6 +513,8 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, bool chainCall) { assert(args.size() >= numExtraRequiredArgs); + llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; + // In most cases, there are no optional arguments. 
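[Editor's aside, not part of the diff: a standalone illustration of the required/optional split being computed here. For a variadic prototype only the named parameters are "required"; everything after them is what RequiredArgs treats as optional and the ABI handles as genuinely variadic.

    #include <cstdarg>
    #include <cstdio>

    void logv(const char *fmt, ...) {   // one required (named) parameter
      va_list ap;
      va_start(ap, fmt);
      std::vprintf(fmt, ap);
      va_end(ap);
    }

    int main() {
      logv("%d %s\n", 42, "extra");     // two further, optional arguments
    }
]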
RequiredArgs required = RequiredArgs::All; @@ -417,6 +524,10 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, if (proto->isVariadic()) required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs); + if (proto->hasExtParameterInfos()) + addExtParameterInfosForCall(paramInfos, proto, numExtraRequiredArgs, + args.size()); + // If we don't have a prototype at all, but we're supposed to // explicitly use the variadic convention for unprototyped calls, // treat all of the arguments as required but preserve the nominal @@ -433,7 +544,8 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty)); return CGT.arrangeLLVMFunctionInfo(GetReturnType(fnType->getReturnType()), /*instanceMethod=*/false, chainCall, - argTypes, fnType->getExtInfo(), required); + argTypes, fnType->getExtInfo(), paramInfos, + required); } /// Figure out the rules for calling a function with the given formal @@ -448,7 +560,7 @@ CodeGenTypes::arrangeFreeFunctionCall(const CallArgList &args, chainCall ? 1 : 0, chainCall); } -/// A block function call is essentially a free-function call with an +/// A block function is essentially a free function with an /// extra implicit argument. const CGFunctionInfo & CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args, @@ -458,54 +570,99 @@ CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args, } const CGFunctionInfo & -CodeGenTypes::arrangeFreeFunctionCall(QualType resultType, - const CallArgList &args, - FunctionType::ExtInfo info, - RequiredArgs required) { +CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto, + const FunctionArgList ¶ms) { + auto paramInfos = getExtParameterInfosForCall(proto, 1, params.size()); + auto argTypes = getArgTypesForDeclaration(Context, params); + + return arrangeLLVMFunctionInfo( + GetReturnType(proto->getReturnType()), + /*instanceMethod*/ false, /*chainCall*/ false, argTypes, + proto->getExtInfo(), paramInfos, + RequiredArgs::forPrototypePlus(proto, 1, nullptr)); +} + +const CGFunctionInfo & +CodeGenTypes::arrangeBuiltinFunctionCall(QualType resultType, + const CallArgList &args) { // FIXME: Kill copy. SmallVector<CanQualType, 16> argTypes; for (const auto &Arg : args) argTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); return arrangeLLVMFunctionInfo( GetReturnType(resultType), /*instanceMethod=*/false, - /*chainCall=*/false, argTypes, info, required); + /*chainCall=*/false, argTypes, FunctionType::ExtInfo(), + /*paramInfos=*/ {}, RequiredArgs::All); } -/// Arrange a call to a C++ method, passing the given arguments. const CGFunctionInfo & -CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, - const FunctionProtoType *FPT, - RequiredArgs required) { - // FIXME: Kill copy. 
- SmallVector<CanQualType, 16> argTypes; - for (const auto &Arg : args) - argTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); +CodeGenTypes::arrangeBuiltinFunctionDeclaration(QualType resultType, + const FunctionArgList &args) { + auto argTypes = getArgTypesForDeclaration(Context, args); + + return arrangeLLVMFunctionInfo( + GetReturnType(resultType), /*instanceMethod=*/false, /*chainCall=*/false, + argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); +} - FunctionType::ExtInfo info = FPT->getExtInfo(); +const CGFunctionInfo & +CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType, + ArrayRef<CanQualType> argTypes) { return arrangeLLVMFunctionInfo( - GetReturnType(FPT->getReturnType()), /*instanceMethod=*/true, - /*chainCall=*/false, argTypes, info, required); + resultType, /*instanceMethod=*/false, /*chainCall=*/false, + argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); } -const CGFunctionInfo &CodeGenTypes::arrangeFreeFunctionDeclaration( - QualType resultType, const FunctionArgList &args, - const FunctionType::ExtInfo &info, bool isVariadic) { +/// Arrange a call to a C++ method, passing the given arguments. +const CGFunctionInfo & +CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, + const FunctionProtoType *proto, + RequiredArgs required) { + unsigned numRequiredArgs = + (proto->isVariadic() ? required.getNumRequiredArgs() : args.size()); + unsigned numPrefixArgs = numRequiredArgs - proto->getNumParams(); + auto paramInfos = + getExtParameterInfosForCall(proto, numPrefixArgs, args.size()); + // FIXME: Kill copy. - SmallVector<CanQualType, 16> argTypes; - for (auto Arg : args) - argTypes.push_back(Context.getCanonicalParamType(Arg->getType())); + auto argTypes = getArgTypesForCall(Context, args); - RequiredArgs required = - (isVariadic ? 
RequiredArgs(args.size()) : RequiredArgs::All); + FunctionType::ExtInfo info = proto->getExtInfo(); return arrangeLLVMFunctionInfo( - GetReturnType(resultType), /*instanceMethod=*/false, - /*chainCall=*/false, argTypes, info, required); + GetReturnType(proto->getReturnType()), /*instanceMethod=*/true, + /*chainCall=*/false, argTypes, info, paramInfos, required); } const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() { return arrangeLLVMFunctionInfo( getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, - None, FunctionType::ExtInfo(), RequiredArgs::All); + None, FunctionType::ExtInfo(), {}, RequiredArgs::All); +} + +const CGFunctionInfo & +CodeGenTypes::arrangeCall(const CGFunctionInfo &signature, + const CallArgList &args) { + assert(signature.arg_size() <= args.size()); + if (signature.arg_size() == args.size()) + return signature; + + SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; + auto sigParamInfos = signature.getExtParameterInfos(); + if (!sigParamInfos.empty()) { + paramInfos.append(sigParamInfos.begin(), sigParamInfos.end()); + paramInfos.resize(args.size()); + } + + auto argTypes = getArgTypesForCall(Context, args); + + assert(signature.getRequiredArgs().allowsOptionalArgs()); + return arrangeLLVMFunctionInfo(signature.getReturnType(), + signature.isInstanceMethod(), + signature.isChainCall(), + argTypes, + signature.getExtInfo(), + paramInfos, + signature.getRequiredArgs()); } /// Arrange the argument and result information for an abstract value @@ -517,25 +674,26 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, bool chainCall, ArrayRef<CanQualType> argTypes, FunctionType::ExtInfo info, + ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs required) { assert(std::all_of(argTypes.begin(), argTypes.end(), std::mem_fun_ref(&CanQualType::isCanonicalAsParam))); - unsigned CC = ClangCallConvToLLVMCallConv(info.getCC()); - // Lookup or create unique function info. llvm::FoldingSetNodeID ID; - CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, required, - resultType, argTypes); + CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, paramInfos, + required, resultType, argTypes); void *insertPos = nullptr; CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos); if (FI) return *FI; + unsigned CC = ClangCallConvToLLVMCallConv(info.getCC()); + // Construct the function info. We co-allocate the ArgInfos. FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info, - resultType, argTypes, required); + paramInfos, resultType, argTypes, required); FunctionInfos.InsertNode(FI, insertPos); bool inserted = FunctionsBeingProcessed.insert(FI).second; @@ -543,7 +701,11 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, assert(inserted && "Recursively being processed?"); // Compute ABI information. - getABIInfo().computeInfo(*FI); + if (info.getCC() != CC_Swift) { + getABIInfo().computeInfo(*FI); + } else { + swiftcall::computeABIInfo(CGM, *FI); + } // Loop over all of the computed argument and return value info. 
If any of // them are direct or extend without a specified coerce type, specify the @@ -566,11 +728,16 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, bool instanceMethod, bool chainCall, const FunctionType::ExtInfo &info, + ArrayRef<ExtParameterInfo> paramInfos, CanQualType resultType, ArrayRef<CanQualType> argTypes, RequiredArgs required) { - void *buffer = operator new(sizeof(CGFunctionInfo) + - sizeof(ArgInfo) * (argTypes.size() + 1)); + assert(paramInfos.empty() || paramInfos.size() == argTypes.size()); + + void *buffer = + operator new(totalSizeToAlloc<ArgInfo, ExtParameterInfo>( + argTypes.size() + 1, paramInfos.size())); + CGFunctionInfo *FI = new(buffer) CGFunctionInfo(); FI->CallingConvention = llvmCC; FI->EffectiveCallingConvention = llvmCC; @@ -585,9 +752,12 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, FI->ArgStruct = nullptr; FI->ArgStructAlign = 0; FI->NumArgs = argTypes.size(); + FI->HasExtParameterInfos = !paramInfos.empty(); FI->getArgsBuffer()[0].type = resultType; for (unsigned i = 0, e = argTypes.size(); i != e; ++i) FI->getArgsBuffer()[i + 1].type = argTypes[i]; + for (unsigned i = 0, e = paramInfos.size(); i != e; ++i) + FI->getExtParameterInfosBuffer()[i] = paramInfos[i]; return FI; } @@ -634,7 +804,8 @@ struct RecordExpansion : TypeExpansion { RecordExpansion(SmallVector<const CXXBaseSpecifier *, 1> &&Bases, SmallVector<const FieldDecl *, 1> &&Fields) - : TypeExpansion(TEK_Record), Bases(Bases), Fields(Fields) {} + : TypeExpansion(TEK_Record), Bases(std::move(Bases)), + Fields(std::move(Fields)) {} static bool classof(const TypeExpansion *TE) { return TE->Kind == TEK_Record; } @@ -773,7 +944,7 @@ static void forConstantArrayExpansion(CodeGenFunction &CGF, } void CodeGenFunction::ExpandTypeFromArgs( - QualType Ty, LValue LV, SmallVectorImpl<llvm::Argument *>::iterator &AI) { + QualType Ty, LValue LV, SmallVectorImpl<llvm::Value *>::iterator &AI) { assert(LV.isSimple() && "Unexpected non-simple lvalue during struct expansion."); @@ -798,7 +969,7 @@ void CodeGenFunction::ExpandTypeFromArgs( } for (auto FD : RExp->Fields) { // FIXME: What are the right qualifiers here? - LValue SubLV = EmitLValueForField(LV, FD); + LValue SubLV = EmitLValueForFieldInitialization(LV, FD); ExpandTypeFromArgs(FD->getType(), SubLV, AI); } } else if (isa<ComplexExpansion>(Exp.get())) { @@ -1220,11 +1391,13 @@ void ClangToLLVMArgMapping::construct(const ASTContext &Context, // ignore and inalloca doesn't have matching LLVM parameters. 
IRArgs.NumberOfArgs = 0; break; - case ABIArgInfo::Expand: { + case ABIArgInfo::CoerceAndExpand: + IRArgs.NumberOfArgs = AI.getCoerceAndExpandTypeSequence().size(); + break; + case ABIArgInfo::Expand: IRArgs.NumberOfArgs = getExpansionSize(ArgType, Context); break; } - } if (IRArgs.NumberOfArgs > 0) { IRArgs.FirstArgIndex = IRArgNo; @@ -1323,6 +1496,10 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { case ABIArgInfo::Ignore: resultType = llvm::Type::getVoidTy(getLLVMContext()); break; + + case ABIArgInfo::CoerceAndExpand: + resultType = retAI.getUnpaddedCoerceAndExpandType(); + break; } ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI, true); @@ -1390,6 +1567,15 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { break; } + case ABIArgInfo::CoerceAndExpand: { + auto ArgTypesIter = ArgTypes.begin() + FirstIRArg; + for (auto EltTy : ArgInfo.getCoerceAndExpandTypeSequence()) { + *ArgTypesIter++ = EltTy; + } + assert(ArgTypesIter == ArgTypes.begin() + FirstIRArg + NumIRArgs); + break; + } + case ABIArgInfo::Expand: auto ArgTypesIter = ArgTypes.begin() + FirstIRArg; getExpandedTypes(it->type, ArgTypesIter); @@ -1450,6 +1636,7 @@ void CodeGenModule::ConstructAttributeList( const Decl *TargetDecl = CalleeInfo.getCalleeDecl(); + bool HasAnyX86InterruptAttr = false; // FIXME: handle sseregparm someday... if (TargetDecl) { if (TargetDecl->hasAttr<ReturnsTwiceAttr>()) @@ -1487,6 +1674,7 @@ void CodeGenModule::ConstructAttributeList( if (TargetDecl->hasAttr<ReturnsNonNullAttr>()) RetAttrs.addAttribute(llvm::Attribute::NonNull); + HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>(); HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>(); } @@ -1526,10 +1714,11 @@ void CodeGenModule::ConstructAttributeList( } bool DisableTailCalls = - CodeGenOpts.DisableTailCalls || + CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr || (TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>()); - FuncAttrs.addAttribute("disable-tail-calls", - llvm::toStringRef(DisableTailCalls)); + FuncAttrs.addAttribute( + "disable-tail-calls", + llvm::toStringRef(DisableTailCalls)); FuncAttrs.addAttribute("less-precise-fpmad", llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); @@ -1543,9 +1732,13 @@ void CodeGenModule::ConstructAttributeList( llvm::toStringRef(CodeGenOpts.SoftFloat)); FuncAttrs.addAttribute("stack-protector-buffer-size", llvm::utostr(CodeGenOpts.SSPBufferSize)); + FuncAttrs.addAttribute("no-signed-zeros-fp-math", + llvm::toStringRef(CodeGenOpts.NoSignedZeros)); if (CodeGenOpts.StackRealignment) FuncAttrs.addAttribute("stackrealign"); + if (CodeGenOpts.Backchain) + FuncAttrs.addAttribute("backchain"); // Add target-cpu and target-features attributes to functions. If // we have a decl for the function and it has a target attribute then @@ -1594,6 +1787,18 @@ void CodeGenModule::ConstructAttributeList( } } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions and calls in CUDA as convergent + // (meaning, they may call an intrinsically convergent op, such as + // __syncthreads(), and so can't have certain optimizations applied around + // them). LLVM will remove this attribute where it safely can. + FuncAttrs.addAttribute(llvm::Attribute::Convergent); + + // Respect -fcuda-flush-denormals-to-zero. 
+ if (getLangOpts().CUDADeviceFlushDenormalsToZero) + FuncAttrs.addAttribute("nvptx-f32ftz", "true"); + } + ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); QualType RetTy = FI.getReturnType(); @@ -1620,6 +1825,9 @@ void CodeGenModule::ConstructAttributeList( break; } + case ABIArgInfo::CoerceAndExpand: + break; + case ABIArgInfo::Expand: llvm_unreachable("Invalid ABI kind for return argument"); } @@ -1639,10 +1847,13 @@ void CodeGenModule::ConstructAttributeList( getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs)); } + bool hasUsedSRet = false; + // Attach attributes to sret. if (IRFunctionArgs.hasSRetArg()) { llvm::AttrBuilder SRETAttrs; SRETAttrs.addAttribute(llvm::Attribute::StructRet); + hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); PAL.push_back(llvm::AttributeSet::get( @@ -1727,7 +1938,8 @@ void CodeGenModule::ConstructAttributeList( } case ABIArgInfo::Ignore: case ABIArgInfo::Expand: - continue; + case ABIArgInfo::CoerceAndExpand: + break; case ABIArgInfo::InAlloca: // inalloca disables readnone and readonly. @@ -1745,6 +1957,41 @@ void CodeGenModule::ConstructAttributeList( Attrs.addAttribute(llvm::Attribute::NonNull); } + switch (FI.getExtParameterInfo(ArgNo).getABI()) { + case ParameterABI::Ordinary: + break; + + case ParameterABI::SwiftIndirectResult: { + // Add 'sret' if we haven't already used it for something, but + // only if the result is void. + if (!hasUsedSRet && RetTy->isVoidType()) { + Attrs.addAttribute(llvm::Attribute::StructRet); + hasUsedSRet = true; + } + + // Add 'noalias' in either case. + Attrs.addAttribute(llvm::Attribute::NoAlias); + + // Add 'dereferenceable' and 'alignment'. + auto PTy = ParamType->getPointeeType(); + if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) { + auto info = getContext().getTypeInfoInChars(PTy); + Attrs.addDereferenceableAttr(info.first.getQuantity()); + Attrs.addAttribute(llvm::Attribute::getWithAlignment(getLLVMContext(), + info.second.getQuantity())); + } + break; + } + + case ParameterABI::SwiftErrorResult: + Attrs.addAttribute(llvm::Attribute::SwiftError); + break; + + case ParameterABI::SwiftContext: + Attrs.addAttribute(llvm::Attribute::SwiftSelf); + break; + } + if (Attrs.hasAttributes()) { unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); @@ -1810,6 +2057,18 @@ static const NonNullAttr *getNonNullAttr(const Decl *FD, const ParmVarDecl *PVD, return nullptr; } +namespace { + struct CopyBackSwiftError final : EHScopeStack::Cleanup { + Address Temp; + Address Arg; + CopyBackSwiftError(Address temp, Address arg) : Temp(temp), Arg(arg) {} + void Emit(CodeGenFunction &CGF, Flags flags) override { + llvm::Value *errorValue = CGF.Builder.CreateLoad(Temp); + CGF.Builder.CreateStore(errorValue, Arg); + } + }; +} + void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::Function *Fn, const FunctionArgList &Args) { @@ -1835,7 +2094,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), FI); // Flattened function arguments. - SmallVector<llvm::Argument *, 16> FnArgs; + SmallVector<llvm::Value *, 16> FnArgs; FnArgs.reserve(IRFunctionArgs.totalIRArgs()); for (auto &Arg : Fn->args()) { FnArgs.push_back(&Arg); @@ -1856,7 +2115,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Name the struct return parameter. 
if (IRFunctionArgs.hasSRetArg()) { - auto AI = FnArgs[IRFunctionArgs.getSRetArgNo()]; + auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]); AI->setName("agg.result"); AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1, llvm::Attribute::NoAlias)); @@ -1944,8 +2203,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ArgI.getCoerceToType() == ConvertType(Ty) && ArgI.getDirectOffset() == 0) { assert(NumIRArgs == 1); - auto AI = FnArgs[FirstIRArg]; - llvm::Value *V = AI; + llvm::Value *V = FnArgs[FirstIRArg]; + auto AI = cast<llvm::Argument>(V); if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) { if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(), @@ -2014,6 +2273,25 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, AI->getArgNo() + 1, llvm::Attribute::NoAlias)); + // LLVM expects swifterror parameters to be used in very restricted + // ways. Copy the value into a less-restricted temporary. + if (FI.getExtParameterInfo(ArgNo).getABI() + == ParameterABI::SwiftErrorResult) { + QualType pointeeTy = Ty->getPointeeType(); + assert(pointeeTy->isPointerType()); + Address temp = + CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp"); + Address arg = Address(V, getContext().getTypeAlignInChars(pointeeTy)); + llvm::Value *incomingErrorValue = Builder.CreateLoad(arg); + Builder.CreateStore(incomingErrorValue, temp); + V = temp.getPointer(); + + // Push a cleanup to copy the value back at the end of the function. + // The convention does not guarantee that the value will be written + // back if the function exits with an unwind exception. + EHStack.pushCleanup<CopyBackSwiftError>(NormalCleanup, temp, arg); + } + // Ensure the argument is the correct type. if (V->getType() != ArgI.getCoerceToType()) V = Builder.CreateBitCast(V, ArgI.getCoerceToType()); @@ -2100,6 +2378,29 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, break; } + case ABIArgInfo::CoerceAndExpand: { + // Reconstruct into a temporary. + Address alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg)); + ArgVals.push_back(ParamValue::forIndirect(alloca)); + + auto coercionType = ArgI.getCoerceAndExpandType(); + alloca = Builder.CreateElementBitCast(alloca, coercionType); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + unsigned argIndex = FirstIRArg; + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + llvm::Type *eltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) + continue; + + auto eltAddr = Builder.CreateStructGEP(alloca, i, layout); + auto elt = FnArgs[argIndex++]; + Builder.CreateStore(elt, eltAddr); + } + assert(argIndex == FirstIRArg + NumIRArgs); + break; + } + case ABIArgInfo::Expand: { // If this structure was expanded into multiple arguments then // we need to create a temporary and reconstruct it from the @@ -2462,9 +2763,26 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, // In ARC, end functions that return a retainable type with a call // to objc_autoreleaseReturnValue. if (AutoreleaseResult) { +#ifndef NDEBUG + // Type::isObjCRetainabletype has to be called on a QualType that hasn't + // been stripped of the typedefs, so we cannot use RetTy here. Get the + // original return type of FunctionDecl, CurCodeDecl, and BlockDecl from + // CurCodeDecl or BlockInfo. 
+ QualType RT; + + if (auto *FD = dyn_cast<FunctionDecl>(CurCodeDecl)) + RT = FD->getReturnType(); + else if (auto *MD = dyn_cast<ObjCMethodDecl>(CurCodeDecl)) + RT = MD->getReturnType(); + else if (isa<BlockDecl>(CurCodeDecl)) + RT = BlockInfo->BlockExpression->getFunctionType()->getReturnType(); + else + llvm_unreachable("Unexpected function/method type"); + assert(getLangOpts().ObjCAutoRefCount && !FI.isReturnsRetained() && - RetTy->isObjCRetainableType()); + RT->isObjCRetainableType()); +#endif RV = emitAutoreleaseOfResult(*this, RV); } @@ -2473,6 +2791,40 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, case ABIArgInfo::Ignore: break; + case ABIArgInfo::CoerceAndExpand: { + auto coercionType = RetAI.getCoerceAndExpandType(); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + // Load all of the coerced elements out into results. + llvm::SmallVector<llvm::Value*, 4> results; + Address addr = Builder.CreateElementBitCast(ReturnValue, coercionType); + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + auto coercedEltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType)) + continue; + + auto eltAddr = Builder.CreateStructGEP(addr, i, layout); + auto elt = Builder.CreateLoad(eltAddr); + results.push_back(elt); + } + + // If we have one result, it's the single direct result type. + if (results.size() == 1) { + RV = results[0]; + + // Otherwise, we need to make a first-class aggregate. + } else { + // Construct a return type that lacks padding elements. + llvm::Type *returnType = RetAI.getUnpaddedCoerceAndExpandType(); + + RV = llvm::UndefValue::get(returnType); + for (unsigned i = 0, e = results.size(); i != e; ++i) { + RV = Builder.CreateInsertValue(RV, results[i], i); + } + } + break; + } + case ABIArgInfo::Expand: llvm_unreachable("Invalid ABI kind for return argument"); } @@ -2536,23 +2888,15 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, QualType type = param->getType(); - // For the most part, we just need to load the alloca, except: - // 1) aggregate r-values are actually pointers to temporaries, and - // 2) references to non-scalars are pointers directly to the aggregate. - // I don't know why references to scalars are different here. - if (const ReferenceType *ref = type->getAs<ReferenceType>()) { - if (!hasScalarEvaluationKind(ref->getPointeeType())) - return args.add(RValue::getAggregate(local), type); - - // Locals which are references to scalars are represented - // with allocas holding the pointer. - return args.add(RValue::get(Builder.CreateLoad(local)), type); - } - assert(!isInAllocaArgument(CGM.getCXXABI(), type) && "cannot emit delegate call arguments for inalloca arguments!"); - args.add(convertTempToRValue(local, type, loc), type); + // For the most part, we just need to load the alloca, except that + // aggregate r-values are actually pointers to temporaries. 
+ if (type->isReferenceType()) + args.add(RValue::get(Builder.CreateLoad(local)), type); + else + args.add(convertTempToRValue(local, type, loc), type); } static bool isProvablyNull(llvm::Value *addr) { @@ -2863,10 +3207,10 @@ void CodeGenFunction::EmitCallArgs( size_t CallArgsStart = Args.size(); for (int I = ArgTypes.size() - 1; I >= 0; --I) { CallExpr::const_arg_iterator Arg = ArgRange.begin() + I; + MaybeEmitImplicitObjectSize(I, *Arg); EmitCallArg(Args, *Arg, ArgTypes[I]); EmitNonNullArgCheck(Args.back().RV, ArgTypes[I], (*Arg)->getExprLoc(), CalleeDecl, ParamsToSkip + I); - MaybeEmitImplicitObjectSize(I, *Arg); } // Un-reverse the arguments we just evaluated so they match up with the LLVM @@ -3046,24 +3390,13 @@ CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, return EmitRuntimeCall(callee, None, name); } -/// Emits a simple call (never an invoke) to the given runtime -/// function. -llvm::CallInst * -CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, - const llvm::Twine &name) { - llvm::CallInst *call = Builder.CreateCall(callee, args, name); - call->setCallingConv(getRuntimeCC()); - return call; -} - // Calls which may throw must have operand bundles indicating which funclet // they are nested within. static void -getBundlesForFunclet(llvm::Value *Callee, - llvm::Instruction *CurrentFuncletPad, +getBundlesForFunclet(llvm::Value *Callee, llvm::Instruction *CurrentFuncletPad, SmallVectorImpl<llvm::OperandBundleDef> &BundleList) { - // There is no need for a funclet operand bundle if we aren't inside a funclet. + // There is no need for a funclet operand bundle if we aren't inside a + // funclet. if (!CurrentFuncletPad) return; @@ -3075,6 +3408,19 @@ getBundlesForFunclet(llvm::Value *Callee, BundleList.emplace_back("funclet", CurrentFuncletPad); } +/// Emits a simple call (never an invoke) to the given runtime function. +llvm::CallInst * +CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, + ArrayRef<llvm::Value*> args, + const llvm::Twine &name) { + SmallVector<llvm::OperandBundleDef, 1> BundleList; + getBundlesForFunclet(callee, CurrentFuncletPad, BundleList); + + llvm::CallInst *call = Builder.CreateCall(callee, args, BundleList, name); + call->setCallingConv(getRuntimeCC()); + return call; +} + /// Emits a call or invoke to the given noreturn runtime function. void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, ArrayRef<llvm::Value*> args) { @@ -3098,8 +3444,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, } } -/// Emits a call or invoke instruction to the given nullary runtime -/// function. +/// Emits a call or invoke instruction to the given nullary runtime function. 
llvm::CallSite CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, const Twine &name) { @@ -3123,13 +3468,16 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, ArrayRef<llvm::Value *> Args, const Twine &Name) { llvm::BasicBlock *InvokeDest = getInvokeDest(); + SmallVector<llvm::OperandBundleDef, 1> BundleList; + getBundlesForFunclet(Callee, CurrentFuncletPad, BundleList); llvm::Instruction *Inst; if (!InvokeDest) - Inst = Builder.CreateCall(Callee, Args, Name); + Inst = Builder.CreateCall(Callee, Args, BundleList, Name); else { llvm::BasicBlock *ContBB = createBasicBlock("invoke.cont"); - Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, Name); + Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, BundleList, + Name); EmitBlock(ContBB); } @@ -3208,7 +3556,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // alloca to hold the result, unless one is given to us. Address SRetPtr = Address::invalid(); size_t UnusedReturnSize = 0; - if (RetAI.isIndirect() || RetAI.isInAlloca()) { + if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { if (!ReturnValue.isNull()) { SRetPtr = ReturnValue.getValue(); } else { @@ -3222,12 +3570,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } if (IRFunctionArgs.hasSRetArg()) { IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr.getPointer(); - } else { + } else if (RetAI.isInAlloca()) { Address Addr = createInAllocaStructGEP(RetAI.getInAllocaFieldIndex()); Builder.CreateStore(SRetPtr.getPointer(), Addr); } } + Address swiftErrorTemp = Address::invalid(); + Address swiftErrorArg = Address::invalid(); + assert(CallInfo.arg_size() == CallArgs.size() && "Mismatch between function signature & arguments."); unsigned ArgNo = 0; @@ -3334,6 +3685,25 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, else V = Builder.CreateLoad(RV.getAggregateAddress()); + // Implement swifterror by copying into a new swifterror argument. + // We'll write back in the normal path out of the call. + if (CallInfo.getExtParameterInfo(ArgNo).getABI() + == ParameterABI::SwiftErrorResult) { + assert(!swiftErrorTemp.isValid() && "multiple swifterror args"); + + QualType pointeeTy = I->Ty->getPointeeType(); + swiftErrorArg = + Address(V, getContext().getTypeAlignInChars(pointeeTy)); + + swiftErrorTemp = + CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp"); + V = swiftErrorTemp.getPointer(); + cast<llvm::AllocaInst>(V)->setSwiftError(true); + + llvm::Value *errorValue = Builder.CreateLoad(swiftErrorArg); + Builder.CreateStore(errorValue, swiftErrorTemp); + } + // We might have to widen integers, but we should never truncate. 
if (ArgInfo.getCoerceToType() != V->getType() && V->getType()->isIntegerTy()) @@ -3344,6 +3714,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (FirstIRArg < IRFuncTy->getNumParams() && V->getType() != IRFuncTy->getParamType(FirstIRArg)) V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg)); + IRCallArgs[FirstIRArg] = V; break; } @@ -3402,6 +3773,51 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, break; } + case ABIArgInfo::CoerceAndExpand: { + auto coercionType = ArgInfo.getCoerceAndExpandType(); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + llvm::Value *tempSize = nullptr; + Address addr = Address::invalid(); + if (RV.isAggregate()) { + addr = RV.getAggregateAddress(); + } else { + assert(RV.isScalar()); // complex should always just be direct + + llvm::Type *scalarType = RV.getScalarVal()->getType(); + auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType); + auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType); + + tempSize = llvm::ConstantInt::get(CGM.Int64Ty, scalarSize); + + // Materialize to a temporary. + addr = CreateTempAlloca(RV.getScalarVal()->getType(), + CharUnits::fromQuantity(std::max(layout->getAlignment(), + scalarAlign))); + EmitLifetimeStart(scalarSize, addr.getPointer()); + + Builder.CreateStore(RV.getScalarVal(), addr); + } + + addr = Builder.CreateElementBitCast(addr, coercionType); + + unsigned IRArgPos = FirstIRArg; + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + llvm::Type *eltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; + Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + llvm::Value *elt = Builder.CreateLoad(eltAddr); + IRCallArgs[IRArgPos++] = elt; + } + assert(IRArgPos == FirstIRArg + NumIRArgs); + + if (tempSize) { + EmitLifetimeEnd(tempSize, addr.getPointer()); + } + + break; + } + case ABIArgInfo::Expand: unsigned IRArgPos = FirstIRArg; ExpandTypeToArgs(I->Ty, RV, IRFuncTy, IRCallArgs, IRArgPos); @@ -3541,6 +3957,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CS.setAttributes(Attrs); CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); + // Insert instrumentation or attach profile metadata at indirect call sites. + // For more details, see the comment before the definition of + // IPVK_IndirectCallTarget in InstrProfData.inc. + if (!CS.getCalledFunction()) + PGO.valueProfile(Builder, llvm::IPVK_IndirectCallTarget, + CS.getInstruction(), Callee); + // In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC // optimizer it can aggressively ignore unwind edges. if (CGM.getLangOpts().ObjCAutoRefCount) @@ -3567,9 +3990,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } llvm::Instruction *CI = CS.getInstruction(); - if (Builder.isNamePreserving() && !CI->getType()->isVoidTy()) + if (!CI->getType()->isVoidTy()) CI->setName("call"); + // Perform the swifterror writeback. + if (swiftErrorTemp.isValid()) { + llvm::Value *errorResult = Builder.CreateLoad(swiftErrorTemp); + Builder.CreateStore(errorResult, swiftErrorArg); + } + // Emit any writebacks immediately. Arguably this should happen // after any return-value munging. 
if (CallArgs.hasWritebacks()) @@ -3587,6 +4016,31 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, RValue Ret = [&] { switch (RetAI.getKind()) { + case ABIArgInfo::CoerceAndExpand: { + auto coercionType = RetAI.getCoerceAndExpandType(); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + Address addr = SRetPtr; + addr = Builder.CreateElementBitCast(addr, coercionType); + + assert(CI->getType() == RetAI.getUnpaddedCoerceAndExpandType()); + bool requiresExtract = isa<llvm::StructType>(CI->getType()); + + unsigned unpaddedIndex = 0; + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + llvm::Type *eltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; + Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + llvm::Value *elt = CI; + if (requiresExtract) + elt = Builder.CreateExtractValue(elt, unpaddedIndex++); + else + assert(unpaddedIndex == 0); + Builder.CreateStore(elt, eltAddr); + } + // FALLTHROUGH + } + case ABIArgInfo::InAlloca: case ABIArgInfo::Indirect: { RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation()); diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index 2e566de6d8ac..7ed891f426aa 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -26,6 +26,7 @@ #include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Metadata.h" +#include "llvm/Transforms/Utils/SanitizerStats.h" using namespace clang; using namespace CodeGen; @@ -94,7 +95,7 @@ CodeGenModule::getDynamicOffsetAlignment(CharUnits actualBaseAlign, // unless we someday add some sort of attribute to change the // assumed alignment of 'this'. So our goal here is pretty much // just to allow the user to explicitly say that a pointer is - // under-aligned and then safely access its fields and v-tables. + // under-aligned and then safely access its fields and vtables. if (actualBaseAlign >= expectedBaseAlign) { return expectedTargetAlign; } @@ -745,7 +746,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, ArrayRef<VarDecl *> ArrayIndexes; if (MemberInit->getNumArrayIndices()) - ArrayIndexes = MemberInit->getArrayIndexes(); + ArrayIndexes = MemberInit->getArrayIndices(); CGF.EmitInitializerForField(Field, LHS, MemberInit->getInit(), ArrayIndexes); } @@ -986,7 +987,7 @@ namespace { CodeGenFunction &CGF; SanitizerSet OldSanOpts; }; -} +} // end anonymous namespace namespace { class FieldMemcpyizer { @@ -1071,7 +1072,6 @@ namespace { const CXXRecordDecl *ClassDecl; private: - void emitMemcpyIR(Address DestPtr, Address SrcPtr, CharUnits Size) { llvm::PointerType *DPT = DestPtr.getType(); llvm::Type *DBP = @@ -1087,13 +1087,12 @@ namespace { } void addInitialField(FieldDecl *F) { - FirstField = F; - LastField = F; - FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex()); - LastFieldOffset = FirstFieldOffset; - LastAddedFieldIndex = F->getFieldIndex(); - return; - } + FirstField = F; + LastField = F; + FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex()); + LastFieldOffset = FirstFieldOffset; + LastAddedFieldIndex = F->getFieldIndex(); + } void addNextField(FieldDecl *F) { // For the most part, the following invariant will hold: @@ -1127,7 +1126,6 @@ namespace { class ConstructorMemcpyizer : public FieldMemcpyizer { private: - /// Get source argument for copy constructor. Returns null if not a copy /// constructor. 
static const VarDecl *getTrivialCopySource(CodeGenFunction &CGF, @@ -1232,7 +1230,6 @@ namespace { class AssignmentMemcpyizer : public FieldMemcpyizer { private: - // Returns the memcpyable field copied by the given statement, if one // exists. Otherwise returns null. FieldDecl *getMemcpyableField(Stmt *S) { @@ -1306,7 +1303,6 @@ namespace { SmallVector<Stmt*, 16> AggregatedStmts; public: - AssignmentMemcpyizer(CodeGenFunction &CGF, const CXXMethodDecl *AD, FunctionArgList &Args) : FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]), @@ -1607,6 +1603,7 @@ void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) LexicalScope Scope(*this, RootCS->getSourceRange()); + incrementProfileCounter(RootCS); AssignmentMemcpyizer AM(*this, AssignOp, Args); for (auto *I : RootCS->body()) AM.emitAssignment(I); @@ -1628,6 +1625,7 @@ namespace { struct CallDtorDeleteConditional final : EHScopeStack::Cleanup { llvm::Value *ShouldDeleteCondition; + public: CallDtorDeleteConditional(llvm::Value *ShouldDeleteCondition) : ShouldDeleteCondition(ShouldDeleteCondition) { @@ -1917,7 +1915,7 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, /// \param zeroInitialize true if each element should be /// zero-initialized before it is constructed void CodeGenFunction::EmitCXXAggrConstructorCall( - const CXXConstructorDecl *ctor, const ConstantArrayType *arrayType, + const CXXConstructorDecl *ctor, const ArrayType *arrayType, Address arrayBegin, const CXXConstructExpr *E, bool zeroInitialize) { QualType elementType; llvm::Value *numElements = @@ -2050,6 +2048,62 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, bool ForVirtualBase, bool Delegating, Address This, const CXXConstructExpr *E) { + CallArgList Args; + + // Push the this ptr. + Args.add(RValue::get(This.getPointer()), D->getThisType(getContext())); + + // If this is a trivial constructor, emit a memcpy now before we lose + // the alignment information on the argument. + // FIXME: It would be better to preserve alignment information into CallArg. + if (isMemcpyEquivalentSpecialMember(D)) { + assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); + + const Expr *Arg = E->getArg(0); + QualType SrcTy = Arg->getType(); + Address Src = EmitLValue(Arg).getAddress(); + QualType DestTy = getContext().getTypeDeclType(D->getParent()); + EmitAggregateCopyCtor(This, Src, DestTy, SrcTy); + return; + } + + // Add the rest of the user-supplied arguments. + const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); + EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor()); + + EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args); +} + +static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, + const CXXConstructorDecl *Ctor, + CXXCtorType Type, CallArgList &Args) { + // We can't forward a variadic call. + if (Ctor->isVariadic()) + return false; + + if (CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { + // If the parameters are callee-cleanup, it's not safe to forward. + for (auto *P : Ctor->parameters()) + if (P->getType().isDestructedType()) + return false; + + // Likewise if they're inalloca. + const CGFunctionInfo &Info = + CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0); + if (Info.usesInAlloca()) + return false; + } + + // Anything else should be OK. 
+ return true; +} + +void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, + CXXCtorType Type, + bool ForVirtualBase, + bool Delegating, + Address This, + CallArgList &Args) { const CXXRecordDecl *ClassDecl = D->getParent(); // C++11 [class.mfct.non-static]p2: @@ -2060,7 +2114,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, This.getPointer(), getContext().getRecordType(ClassDecl)); if (D->isTrivial() && D->isDefaultConstructor()) { - assert(E->getNumArgs() == 0 && "trivial default ctor with args"); + assert(Args.size() == 1 && "trivial default ctor with args"); return; } @@ -2068,24 +2122,24 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, // union copy constructor, we must emit a memcpy, because the AST does not // model that copy. if (isMemcpyEquivalentSpecialMember(D)) { - assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); + assert(Args.size() == 2 && "unexpected argcount for trivial ctor"); - const Expr *Arg = E->getArg(0); - QualType SrcTy = Arg->getType(); - Address Src = EmitLValue(Arg).getAddress(); + QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType(); + Address Src(Args[1].RV.getScalarVal(), getNaturalTypeAlignment(SrcTy)); QualType DestTy = getContext().getTypeDeclType(ClassDecl); EmitAggregateCopyCtor(This, Src, DestTy, SrcTy); return; } - CallArgList Args; - - // Push the this ptr. - Args.add(RValue::get(This.getPointer()), D->getThisType(getContext())); - - // Add the rest of the user-supplied arguments. - const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); - EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor()); + // Check whether we can actually emit the constructor before trying to do so. + if (auto Inherited = D->getInheritedConstructor()) { + if (getTypes().inheritingCtorHasParams(Inherited, Type) && + !canEmitDelegateCallArgs(*this, D, Type, Args)) { + EmitInlinedInheritingCXXConstructorCall(D, Type, ForVirtualBase, + Delegating, Args); + return; + } + } // Insert any ABI-specific implicit constructor arguments. unsigned ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs( @@ -2115,6 +2169,95 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, EmitVTableAssumptionLoads(ClassDecl, This); } +void CodeGenFunction::EmitInheritedCXXConstructorCall( + const CXXConstructorDecl *D, bool ForVirtualBase, Address This, + bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) { + CallArgList Args; + CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()), + /*NeedsCopy=*/false); + + // Forward the parameters. + if (InheritedFromVBase && + CGM.getTarget().getCXXABI().hasConstructorVariants()) { + // Nothing to do; this construction is not responsible for constructing + // the base class containing the inherited constructor. + // FIXME: Can we just pass undef's for the remaining arguments if we don't + // have constructor variants? + Args.push_back(ThisArg); + } else if (!CXXInheritedCtorInitExprArgs.empty()) { + // The inheriting constructor was inlined; just inject its arguments. + assert(CXXInheritedCtorInitExprArgs.size() >= D->getNumParams() && + "wrong number of parameters for inherited constructor call"); + Args = CXXInheritedCtorInitExprArgs; + Args[0] = ThisArg; + } else { + // The inheriting constructor was not inlined. Emit delegating arguments. 
+ Args.push_back(ThisArg); + const auto *OuterCtor = cast<CXXConstructorDecl>(CurCodeDecl); + assert(OuterCtor->getNumParams() == D->getNumParams()); + assert(!OuterCtor->isVariadic() && "should have been inlined"); + + for (const auto *Param : OuterCtor->parameters()) { + assert(getContext().hasSameUnqualifiedType( + OuterCtor->getParamDecl(Param->getFunctionScopeIndex())->getType(), + Param->getType())); + EmitDelegateCallArg(Args, Param, E->getLocation()); + + // Forward __attribute__(pass_object_size). + if (Param->hasAttr<PassObjectSizeAttr>()) { + auto *POSParam = SizeArguments[Param]; + assert(POSParam && "missing pass_object_size value for forwarding"); + EmitDelegateCallArg(Args, POSParam, E->getLocation()); + } + } + } + + EmitCXXConstructorCall(D, Ctor_Base, ForVirtualBase, /*Delegating*/false, + This, Args); +} + +void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall( + const CXXConstructorDecl *Ctor, CXXCtorType CtorType, bool ForVirtualBase, + bool Delegating, CallArgList &Args) { + InlinedInheritingConstructorScope Scope(*this, GlobalDecl(Ctor, CtorType)); + + // Save the arguments to be passed to the inherited constructor. + CXXInheritedCtorInitExprArgs = Args; + + FunctionArgList Params; + QualType RetType = BuildFunctionArgList(CurGD, Params); + FnRetTy = RetType; + + // Insert any ABI-specific implicit constructor arguments. + CGM.getCXXABI().addImplicitConstructorArgs(*this, Ctor, CtorType, + ForVirtualBase, Delegating, Args); + + // Emit a simplified prolog. We only need to emit the implicit params. + assert(Args.size() >= Params.size() && "too few arguments for call"); + for (unsigned I = 0, N = Args.size(); I != N; ++I) { + if (I < Params.size() && isa<ImplicitParamDecl>(Params[I])) { + const RValue &RV = Args[I].RV; + assert(!RV.isComplex() && "complex indirect params not supported"); + ParamValue Val = RV.isScalar() + ? ParamValue::forDirect(RV.getScalarVal()) + : ParamValue::forIndirect(RV.getAggregateAddress()); + EmitParmDecl(*Params[I], Val, I + 1); + } + } + + // Create a return value slot if the ABI implementation wants one. + // FIXME: This is dumb, we should ask the ABI not to try to set the return + // value instead. + if (!RetType->isVoidType()) + ReturnValue = CreateIRTemp(RetType, "retval.inhctor"); + + CGM.getCXXABI().EmitInstanceFunctionProlog(*this); + CXXThisValue = CXXABIThisValue; + + // Directly emit the constructor initializers. 
+ EmitCtorPrologue(Ctor, CtorType, Params); +} + void CodeGenFunction::EmitVTableAssumptionLoad(const VPtr &Vptr, Address This) { llvm::Value *VTableGlobal = CGM.getCXXABI().getVTableAddressPoint(Vptr.Base, Vptr.VTableClass); @@ -2147,19 +2290,6 @@ void CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, Address This, Address Src, const CXXConstructExpr *E) { - if (isMemcpyEquivalentSpecialMember(D)) { - assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); - assert(D->isCopyOrMoveConstructor() && - "trivial 1-arg ctor not a copy/move ctor"); - EmitAggregateCopyCtor(This, Src, - getContext().getTypeDeclType(D->getParent()), - (*E->arg_begin())->getType()); - return; - } - llvm::Value *Callee = CGM.getAddrOfCXXStructor(D, StructorType::Complete); - assert(D->isInstance() && - "Trying to emit a member call expr on a static method!"); - const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); CallArgList Args; @@ -2177,8 +2307,7 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, EmitCallArgs(Args, FPT, drop_begin(E->arguments(), 1), E->getConstructor(), /*ParamsToSkip*/ 1); - EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, RequiredArgs::All), - Callee, ReturnValueSlot(), Args, D); + EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args); } void @@ -2192,21 +2321,17 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, assert(I != E && "no parameters to constructor"); // this - DelegateArgs.add(RValue::get(LoadCXXThis()), (*I)->getType()); + Address This = LoadCXXThisAddress(); + DelegateArgs.add(RValue::get(This.getPointer()), (*I)->getType()); ++I; - // vtt - if (llvm::Value *VTT = GetVTTParameter(GlobalDecl(Ctor, CtorType), - /*ForVirtualBase=*/false, - /*Delegating=*/true)) { - QualType VoidPP = getContext().getPointerType(getContext().VoidPtrTy); - DelegateArgs.add(RValue::get(VTT), VoidPP); - - if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) { - assert(I != E && "cannot skip vtt parameter, already done with args"); - assert((*I)->getType() == VoidPP && "skipping parameter not of vtt type"); - ++I; - } + // FIXME: The location of the VTT parameter in the parameter list is + // specific to the Itanium ABI and shouldn't be hardcoded here. + if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) { + assert(I != E && "cannot skip vtt parameter, already done with args"); + assert((*I)->getType()->isPointerType() && + "skipping parameter not of vtt type"); + ++I; } // Explicit arguments. 
@@ -2216,11 +2341,8 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, EmitDelegateCallArg(DelegateArgs, param, Loc); } - llvm::Value *Callee = - CGM.getAddrOfCXXStructor(Ctor, getFromCtorType(CtorType)); - EmitCall(CGM.getTypes() - .arrangeCXXStructorDeclaration(Ctor, getFromCtorType(CtorType)), - Callee, ReturnValueSlot(), DelegateArgs, Ctor); + EmitCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false, + /*Delegating=*/true, This, DelegateArgs); } namespace { @@ -2289,7 +2411,7 @@ namespace { /*Delegating=*/false, Addr); } }; -} +} // end anonymous namespace void CodeGenFunction::PushDestructorCleanup(const CXXDestructorDecl *D, Address Addr) { @@ -2487,15 +2609,35 @@ LeastDerivedClassWithSameLayout(const CXXRecordDecl *RD) { RD->bases_begin()->getType()->getAsCXXRecordDecl()); } -void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, +void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, + llvm::Value *VTable, + SourceLocation Loc) { + if (CGM.getCodeGenOpts().WholeProgramVTables && + CGM.HasHiddenLTOVisibility(RD)) { + llvm::Metadata *MD = + CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); + llvm::Value *TypeId = + llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD); + + llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); + llvm::Value *TypeTest = + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test), + {CastedVTable, TypeId}); + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::assume), TypeTest); + } + + if (SanOpts.has(SanitizerKind::CFIVCall)) + EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); +} + +void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc) { - const CXXRecordDecl *ClassDecl = MD->getParent(); if (!SanOpts.has(SanitizerKind::CFICastStrict)) - ClassDecl = LeastDerivedClassWithSameLayout(ClassDecl); + RD = LeastDerivedClassWithSameLayout(RD); - EmitVTablePtrCheck(ClassDecl, VTable, TCK, Loc); + EmitVTablePtrCheck(RD, VTable, TCK, Loc); } void CodeGenFunction::EmitVTablePtrCheckForCast(QualType T, @@ -2547,26 +2689,41 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc) { - if (CGM.IsCFIBlacklistedRecord(RD)) + if (!CGM.getCodeGenOpts().SanitizeCfiCrossDso && + !CGM.HasHiddenLTOVisibility(RD)) + return; + + std::string TypeName = RD->getQualifiedNameAsString(); + if (getContext().getSanitizerBlacklist().isBlacklistedType(TypeName)) return; SanitizerScope SanScope(this); + llvm::SanitizerStatKind SSK; + switch (TCK) { + case CFITCK_VCall: + SSK = llvm::SanStat_CFI_VCall; + break; + case CFITCK_NVCall: + SSK = llvm::SanStat_CFI_NVCall; + break; + case CFITCK_DerivedCast: + SSK = llvm::SanStat_CFI_DerivedCast; + break; + case CFITCK_UnrelatedCast: + SSK = llvm::SanStat_CFI_UnrelatedCast; + break; + case CFITCK_ICall: + llvm_unreachable("not expecting CFITCK_ICall"); + } + EmitSanitizerStatReport(SSK); llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); - llvm::Value *BitSetName = llvm::MetadataAsValue::get(getLLVMContext(), MD); + llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); - llvm::Value *BitSetTest = - Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::bitset_test), - {CastedVTable, BitSetName}); - - if 
(CGM.getCodeGenOpts().SanitizeCfiCrossDso) { - if (auto TypeId = CGM.CreateCfiIdForTypeMetadata(MD)) { - EmitCfiSlowPathCheck(BitSetTest, TypeId, CastedVTable); - return; - } - } + llvm::Value *TypeTest = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, TypeId}); SanitizerMask M; switch (TCK) { @@ -2582,15 +2739,70 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, case CFITCK_UnrelatedCast: M = SanitizerKind::CFIUnrelatedCast; break; + case CFITCK_ICall: + llvm_unreachable("not expecting CFITCK_ICall"); } llvm::Constant *StaticData[] = { + llvm::ConstantInt::get(Int8Ty, TCK), EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(QualType(RD->getTypeForDecl(), 0)), - llvm::ConstantInt::get(Int8Ty, TCK), }; - EmitCheck(std::make_pair(BitSetTest, M), "cfi_bad_type", StaticData, - CastedVTable); + + auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD); + if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) { + EmitCfiSlowPathCheck(M, TypeTest, CrossDsoTypeId, CastedVTable, StaticData); + return; + } + + if (CGM.getCodeGenOpts().SanitizeTrap.has(M)) { + EmitTrapCheck(TypeTest); + return; + } + + llvm::Value *AllVtables = llvm::MetadataAsValue::get( + CGM.getLLVMContext(), + llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); + llvm::Value *ValidVtable = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, AllVtables}); + EmitCheck(std::make_pair(TypeTest, M), "cfi_check_fail", StaticData, + {CastedVTable, ValidVtable}); +} + +bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { + if (!CGM.getCodeGenOpts().WholeProgramVTables || + !SanOpts.has(SanitizerKind::CFIVCall) || + !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall) || + !CGM.HasHiddenLTOVisibility(RD)) + return false; + + std::string TypeName = RD->getQualifiedNameAsString(); + return !getContext().getSanitizerBlacklist().isBlacklistedType(TypeName); +} + +llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( + const CXXRecordDecl *RD, llvm::Value *VTable, uint64_t VTableByteOffset) { + SanitizerScope SanScope(this); + + EmitSanitizerStatReport(llvm::SanStat_CFI_VCall); + + llvm::Metadata *MD = + CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); + llvm::Value *TypeId = llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD); + + llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); + llvm::Value *CheckedLoad = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_checked_load), + {CastedVTable, llvm::ConstantInt::get(Int32Ty, VTableByteOffset), + TypeId}); + llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); + + EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), + "cfi_check_fail", nullptr, nullptr); + + return Builder.CreateBitCast( + Builder.CreateExtractValue(CheckedLoad, 0), + cast<llvm::PointerType>(VTable->getType())->getElementType()); } // FIXME: Ideally Expr::IgnoreParenNoopCasts should do this, but it doesn't do @@ -2731,7 +2943,7 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType); // Add the rest of the parameters. 
- for (auto param : BD->params()) + for (auto param : BD->parameters()) EmitDelegateCallArg(CallArgs, param, param->getLocStart()); assert(!Lambda->isGenericLambda() && @@ -2761,7 +2973,7 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { CallArgs.add(RValue::get(ThisPtr), ThisType); // Add the rest of the parameters. - for (auto Param : MD->params()) + for (auto Param : MD->parameters()) EmitDelegateCallArg(CallArgs, Param, Param->getLocStart()); const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index ba7dcf7de6c7..b3278b3b4fef 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -112,7 +112,7 @@ RValue DominatingValue<RValue>::saved_type::restore(CodeGenFunction &CGF) { /// Push an entry of the given size onto this protected-scope stack. char *EHScopeStack::allocate(size_t Size) { - Size = llvm::RoundUpToAlignment(Size, ScopeStackAlignment); + Size = llvm::alignTo(Size, ScopeStackAlignment); if (!StartOfBuffer) { unsigned Capacity = 1024; while (Capacity < Size) Capacity *= 2; @@ -143,7 +143,7 @@ char *EHScopeStack::allocate(size_t Size) { } void EHScopeStack::deallocate(size_t Size) { - StartOfData += llvm::RoundUpToAlignment(Size, ScopeStackAlignment); + StartOfData += llvm::alignTo(Size, ScopeStackAlignment); } bool EHScopeStack::containsOnlyLifetimeMarkers( @@ -157,6 +157,20 @@ bool EHScopeStack::containsOnlyLifetimeMarkers( return true; } +bool EHScopeStack::requiresLandingPad() const { + for (stable_iterator si = getInnermostEHScope(); si != stable_end(); ) { + // Skip lifetime markers. + if (auto *cleanup = dyn_cast<EHCleanupScope>(&*find(si))) + if (cleanup->isLifetimeMarker()) { + si = cleanup->getEnclosingEHScope(); + continue; + } + return true; + } + + return false; +} + EHScopeStack::stable_iterator EHScopeStack::getInnermostActiveNormalCleanup() const { for (stable_iterator si = getInnermostNormalCleanup(), se = stable_end(); @@ -174,6 +188,7 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) { bool IsNormalCleanup = Kind & NormalCleanup; bool IsEHCleanup = Kind & EHCleanup; bool IsActive = !(Kind & InactiveCleanup); + bool IsLifetimeMarker = Kind & LifetimeMarker; EHCleanupScope *Scope = new (Buffer) EHCleanupScope(IsNormalCleanup, IsEHCleanup, @@ -186,6 +201,8 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) { InnermostNormalCleanup = stable_begin(); if (IsEHCleanup) InnermostEHScope = stable_begin(); + if (IsLifetimeMarker) + Scope->setLifetimeMarker(); return Scope->getCleanupBuffer(); } diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h index 909f00b05925..98d01b1326c9 100644 --- a/lib/CodeGen/CGCleanup.h +++ b/lib/CodeGen/CGCleanup.h @@ -86,11 +86,6 @@ protected: /// The amount of extra storage needed by the Cleanup. /// Always a multiple of the scope-stack alignment. unsigned CleanupSize : 12; - - /// The number of fixups required by enclosing scopes (not including - /// this one). If this is the top cleanup scope, all the fixups - /// from this index onwards belong to this scope. 
- unsigned FixupDepth : 32 - 18 - NumCommonBits; // currently 12 }; class FilterBitFields { @@ -188,6 +183,7 @@ public: EHScopeStack::stable_iterator enclosingEHScope) : EHScope(Catch, enclosingEHScope) { CatchBits.NumHandlers = numHandlers; + assert(CatchBits.NumHandlers == numHandlers && "NumHandlers overflow?"); } unsigned getNumHandlers() const { @@ -263,6 +259,11 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) EHCleanupScope : public EHScope { }; mutable struct ExtInfo *ExtInfo; + /// The number of fixups required by enclosing scopes (not including + /// this one). If this is the top cleanup scope, all the fixups + /// from this index onwards belong to this scope. + unsigned FixupDepth; + struct ExtInfo &getExtInfo() { if (!ExtInfo) ExtInfo = new struct ExtInfo(); return *ExtInfo; @@ -288,8 +289,9 @@ public: unsigned cleanupSize, unsigned fixupDepth, EHScopeStack::stable_iterator enclosingNormal, EHScopeStack::stable_iterator enclosingEH) - : EHScope(EHScope::Cleanup, enclosingEH), EnclosingNormal(enclosingNormal), - NormalBlock(nullptr), ActiveFlag(nullptr), ExtInfo(nullptr) { + : EHScope(EHScope::Cleanup, enclosingEH), + EnclosingNormal(enclosingNormal), NormalBlock(nullptr), + ActiveFlag(nullptr), ExtInfo(nullptr), FixupDepth(fixupDepth) { CleanupBits.IsNormalCleanup = isNormal; CleanupBits.IsEHCleanup = isEH; CleanupBits.IsActive = isActive; @@ -297,7 +299,6 @@ public: CleanupBits.TestFlagInNormalCleanup = false; CleanupBits.TestFlagInEHCleanup = false; CleanupBits.CleanupSize = cleanupSize; - CleanupBits.FixupDepth = fixupDepth; assert(CleanupBits.CleanupSize == cleanupSize && "cleanup size overflow"); } @@ -343,7 +344,7 @@ public: return CleanupBits.TestFlagInEHCleanup; } - unsigned getFixupDepth() const { return CleanupBits.FixupDepth; } + unsigned getFixupDepth() const { return FixupDepth; } EHScopeStack::stable_iterator getEnclosingNormalCleanup() const { return EnclosingNormal; } @@ -451,6 +452,7 @@ public: EHFilterScope(unsigned numFilters) : EHScope(Filter, EHScopeStack::stable_end()) { FilterBits.NumFilters = numFilters; + assert(FilterBits.NumFilters == numFilters && "NumFilters overflow"); } static size_t getSizeForNumFilters(unsigned numFilters) { @@ -540,7 +542,7 @@ public: Size = EHPadEndScope::getSize(); break; } - Ptr += llvm::RoundUpToAlignment(Size, ScopeStackAlignment); + Ptr += llvm::alignTo(Size, ScopeStackAlignment); return *this; } diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 5df85194878d..5e9d73f082fc 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -13,6 +13,7 @@ #include "CGDebugInfo.h" #include "CGBlocks.h" +#include "CGRecordLayout.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CodeGenFunction.h" @@ -168,10 +169,10 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context, } // Check namespace. - if (const NamespaceDecl *NSDecl = dyn_cast<NamespaceDecl>(Context)) + if (const auto *NSDecl = dyn_cast<NamespaceDecl>(Context)) return getOrCreateNameSpace(NSDecl); - if (const RecordDecl *RDecl = dyn_cast<RecordDecl>(Context)) + if (const auto *RDecl = dyn_cast<RecordDecl>(Context)) if (!RDecl->isDependentType()) return getOrCreateType(CGM.getContext().getTypeDeclType(RDecl), getOrCreateMainFile()); @@ -184,30 +185,32 @@ StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) { FunctionTemplateSpecializationInfo *Info = FD->getTemplateSpecializationInfo(); - if (!Info && FII && !CGM.getCodeGenOpts().EmitCodeView) + // Emit the unqualified name in normal operation. 
LLVM and the debugger can + // compute the fully qualified name from the scope chain. If we're only + // emitting line table info, there won't be any scope chains, so emit the + // fully qualified name here so that stack traces are more accurate. + // FIXME: Do this when emitting DWARF as well as when emitting CodeView after + // evaluating the size impact. + bool UseQualifiedName = DebugKind == codegenoptions::DebugLineTablesOnly && + CGM.getCodeGenOpts().EmitCodeView; + + if (!Info && FII && !UseQualifiedName) return FII->getName(); - // Otherwise construct human readable name for debug info. SmallString<128> NS; llvm::raw_svector_ostream OS(NS); PrintingPolicy Policy(CGM.getLangOpts()); - - if (CGM.getCodeGenOpts().EmitCodeView) { - // Print a fully qualified name like MSVC would. - Policy.MSVCFormatting = true; - FD->printQualifiedName(OS, Policy); - } else { - // Print the unqualified name with some template arguments. This is what - // DWARF-based debuggers expect. + Policy.MSVCFormatting = CGM.getCodeGenOpts().EmitCodeView; + if (!UseQualifiedName) FD->printName(OS); - // Add any template specialization args. - if (Info) { - const TemplateArgumentList *TArgs = Info->TemplateArguments; - const TemplateArgument *Args = TArgs->data(); - unsigned NumArgs = TArgs->size(); - TemplateSpecializationType::PrintTemplateArgumentList(OS, Args, NumArgs, - Policy); - } + else + FD->printQualifiedName(OS, Policy); + + // Add any template specialization args. + if (Info) { + const TemplateArgumentList *TArgs = Info->TemplateArguments; + TemplateSpecializationType::PrintTemplateArgumentList(OS, TArgs->asArray(), + Policy); } // Copy this name on the side and use its reference. @@ -219,21 +222,18 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) { llvm::raw_svector_ostream OS(MethodName); OS << (OMD->isInstanceMethod() ? 
'-' : '+') << '['; const DeclContext *DC = OMD->getDeclContext(); - if (const ObjCImplementationDecl *OID = - dyn_cast<const ObjCImplementationDecl>(DC)) { + if (const auto *OID = dyn_cast<ObjCImplementationDecl>(DC)) { OS << OID->getName(); - } else if (const ObjCInterfaceDecl *OID = - dyn_cast<const ObjCInterfaceDecl>(DC)) { + } else if (const auto *OID = dyn_cast<ObjCInterfaceDecl>(DC)) { OS << OID->getName(); - } else if (const ObjCCategoryDecl *OC = dyn_cast<ObjCCategoryDecl>(DC)) { + } else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(DC)) { if (OC->IsClassExtension()) { OS << OC->getClassInterface()->getName(); } else { - OS << ((const NamedDecl *)OC)->getIdentifier()->getNameStart() << '(' + OS << OC->getIdentifier()->getNameStart() << '(' << OC->getIdentifier()->getNameStart() << ')'; } - } else if (const ObjCCategoryImplDecl *OCD = - dyn_cast<const ObjCCategoryImplDecl>(DC)) { + } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) { OS << ((const NamedDecl *)OCD)->getIdentifier()->getNameStart() << '(' << OCD->getIdentifier()->getNameStart() << ')'; } else if (isa<ObjCProtocolDecl>(DC)) { @@ -254,20 +254,56 @@ StringRef CGDebugInfo::getSelectorName(Selector S) { } StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { - // quick optimization to avoid having to intern strings that are already - // stored reliably elsewhere - if (!isa<ClassTemplateSpecializationDecl>(RD)) - return RD->getName(); - - SmallString<128> Name; - { + if (isa<ClassTemplateSpecializationDecl>(RD)) { + SmallString<128> Name; llvm::raw_svector_ostream OS(Name); RD->getNameForDiagnostic(OS, CGM.getContext().getPrintingPolicy(), /*Qualified*/ false); + + // Copy this name on the side and use its reference. + return internString(Name); } - // Copy this name on the side and use its reference. - return internString(Name); + // quick optimization to avoid having to intern strings that are already + // stored reliably elsewhere + if (const IdentifierInfo *II = RD->getIdentifier()) + return II->getName(); + + // The CodeView printer in LLVM wants to see the names of unnamed types: it is + // used to reconstruct the fully qualified type names. + if (CGM.getCodeGenOpts().EmitCodeView) { + if (const TypedefNameDecl *D = RD->getTypedefNameForAnonDecl()) { + assert(RD->getDeclContext() == D->getDeclContext() && + "Typedef should not be in another decl context!"); + assert(D->getDeclName().getAsIdentifierInfo() && + "Typedef was not named!"); + return D->getDeclName().getAsIdentifierInfo()->getName(); + } + + if (CGM.getLangOpts().CPlusPlus) { + StringRef Name; + + ASTContext &Context = CGM.getContext(); + if (const DeclaratorDecl *DD = Context.getDeclaratorForUnnamedTagDecl(RD)) + // Anonymous types without a name for linkage purposes have their + // declarator mangled in if they have one. + Name = DD->getName(); + else if (const TypedefNameDecl *TND = + Context.getTypedefNameForUnnamedTagDecl(RD)) + // Anonymous types without a name for linkage purposes have their + // associate typedef mangled in if they have one. 
+ Name = TND->getName(); + + if (!Name.empty()) { + SmallString<256> UnnamedType("<unnamed-type-"); + UnnamedType += Name; + UnnamedType += '>'; + return internString(UnnamedType); + } + } + } + + return StringRef(); } llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { @@ -383,6 +419,8 @@ void CGDebugInfo::CreateCompileUnit() { LangTag = llvm::dwarf::DW_LANG_C_plus_plus; } else if (LO.ObjC1) { LangTag = llvm::dwarf::DW_LANG_ObjC; + } else if (LO.RenderScript) { + LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript; } else if (LO.C99) { LangTag = llvm::dwarf::DW_LANG_C99; } else { @@ -396,16 +434,27 @@ void CGDebugInfo::CreateCompileUnit() { if (LO.ObjC1) RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1; + llvm::DICompileUnit::DebugEmissionKind EmissionKind; + switch (DebugKind) { + case codegenoptions::NoDebugInfo: + case codegenoptions::LocTrackingOnly: + EmissionKind = llvm::DICompileUnit::NoDebug; + break; + case codegenoptions::DebugLineTablesOnly: + EmissionKind = llvm::DICompileUnit::LineTablesOnly; + break; + case codegenoptions::LimitedDebugInfo: + case codegenoptions::FullDebugInfo: + EmissionKind = llvm::DICompileUnit::FullDebug; + break; + } + // Create new compile unit. // FIXME - Eliminate TheCU. TheCU = DBuilder.createCompileUnit( LangTag, remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, - CGM.getCodeGenOpts().SplitDwarfFile, - DebugKind <= CodeGenOptions::DebugLineTablesOnly - ? llvm::DIBuilder::LineTablesOnly - : llvm::DIBuilder::FullDebug, - 0 /* DWOid */, DebugKind != CodeGenOptions::LocTrackingOnly); + CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -463,39 +512,11 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return SelTy; } - case BuiltinType::OCLImage1d: - return getOrCreateStructPtrType("opencl_image1d_t", OCLImage1dDITy); - case BuiltinType::OCLImage1dArray: - return getOrCreateStructPtrType("opencl_image1d_array_t", - OCLImage1dArrayDITy); - case BuiltinType::OCLImage1dBuffer: - return getOrCreateStructPtrType("opencl_image1d_buffer_t", - OCLImage1dBufferDITy); - case BuiltinType::OCLImage2d: - return getOrCreateStructPtrType("opencl_image2d_t", OCLImage2dDITy); - case BuiltinType::OCLImage2dArray: - return getOrCreateStructPtrType("opencl_image2d_array_t", - OCLImage2dArrayDITy); - case BuiltinType::OCLImage2dDepth: - return getOrCreateStructPtrType("opencl_image2d_depth_t", - OCLImage2dDepthDITy); - case BuiltinType::OCLImage2dArrayDepth: - return getOrCreateStructPtrType("opencl_image2d_array_depth_t", - OCLImage2dArrayDepthDITy); - case BuiltinType::OCLImage2dMSAA: - return getOrCreateStructPtrType("opencl_image2d_msaa_t", - OCLImage2dMSAADITy); - case BuiltinType::OCLImage2dArrayMSAA: - return getOrCreateStructPtrType("opencl_image2d_array_msaa_t", - OCLImage2dArrayMSAADITy); - case BuiltinType::OCLImage2dMSAADepth: - return getOrCreateStructPtrType("opencl_image2d_msaa_depth_t", - OCLImage2dMSAADepthDITy); - case BuiltinType::OCLImage2dArrayMSAADepth: - return getOrCreateStructPtrType("opencl_image2d_array_msaa_depth_t", - OCLImage2dArrayMSAADepthDITy); - case BuiltinType::OCLImage3d: - return getOrCreateStructPtrType("opencl_image3d_t", OCLImage3dDITy); +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: \ + return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \ + SingletonId); +#include 
"clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: return DBuilder.createBasicType( "opencl_sampler_t", CGM.getContext().getTypeSize(BT), @@ -545,7 +566,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::Half: case BuiltinType::Float: case BuiltinType::LongDouble: + case BuiltinType::Float128: case BuiltinType::Double: + // FIXME: For targets where long double and __float128 have the same size, + // they are currently indistinguishable in the debugger without some + // special treatment. However, there is currently no consensus on encoding + // and this should be updated once a DWARF encoding exists for distinct + // floating point types of the same size. Encoding = llvm::dwarf::DW_ATE_float; break; } @@ -660,10 +687,6 @@ static SmallString<256> getUniqueTagTypeName(const TagType *Ty, if (!hasCXXMangling(TD, TheCU) || !TD->isExternallyVisible()) return FullName; - // Microsoft Mangler does not have support for mangleCXXRTTIName yet. - if (CGM.getTarget().getCXXABI().isMicrosoft()) - return FullName; - // TODO: This is using the RTTI name. Is there a better way to get // a unique string for a type? llvm::raw_svector_ostream Out(FullName); @@ -817,10 +840,10 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, /*qualified*/ false); TemplateSpecializationType::PrintTemplateArgumentList( - OS, Ty->getArgs(), Ty->getNumArgs(), + OS, Ty->template_arguments(), CGM.getContext().getPrintingPolicy()); - TypeAliasDecl *AliasDecl = cast<TypeAliasTemplateDecl>( + auto *AliasDecl = cast<TypeAliasTemplateDecl>( Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl(); SourceLocation Loc = AliasDecl->getLocation(); @@ -842,6 +865,39 @@ llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, getDeclContextDescriptor(Ty->getDecl())); } +static unsigned getDwarfCC(CallingConv CC) { + switch (CC) { + case CC_C: + // Avoid emitting DW_AT_calling_convention if the C convention was used. + return 0; + + case CC_X86StdCall: + return llvm::dwarf::DW_CC_BORLAND_stdcall; + case CC_X86FastCall: + return llvm::dwarf::DW_CC_BORLAND_msfastcall; + case CC_X86ThisCall: + return llvm::dwarf::DW_CC_BORLAND_thiscall; + case CC_X86VectorCall: + return llvm::dwarf::DW_CC_LLVM_vectorcall; + case CC_X86Pascal: + return llvm::dwarf::DW_CC_BORLAND_pascal; + + // FIXME: Create new DW_CC_ codes for these calling conventions. + case CC_X86_64Win64: + case CC_X86_64SysV: + case CC_AAPCS: + case CC_AAPCS_VFP: + case CC_IntelOclBicc: + case CC_SpirFunction: + case CC_OpenCLKernel: + case CC_Swift: + case CC_PreserveMost: + case CC_PreserveAll: + return 0; + } + return 0; +} + llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, llvm::DIFile *Unit) { SmallVector<llvm::Metadata *, 16> EltTys; @@ -853,15 +909,16 @@ llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, // otherwise emit it as a variadic function. 
if (isa<FunctionNoProtoType>(Ty)) EltTys.push_back(DBuilder.createUnspecifiedParameter()); - else if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(Ty)) { - for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i) - EltTys.push_back(getOrCreateType(FPT->getParamType(i), Unit)); + else if (const auto *FPT = dyn_cast<FunctionProtoType>(Ty)) { + for (const QualType &ParamType : FPT->param_types()) + EltTys.push_back(getOrCreateType(ParamType, Unit)); if (FPT->isVariadic()) EltTys.push_back(DBuilder.createUnspecifiedParameter()); } llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys); - return DBuilder.createSubroutineType(EltTypeArray); + return DBuilder.createSubroutineType(EltTypeArray, 0, + getDwarfCC(Ty->getCallConv())); } /// Convert an AccessSpecifier into the corresponding DINode flag. @@ -890,10 +947,38 @@ static unsigned getAccessFlag(AccessSpecifier Access, const RecordDecl *RD) { llvm_unreachable("unexpected access enumerator"); } -llvm::DIType *CGDebugInfo::createFieldType( - StringRef name, QualType type, uint64_t sizeInBitsOverride, - SourceLocation loc, AccessSpecifier AS, uint64_t offsetInBits, - llvm::DIFile *tunit, llvm::DIScope *scope, const RecordDecl *RD) { +llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, + llvm::DIScope *RecordTy, + const RecordDecl *RD) { + StringRef Name = BitFieldDecl->getName(); + QualType Ty = BitFieldDecl->getType(); + SourceLocation Loc = BitFieldDecl->getLocation(); + llvm::DIFile *VUnit = getOrCreateFile(Loc); + llvm::DIType *DebugType = getOrCreateType(Ty, VUnit); + + // Get the location for the field. + llvm::DIFile *File = getOrCreateFile(Loc); + unsigned Line = getLineNumber(Loc); + + const CGBitFieldInfo &BitFieldInfo = + CGM.getTypes().getCGRecordLayout(RD).getBitFieldInfo(BitFieldDecl); + uint64_t SizeInBits = BitFieldInfo.Size; + assert(SizeInBits > 0 && "found named 0-width bitfield"); + unsigned AlignInBits = CGM.getContext().getTypeAlign(Ty); + uint64_t StorageOffsetInBits = + CGM.getContext().toBits(BitFieldInfo.StorageOffset); + uint64_t OffsetInBits = StorageOffsetInBits + BitFieldInfo.Offset; + unsigned Flags = getAccessFlag(BitFieldDecl->getAccess(), RD); + return DBuilder.createBitFieldMemberType( + RecordTy, Name, File, Line, SizeInBits, AlignInBits, OffsetInBits, + StorageOffsetInBits, Flags, DebugType); +} + +llvm::DIType * +CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc, + AccessSpecifier AS, uint64_t offsetInBits, + llvm::DIFile *tunit, llvm::DIScope *scope, + const RecordDecl *RD) { llvm::DIType *debugType = getOrCreateType(type, tunit); // Get the location for the field. 
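// Illustrative sketch (not part of the imported sources): the new createBitFieldType
// path above derives the DWARF bit-field description from clang's CGBitFieldInfo
// instead of the old sizeInBitsOverride parameter. Assuming a typical little-endian
// target and a layout such as:
//
//   struct S { unsigned a : 3; unsigned b : 5; };
//
// the record layout would report, for 'b', roughly BitFieldInfo.StorageOffset == 0
// bytes, BitFieldInfo.Offset == 3 and BitFieldInfo.Size == 5, so the code above
// computes
//
//   uint64_t StorageOffsetInBits =
//       CGM.getContext().toBits(BitFieldInfo.StorageOffset);            // 0
//   uint64_t OffsetInBits = StorageOffsetInBits + BitFieldInfo.Offset;  // 3
//
// and passes SizeInBits == 5 together with those offsets to
// DBuilder.createBitFieldMemberType(), rather than overriding the member size as
// the removed createFieldType overload used to do.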
@@ -906,9 +991,6 @@ llvm::DIType *CGDebugInfo::createFieldType( TypeInfo TI = CGM.getContext().getTypeInfo(type); SizeInBits = TI.Width; AlignInBits = TI.Align; - - if (sizeInBitsOverride) - SizeInBits = sizeInBitsOverride; } unsigned flags = getAccessFlag(AS, RD); @@ -930,19 +1012,15 @@ void CGDebugInfo::CollectRecordLambdaFields( I != E; ++I, ++Field, ++fieldno) { const LambdaCapture &C = *I; if (C.capturesVariable()) { + SourceLocation Loc = C.getLocation(); + assert(!Field->isBitField() && "lambdas don't have bitfield members!"); VarDecl *V = C.getCapturedVar(); - llvm::DIFile *VUnit = getOrCreateFile(C.getLocation()); StringRef VName = V->getName(); - uint64_t SizeInBitsOverride = 0; - if (Field->isBitField()) { - SizeInBitsOverride = Field->getBitWidthValue(CGM.getContext()); - assert(SizeInBitsOverride && "found named 0-width bitfield"); - } - llvm::DIType *fieldType = createFieldType( - VName, Field->getType(), SizeInBitsOverride, C.getLocation(), - Field->getAccess(), layout.getFieldOffset(fieldno), VUnit, RecordTy, - CXXDecl); - elements.push_back(fieldType); + llvm::DIFile *VUnit = getOrCreateFile(Loc); + llvm::DIType *FieldType = createFieldType( + VName, Field->getType(), Loc, Field->getAccess(), + layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl); + elements.push_back(FieldType); } else if (C.capturesThis()) { // TODO: Need to handle 'this' in some way by probably renaming the // this of the lambda class and having a field member of 'this' or @@ -952,7 +1030,7 @@ void CGDebugInfo::CollectRecordLambdaFields( llvm::DIFile *VUnit = getOrCreateFile(f->getLocation()); QualType type = f->getType(); llvm::DIType *fieldType = createFieldType( - "this", type, 0, f->getLocation(), f->getAccess(), + "this", type, f->getLocation(), f->getAccess(), layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl); elements.push_back(fieldType); @@ -1000,24 +1078,23 @@ void CGDebugInfo::CollectRecordNormalField( if (name.empty() && !type->isRecordType()) return; - uint64_t SizeInBitsOverride = 0; + llvm::DIType *FieldType; if (field->isBitField()) { - SizeInBitsOverride = field->getBitWidthValue(CGM.getContext()); - assert(SizeInBitsOverride && "found named 0-width bitfield"); + FieldType = createBitFieldType(field, RecordTy, RD); + } else { + FieldType = + createFieldType(name, type, field->getLocation(), field->getAccess(), + OffsetInBits, tunit, RecordTy, RD); } - llvm::DIType *fieldType = - createFieldType(name, type, SizeInBitsOverride, field->getLocation(), - field->getAccess(), OffsetInBits, tunit, RecordTy, RD); - - elements.push_back(fieldType); + elements.push_back(FieldType); } void CGDebugInfo::CollectRecordFields( const RecordDecl *record, llvm::DIFile *tunit, SmallVectorImpl<llvm::Metadata *> &elements, llvm::DICompositeType *RecordTy) { - const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(record); + const auto *CXXDecl = dyn_cast<CXXRecordDecl>(record); if (CXXDecl && CXXDecl->isLambda()) CollectRecordLambdaFields(CXXDecl, elements, RecordTy); @@ -1031,6 +1108,8 @@ void CGDebugInfo::CollectRecordFields( // the corresponding declarations in the source program. 
for (const auto *I : record->decls()) if (const auto *V = dyn_cast<VarDecl>(I)) { + if (V->hasAttr<NoDebugAttr>()) + continue; // Reuse the existing static member declaration if one exists auto MI = StaticDataMemberCache.find(V->getCanonicalDecl()); if (MI != StaticDataMemberCache.end()) { @@ -1112,13 +1191,14 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( if (Func->getExtProtoInfo().RefQualifier == RQ_RValue) Flags |= llvm::DINode::FlagRValueReference; - return DBuilder.createSubroutineType(EltTypeArray, Flags); + return DBuilder.createSubroutineType(EltTypeArray, Flags, + getDwarfCC(Func->getCallConv())); } /// isFunctionLocalClass - Return true if CXXRecordDecl is defined /// inside a function. static bool isFunctionLocalClass(const CXXRecordDecl *RD) { - if (const CXXRecordDecl *NRD = dyn_cast<CXXRecordDecl>(RD->getDeclContext())) + if (const auto *NRD = dyn_cast<CXXRecordDecl>(RD->getDeclContext())) return isFunctionLocalClass(NRD); if (isa<FunctionDecl>(RD->getDeclContext())) return true; @@ -1136,6 +1216,11 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // Since a single ctor/dtor corresponds to multiple functions, it doesn't // make sense to give a single ctor/dtor a linkage name. StringRef MethodLinkageName; + // FIXME: 'isFunctionLocalClass' seems like an arbitrary/unintentional + // property to use here. It may've been intended to model "is non-external + // type" but misses cases of non-function-local but non-external classes such + // as those in anonymous namespaces as well as the reverse - external types + // that are function local, such as those in (non-local) inline functions. if (!IsCtorOrDtor && !isFunctionLocalClass(Method->getParent())) MethodLinkageName = CGM.getMangledName(Method); @@ -1151,6 +1236,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( llvm::DIType *ContainingType = nullptr; unsigned Virtuality = 0; unsigned VIndex = 0; + unsigned Flags = 0; + int ThisAdjustment = 0; if (Method->isVirtual()) { if (Method->isPure()) @@ -1158,26 +1245,45 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( else Virtuality = llvm::dwarf::DW_VIRTUALITY_virtual; - // It doesn't make sense to give a virtual destructor a vtable index, - // since a single destructor has two entries in the vtable. - // FIXME: Add proper support for debug info for virtual calls in - // the Microsoft ABI, where we may use multiple vptrs to make a vftable - // lookup if we have multiple or virtual inheritance. - if (!isa<CXXDestructorDecl>(Method) && - !CGM.getTarget().getCXXABI().isMicrosoft()) - VIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(Method); + if (CGM.getTarget().getCXXABI().isItaniumFamily()) { + // It doesn't make sense to give a virtual destructor a vtable index, + // since a single destructor has two entries in the vtable. + if (!isa<CXXDestructorDecl>(Method)) + VIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(Method); + } else { + // Emit MS ABI vftable information. There is only one entry for the + // deleting dtor. + const auto *DD = dyn_cast<CXXDestructorDecl>(Method); + GlobalDecl GD = DD ? GlobalDecl(DD, Dtor_Deleting) : GlobalDecl(Method); + MicrosoftVTableContext::MethodVFTableLocation ML = + CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD); + VIndex = ML.Index; + + // CodeView only records the vftable offset in the class that introduces + // the virtual method. 
This is possible because, unlike Itanium, the MS + // C++ ABI does not include all virtual methods from non-primary bases in + // the vtable for the most derived class. For example, if C inherits from + // A and B, C's primary vftable will not include B's virtual methods. + if (Method->begin_overridden_methods() == Method->end_overridden_methods()) + Flags |= llvm::DINode::FlagIntroducedVirtual; + + // The 'this' adjustment accounts for both the virtual and non-virtual + // portions of the adjustment. Presumably the debugger only uses it when + // it knows the dynamic type of an object. + ThisAdjustment = CGM.getCXXABI() + .getVirtualFunctionPrologueThisAdjustment(GD) + .getQuantity(); + } ContainingType = RecordTy; } - unsigned Flags = 0; if (Method->isImplicit()) Flags |= llvm::DINode::FlagArtificial; Flags |= getAccessFlag(Method->getAccess(), Method->getParent()); - if (const CXXConstructorDecl *CXXC = dyn_cast<CXXConstructorDecl>(Method)) { + if (const auto *CXXC = dyn_cast<CXXConstructorDecl>(Method)) { if (CXXC->isExplicit()) Flags |= llvm::DINode::FlagExplicit; - } else if (const CXXConversionDecl *CXXC = - dyn_cast<CXXConversionDecl>(Method)) { + } else if (const auto *CXXC = dyn_cast<CXXConversionDecl>(Method)) { if (CXXC->isExplicit()) Flags |= llvm::DINode::FlagExplicit; } @@ -1191,9 +1297,9 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit); llvm::DISubprogram *SP = DBuilder.createMethod( RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine, - MethodTy, /*isLocalToUnit=*/false, - /* isDefinition=*/false, Virtuality, VIndex, ContainingType, Flags, - CGM.getLangOpts().Optimize, TParamsArray.get()); + MethodTy, /*isLocalToUnit=*/false, /*isDefinition=*/false, Virtuality, + VIndex, ThisAdjustment, ContainingType, Flags, CGM.getLangOpts().Optimize, + TParamsArray.get()); SPCache[Method->getCanonicalDecl()].reset(SP); @@ -1246,7 +1352,7 @@ void CGDebugInfo::CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile *Unit, unsigned BFlags = 0; uint64_t BaseOffset; - const CXXRecordDecl *Base = + const auto *Base = cast<CXXRecordDecl>(BI.getType()->getAs<RecordType>()->getDecl()); if (BI.isVirtual()) { @@ -1334,8 +1440,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, llvm::Constant *V = nullptr; // Special case member data pointer null values since they're actually -1 // instead of zero. - if (const MemberPointerType *MPT = - dyn_cast<MemberPointerType>(T.getTypePtr())) + if (const auto *MPT = dyn_cast<MemberPointerType>(T.getTypePtr())) // But treat member function pointers as simple zero integers because // it's easier than having a special case in LLVM's CodeGen. 
If LLVM // CodeGen grows handling for values of non-null member function @@ -1346,7 +1451,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, if (!V) V = llvm::ConstantInt::get(CGM.Int8Ty, 0); TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, cast<llvm::Constant>(V))); + TheCU, Name, TTy, V)); } break; case TemplateArgument::Template: TemplateParams.push_back(DBuilder.createTemplateTemplateParameter( @@ -1367,7 +1472,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, assert(V && "Expression in template argument isn't constant"); llvm::DIType *TTy = getOrCreateType(T, Unit); TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, cast<llvm::Constant>(V->stripPointerCasts()))); + TheCU, Name, TTy, V->stripPointerCasts())); } break; // And the following should never occur: case TemplateArgument::TemplateExpansion: @@ -1446,7 +1551,7 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, llvm::DIType *CGDebugInfo::getOrCreateRecordType(QualType RTy, SourceLocation Loc) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); llvm::DIType *T = getOrCreateType(RTy, getOrCreateFile(Loc)); return T; } @@ -1458,22 +1563,17 @@ llvm::DIType *CGDebugInfo::getOrCreateInterfaceType(QualType D, llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, SourceLocation Loc) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!D.isNull() && "null type"); llvm::DIType *T = getOrCreateType(D, getOrCreateFile(Loc)); assert(T && "could not create debug info for type"); - // Composite types with UIDs were already retained by DIBuilder - // because they are only referenced by name in the IR. 
- if (auto *CTy = dyn_cast<llvm::DICompositeType>(T)) - if (!CTy->getIdentifier().empty()) - return T; RetainedTypes.push_back(D.getAsOpaquePtr()); return T; } void CGDebugInfo::completeType(const EnumDecl *ED) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; QualType Ty = CGM.getContext().getEnumType(ED); void *TyPtr = Ty.getAsOpaquePtr(); @@ -1486,16 +1586,16 @@ void CGDebugInfo::completeType(const EnumDecl *ED) { } void CGDebugInfo::completeType(const RecordDecl *RD) { - if (DebugKind > CodeGenOptions::LimitedDebugInfo || + if (DebugKind > codegenoptions::LimitedDebugInfo || !CGM.getLangOpts().CPlusPlus) completeRequiredType(RD); } void CGDebugInfo::completeRequiredType(const RecordDecl *RD) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; - if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) + if (const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) if (CXXDecl->isDynamicClass()) return; @@ -1509,7 +1609,7 @@ void CGDebugInfo::completeRequiredType(const RecordDecl *RD) { } void CGDebugInfo::completeClassData(const RecordDecl *RD) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; QualType Ty = CGM.getContext().getRecordType(RD); void *TyPtr = Ty.getAsOpaquePtr(); @@ -1523,23 +1623,38 @@ void CGDebugInfo::completeClassData(const RecordDecl *RD) { static bool hasExplicitMemberDefinition(CXXRecordDecl::method_iterator I, CXXRecordDecl::method_iterator End) { - for (; I != End; ++I) - if (FunctionDecl *Tmpl = I->getInstantiatedFromMemberFunction()) + for (CXXMethodDecl *MD : llvm::make_range(I, End)) + if (FunctionDecl *Tmpl = MD->getInstantiatedFromMemberFunction()) if (!Tmpl->isImplicit() && Tmpl->isThisDeclarationADefinition() && - !I->getMemberSpecializationInfo()->isExplicitSpecialization()) + !MD->getMemberSpecializationInfo()->isExplicitSpecialization()) return true; return false; } -static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind, - bool DebugTypeExtRefs, - const RecordDecl *RD, +/// Does a type definition exist in an imported clang module? +static bool isDefinedInClangModule(const RecordDecl *RD) { + if (!RD || !RD->isFromASTFile()) + return false; + if (!RD->isExternallyVisible() && RD->getName().empty()) + return false; + if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) { + assert(CXXDecl->isCompleteDefinition() && "incomplete record definition"); + if (CXXDecl->getTemplateSpecializationKind() != TSK_Undeclared) + // Make sure the instantiation is actually in a module. + if (CXXDecl->field_begin() != CXXDecl->field_end()) + return CXXDecl->field_begin()->isFromASTFile(); + } + + return true; +} + +static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, + bool DebugTypeExtRefs, const RecordDecl *RD, const LangOptions &LangOpts) { - // Does the type exist in an imported clang module? 
- if (DebugTypeExtRefs && RD->isFromASTFile() && RD->getDefinition()) - return true; + if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition())) + return true; - if (DebugKind > CodeGenOptions::LimitedDebugInfo) + if (DebugKind > codegenoptions::LimitedDebugInfo) return false; if (!LangOpts.CPlusPlus) @@ -1548,7 +1663,7 @@ static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind, if (!RD->isCompleteDefinitionRequired()) return true; - const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD); + const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD); if (!CXXDecl) return false; @@ -1557,8 +1672,7 @@ static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind, return true; TemplateSpecializationKind Spec = TSK_Undeclared; - if (const ClassTemplateSpecializationDecl *SD = - dyn_cast<ClassTemplateSpecializationDecl>(RD)) + if (const auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) Spec = SD->getSpecializationKind(); if (Spec == TSK_ExplicitInstantiationDeclaration && @@ -1600,7 +1714,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { if (!D || !D->isCompleteDefinition()) return FwdDecl; - if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) + if (const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) CollectContainingType(CXXDecl, FwdDecl); // Push the struct on region stack. @@ -1615,7 +1729,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { // gdb tests will depend on a certain ordering at printout. The debug // information offsets are still correct if we merge them all together // though. - const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD); + const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD); if (CXXDecl) { CollectCXXBases(CXXDecl, DefUnit, EltTys, FwdDecl); CollectVTableInfo(CXXDecl, DefUnit, EltTys); @@ -1676,8 +1790,11 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, if (!ID) return nullptr; - // Return a forward declaration if this type was imported from a clang module. - if (DebugTypeExtRefs && ID->isFromASTFile() && ID->getDefinition()) + // Return a forward declaration if this type was imported from a clang module, + // and this is not the compile unit with the implementation of the type (which + // may contain hidden ivars). + if (DebugTypeExtRefs && ID->isFromASTFile() && ID->getDefinition() && + !ID->getImplementation()) return DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, ID->getName(), getDeclContextDescriptor(ID), Unit, 0); @@ -1739,11 +1856,14 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, bool IsRootModule = M ? !M->Parent : true; if (CreateSkeletonCU && IsRootModule) { + // PCH files don't have a signature field in the control block, + // but LLVM detects skeleton CUs by looking for a non-zero DWO id. + uint64_t Signature = Mod.getSignature() ? 
Mod.getSignature() : ~1ULL; llvm::DIBuilder DIB(CGM.getModule()); DIB.createCompileUnit(TheCU->getSourceLanguage(), Mod.getModuleName(), Mod.getPath(), TheCU->getProducer(), true, StringRef(), 0, Mod.getASTFile(), - llvm::DIBuilder::FullDebug, Mod.getSignature()); + llvm::DICompileUnit::FullDebug, Signature); DIB.finalize(); } llvm::DIModule *Parent = @@ -1942,7 +2062,7 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { uint64_t Align; // FIXME: make getTypeAlign() aware of VLAs and incomplete array types - if (const VariableArrayType *VAT = dyn_cast<VariableArrayType>(Ty)) { + if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) { Size = 0; Align = CGM.getContext().getTypeAlign(CGM.getContext().getBaseElementType(VAT)); @@ -1975,7 +2095,7 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { // int x[0]; // }; int64_t Count = -1; // Count == -1 is an unbounded array. - if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Ty)) + if (const auto *CAT = dyn_cast<ConstantArrayType>(Ty)) Count = CAT->getSize().getZExtValue(); // FIXME: Verify this is right for VLAs. @@ -2003,12 +2123,35 @@ llvm::DIType *CGDebugInfo::CreateType(const RValueReferenceType *Ty, llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, llvm::DIFile *U) { - uint64_t Size = - !Ty->isIncompleteType() ? CGM.getContext().getTypeSize(Ty) : 0; + unsigned Flags = 0; + uint64_t Size = 0; + + if (!Ty->isIncompleteType()) { + Size = CGM.getContext().getTypeSize(Ty); + + // Set the MS inheritance model. There is no flag for the unspecified model. + if (CGM.getTarget().getCXXABI().isMicrosoft()) { + switch (Ty->getMostRecentCXXRecordDecl()->getMSInheritanceModel()) { + case MSInheritanceAttr::Keyword_single_inheritance: + Flags |= llvm::DINode::FlagSingleInheritance; + break; + case MSInheritanceAttr::Keyword_multiple_inheritance: + Flags |= llvm::DINode::FlagMultipleInheritance; + break; + case MSInheritanceAttr::Keyword_virtual_inheritance: + Flags |= llvm::DINode::FlagVirtualInheritance; + break; + case MSInheritanceAttr::Keyword_unspecified_inheritance: + break; + } + } + } + llvm::DIType *ClassType = getOrCreateType(QualType(Ty->getClass(), 0), U); if (Ty->isMemberDataPointerType()) return DBuilder.createMemberPointerType( - getOrCreateType(Ty->getPointeeType(), U), ClassType, Size); + getOrCreateType(Ty->getPointeeType(), U), ClassType, Size, /*Align=*/0, + Flags); const FunctionProtoType *FPT = Ty->getPointeeType()->getAs<FunctionProtoType>(); @@ -2016,7 +2159,7 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType( Ty->getClass(), FPT->getTypeQuals())), FPT, U), - ClassType, Size); + ClassType, Size, /*Align=*/0, Flags); } llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) { @@ -2048,13 +2191,23 @@ llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) { // If this is just a forward declaration, construct an appropriately // marked node and just return it. if (isImportedFromModule || !ED->getDefinition()) { + // Note that it is possible for enums to be created as part of + // their own declcontext. In this case a FwdDecl will be created + // twice. This doesn't cause a problem because both FwdDecls are + // entered into the ReplaceMap: finalize() will replace the first + // FwdDecl with the second and then replace the second with + // complete type. 
llvm::DIScope *EDContext = getDeclContextDescriptor(ED); llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation()); + llvm::TempDIScope TmpContext(DBuilder.createReplaceableCompositeType( + llvm::dwarf::DW_TAG_enumeration_type, "", TheCU, DefUnit, 0)); + unsigned Line = getLineNumber(ED->getLocation()); StringRef EDName = ED->getName(); llvm::DIType *RetTy = DBuilder.createReplaceableCompositeType( llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line, 0, Size, Align, llvm::DINode::FlagFwdDecl, FullName); + ReplaceMap.emplace_back( std::piecewise_construct, std::make_tuple(Ty), std::make_tuple(static_cast<llvm::Metadata *>(RetTy))); @@ -2168,7 +2321,7 @@ llvm::DIType *CGDebugInfo::getTypeOrNull(QualType Ty) { void CGDebugInfo::completeTemplateDefinition( const ClassTemplateSpecializationDecl &SD) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; completeClassData(&SD); @@ -2220,8 +2373,12 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) { // option. FullSourceLoc Loc(D->getLocation(), CGM.getContext().getSourceManager()); if (Module *M = ClangModuleMap->inferModuleFromLocation(Loc)) { + // This is a (sub-)module. auto Info = ExternalASTSource::ASTSourceDescriptor(*M); return getOrCreateModuleRef(Info, /*SkeletonCU=*/false); + } else { + // This the precompiled header being built. + return getOrCreateModuleRef(PCHDescriptor, /*SkeletonCU=*/false); } } @@ -2369,11 +2526,34 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align, 0, FullName); + // Elements of composite types usually have back to the type, creating + // uniquing cycles. Distinct nodes are more efficient. + switch (RealDecl->getTag()) { + default: + llvm_unreachable("invalid composite type tag"); + + case llvm::dwarf::DW_TAG_array_type: + case llvm::dwarf::DW_TAG_enumeration_type: + // Array elements and most enumeration elements don't have back references, + // so they don't tend to be involved in uniquing cycles and there is some + // chance of merging them when linking together two modules. Only make + // them distinct if they are ODR-uniqued. + if (FullName.empty()) + break; + + case llvm::dwarf::DW_TAG_structure_type: + case llvm::dwarf::DW_TAG_union_type: + case llvm::dwarf::DW_TAG_class_type: + // Immediatley resolve to a distinct node. + RealDecl = + llvm::MDNode::replaceWithDistinct(llvm::TempDICompositeType(RealDecl)); + break; + } + RegionMap[Ty->getDecl()].reset(RealDecl); TypeCache[QualType(Ty, 0).getAsOpaquePtr()].reset(RealDecl); - if (const ClassTemplateSpecializationDecl *TSpecial = - dyn_cast<ClassTemplateSpecializationDecl>(RD)) + if (const auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) DBuilder.replaceArrays(RealDecl, llvm::DINodeArray(), CollectCXXTemplateParams(TSpecial, DefUnit)); return RealDecl; @@ -2420,7 +2600,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, llvm::DIScope *&FDContext, llvm::DINodeArray &TParamsArray, unsigned &Flags) { - const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); + const auto *FD = cast<FunctionDecl>(GD.getDecl()); Name = getFunctionName(FD); // Use mangled name as linkage name for C/C++ functions. 
if (FD->hasPrototype()) { @@ -2430,13 +2610,12 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, // No need to replicate the linkage name if it isn't different from the // subprogram name, no need to have it at all unless coverage is enabled or // debug is set to more than just line tables. - if (LinkageName == Name || - (!CGM.getCodeGenOpts().EmitGcovArcs && - !CGM.getCodeGenOpts().EmitGcovNotes && - DebugKind <= CodeGenOptions::DebugLineTablesOnly)) + if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs && + !CGM.getCodeGenOpts().EmitGcovNotes && + DebugKind <= codegenoptions::DebugLineTablesOnly)) LinkageName = StringRef(); - if (DebugKind >= CodeGenOptions::LimitedDebugInfo) { + if (DebugKind >= codegenoptions::LimitedDebugInfo) { if (const NamespaceDecl *NSDecl = dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext())) FDContext = getOrCreateNameSpace(NSDecl); @@ -2513,15 +2692,15 @@ CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) { SmallVector<QualType, 16> ArgTypes; for (const ParmVarDecl *Parm: FD->parameters()) ArgTypes.push_back(Parm->getType()); - QualType FnType = - CGM.getContext().getFunctionType(FD->getReturnType(), ArgTypes, - FunctionProtoType::ExtProtoInfo()); + CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv(); + QualType FnType = CGM.getContext().getFunctionType( + FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl( DContext, Name, LinkageName, Unit, Line, getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(), /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize, TParamsArray.get(), getFunctionDeclaration(FD)); - const FunctionDecl *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl()); + const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl()); FwdDeclReplaceMap.emplace_back(std::piecewise_construct, std::make_tuple(CanonDecl), std::make_tuple(SP)); @@ -2553,7 +2732,7 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { // we would otherwise do to get a type for a pointee. (forward declarations in // limited debug info, full definitions (if the type definition is available) // in unlimited debug info) - if (const TypeDecl *TD = dyn_cast<TypeDecl>(D)) + if (const auto *TD = dyn_cast<TypeDecl>(D)) return getOrCreateType(CGM.getContext().getTypeDeclType(TD), getOrCreateFile(TD->getLocation())); auto I = DeclCache.find(D->getCanonicalDecl()); @@ -2563,7 +2742,7 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { // No definition for now. Emit a forward definition that might be // merged with a potential upcoming definition. 
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) + if (const auto *FD = dyn_cast<FunctionDecl>(D)) return getFunctionForwardDeclaration(FD); else if (const auto *VD = dyn_cast<VarDecl>(D)) return getGlobalVariableForwardDeclaration(VD); @@ -2572,10 +2751,10 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { } llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { - if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly) return nullptr; - const FunctionDecl *FD = dyn_cast<FunctionDecl>(D); + const auto *FD = dyn_cast<FunctionDecl>(D); if (!FD) return nullptr; @@ -2584,8 +2763,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { auto MI = SPCache.find(FD->getCanonicalDecl()); if (MI == SPCache.end()) { - if (const CXXMethodDecl *MD = - dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) { + if (const auto *MD = dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) { return CreateCXXMemberFunction(MD, getOrCreateFile(MD->getLocation()), cast<llvm::DICompositeType>(S)); } @@ -2612,14 +2790,18 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F) { - if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly) // Create fake but valid subroutine type. Otherwise -verify would fail, and // subprogram DIE will miss DW_AT_decl_file and DW_AT_decl_line fields. return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None)); - if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) + if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) return getOrCreateMethodType(Method, F); - if (const ObjCMethodDecl *OMethod = dyn_cast<ObjCMethodDecl>(D)) { + + const auto *FTy = FnType->getAs<FunctionType>(); + CallingConv CC = FTy ? FTy->getCallConv() : CallingConv::CC_C; + + if (const auto *OMethod = dyn_cast<ObjCMethodDecl>(D)) { // Add "self" and "_cmd" SmallVector<llvm::Metadata *, 16> Elts; @@ -2645,28 +2827,28 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, Elts.push_back(DBuilder.createArtificialType( getOrCreateType(CGM.getContext().getObjCSelType(), F))); // Get rest of the arguments. - for (const auto *PI : OMethod->params()) + for (const auto *PI : OMethod->parameters()) Elts.push_back(getOrCreateType(PI->getType(), F)); // Variadic methods need a special marker at the end of the type list. if (OMethod->isVariadic()) Elts.push_back(DBuilder.createUnspecifiedParameter()); llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts); - return DBuilder.createSubroutineType(EltTypeArray); + return DBuilder.createSubroutineType(EltTypeArray, 0, getDwarfCC(CC)); } // Handle variadic function types; they need an additional // unspecified parameter. 
- if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) + if (const auto *FD = dyn_cast<FunctionDecl>(D)) if (FD->isVariadic()) { SmallVector<llvm::Metadata *, 16> EltTys; EltTys.push_back(getOrCreateType(FD->getReturnType(), F)); - if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FnType)) - for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i) - EltTys.push_back(getOrCreateType(FPT->getParamType(i), F)); + if (const auto *FPT = dyn_cast<FunctionProtoType>(FnType)) + for (QualType ParamType : FPT->param_types()) + EltTys.push_back(getOrCreateType(ParamType, F)); EltTys.push_back(DBuilder.createUnspecifiedParameter()); llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys); - return DBuilder.createSubroutineType(EltTypeArray); + return DBuilder.createSubroutineType(EltTypeArray, 0, getDwarfCC(CC)); } return cast<llvm::DISubroutineType>(getOrCreateType(FnType, F)); @@ -2691,7 +2873,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (!HasDecl) { // Use llvm function name. LinkageName = Fn->getName(); - } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + } else if (const auto *FD = dyn_cast<FunctionDecl>(D)) { // If there is a subprogram for this function available then use it. auto FI = SPCache.find(FD->getCanonicalDecl()); if (FI != SPCache.end()) { @@ -2704,7 +2886,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, } collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext, TParamsArray, Flags); - } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) { + } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) { Name = getObjCMethodName(OMD); Flags |= llvm::DINode::FlagPrototyped; } else { @@ -2712,7 +2894,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, Name = Fn->getName(); Flags |= llvm::DINode::FlagPrototyped; } - if (!Name.empty() && Name[0] == '\01') + if (Name.startswith("\01")) Name = Name.substr(1); if (!HasDecl || D->isImplicit()) { @@ -2731,7 +2913,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, // are emitted as CU level entities by the backend. llvm::DISubprogram *SP = DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), Fn->hasInternalLinkage(), + getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(), true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, TParamsArray.get(), getFunctionDeclaration(D)); Fn->setSubprogram(SP); @@ -2739,7 +2921,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, // code for the initialization of globals. Do not record these decls // as they will overwrite the actual VarDecl Decl in the cache. if (HasDecl && isa<FunctionDecl>(D)) - DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(SP)); + DeclCache[D->getCanonicalDecl()].reset(SP); // Push the function onto the lexical block stack. LexicalBlockStack.emplace_back(SP); @@ -2765,7 +2947,7 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, // If there is a DISubprogram for this function available then use it. 
collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext, TParamsArray, Flags); - } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) { + } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) { Name = getObjCMethodName(OMD); Flags |= llvm::DINode::FlagPrototyped; } else { @@ -2783,11 +2965,11 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, unsigned LineNo = getLineNumber(Loc); unsigned ScopeLine = 0; - DBuilder.createFunction(FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), - false /*internalLinkage*/, true /*definition*/, - ScopeLine, Flags, CGM.getLangOpts().Optimize, - TParamsArray.get(), getFunctionDeclaration(D)); + DBuilder.retainType(DBuilder.createFunction( + FDContext, Name, LinkageName, Unit, LineNo, + getOrCreateFunctionType(D, FnType, Unit), false /*internalLinkage*/, + false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, + TParamsArray.get(), getFunctionDeclaration(D))); } void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { @@ -2820,7 +3002,7 @@ void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder, Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( getLineNumber(Loc), getColumnNumber(Loc), LexicalBlockStack.back())); - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; // Create a new lexical block and push it on the stack. @@ -2834,7 +3016,7 @@ void CGDebugInfo::EmitLexicalBlockEnd(CGBuilderTy &Builder, // Provide an entry in the line table for the end of the block. EmitLocation(Builder, Loc); - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; LexicalBlockStack.pop_back(); @@ -2896,8 +3078,7 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, CGM.getTarget().getPointerAlign(0))) { CharUnits FieldOffsetInBytes = CGM.getContext().toCharUnitsFromBits(FieldOffset); - CharUnits AlignedOffsetInBytes = - FieldOffsetInBytes.RoundUpToAlignment(Align); + CharUnits AlignedOffsetInBytes = FieldOffsetInBytes.alignTo(Align); CharUnits NumPaddingBytes = AlignedOffsetInBytes - FieldOffsetInBytes; if (NumPaddingBytes.isPositive()) { @@ -2930,8 +3111,10 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, llvm::Optional<unsigned> ArgNo, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); + if (VD->hasAttr<NoDebugAttr>()) + return; bool Unwritten = VD->isImplicit() || (isa<Decl>(VD->getDeclContext()) && @@ -2969,7 +3152,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, // otherwise it is 'self' or 'this'. 
if (isa<ImplicitParamDecl>(VD) && ArgNo && *ArgNo == 1) Flags |= llvm::DINode::FlagObjectPointer; - if (llvm::Argument *Arg = dyn_cast<llvm::Argument>(Storage)) + if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) if (Arg->getType()->isPointerTy() && !Arg->hasByValAttr() && !VD->getType()->isPointerType()) Expr.push_back(llvm::dwarf::DW_OP_deref); @@ -3005,10 +3188,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, return; } else if (isa<VariableArrayType>(VD->getType())) Expr.push_back(llvm::dwarf::DW_OP_deref); - } else if (const RecordType *RT = dyn_cast<RecordType>(VD->getType())) { + } else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) { // If VD is an anonymous union then Storage represents value for // all union fields. - const RecordDecl *RD = cast<RecordDecl>(RT->getDecl()); + const auto *RD = cast<RecordDecl>(RT->getDecl()); if (RD->isUnion() && RD->isAnonymousStructOrUnion()) { // GDB has trouble finding local variables in anonymous unions, so we emit // artifical local variables for each of the members. @@ -3056,7 +3239,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); EmitDeclare(VD, Storage, llvm::None, Builder); } @@ -3071,11 +3254,13 @@ llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder, const CGBlockInfo &blockInfo, llvm::Instruction *InsertPoint) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); if (Builder.GetInsertBlock() == nullptr) return; + if (VD->hasAttr<NoDebugAttr>()) + return; bool isByRef = VD->hasAttr<BlocksAttr>(); @@ -3139,7 +3324,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( void CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI, unsigned ArgNo, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); EmitDeclare(VD, AI, ArgNo, Builder); } @@ -3158,7 +3343,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, unsigned ArgNo, llvm::Value *LocalAddr, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); ASTContext &C = CGM.getContext(); const BlockDecl *blockDecl = block.getBlockDecl(); @@ -3175,25 +3360,25 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, CGM.getDataLayout().getStructLayout(block.StructureType); SmallVector<llvm::Metadata *, 16> fields; - fields.push_back(createFieldType("__isa", C.VoidPtrTy, 0, loc, AS_public, + fields.push_back(createFieldType("__isa", C.VoidPtrTy, loc, AS_public, blockLayout->getElementOffsetInBits(0), tunit, tunit)); - fields.push_back(createFieldType("__flags", C.IntTy, 0, loc, AS_public, + fields.push_back(createFieldType("__flags", C.IntTy, loc, AS_public, blockLayout->getElementOffsetInBits(1), tunit, tunit)); - fields.push_back(createFieldType("__reserved", C.IntTy, 0, loc, AS_public, + fields.push_back(createFieldType("__reserved", C.IntTy, loc, AS_public, blockLayout->getElementOffsetInBits(2), tunit, 
tunit)); auto *FnTy = block.getBlockExpr()->getFunctionType(); auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar()); - fields.push_back(createFieldType("__FuncPtr", FnPtrType, 0, loc, AS_public, + fields.push_back(createFieldType("__FuncPtr", FnPtrType, loc, AS_public, blockLayout->getElementOffsetInBits(3), tunit, tunit)); fields.push_back(createFieldType( "__descriptor", C.getPointerType(block.NeedsCopyDispose ? C.getBlockDescriptorExtendedType() : C.getBlockDescriptorType()), - 0, loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit)); + loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit)); // We want to sort the captures by offset, not because DWARF // requires this, but because we're paranoid about debuggers. @@ -3227,19 +3412,22 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, // Sort by offset. llvm::array_pod_sort(chunks.begin(), chunks.end()); - for (SmallVectorImpl<BlockLayoutChunk>::iterator i = chunks.begin(), - e = chunks.end(); - i != e; ++i) { - uint64_t offsetInBits = i->OffsetInBits; - const BlockDecl::Capture *capture = i->Capture; + for (const BlockLayoutChunk &Chunk : chunks) { + uint64_t offsetInBits = Chunk.OffsetInBits; + const BlockDecl::Capture *capture = Chunk.Capture; // If we have a null capture, this must be the C++ 'this' capture. if (!capture) { - const CXXMethodDecl *method = - cast<CXXMethodDecl>(blockDecl->getNonClosureContext()); - QualType type = method->getThisType(C); + QualType type; + if (auto *Method = + cast_or_null<CXXMethodDecl>(blockDecl->getNonClosureContext())) + type = Method->getThisType(C); + else if (auto *RDecl = dyn_cast<CXXRecordDecl>(blockDecl->getParent())) + type = QualType(RDecl->getTypeForDecl(), 0); + else + llvm_unreachable("unexpected block declcontext"); - fields.push_back(createFieldType("this", type, 0, loc, AS_public, + fields.push_back(createFieldType("this", type, loc, AS_public, offsetInBits, tunit, tunit)); continue; } @@ -3259,7 +3447,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, DBuilder.createMemberType(tunit, name, tunit, line, PtrInfo.Width, PtrInfo.Align, offsetInBits, 0, fieldType); } else { - fieldType = createFieldType(name, variable->getType(), 0, loc, AS_public, + fieldType = createFieldType(name, variable->getType(), loc, AS_public, offsetInBits, tunit, tunit); } fields.push_back(fieldType); @@ -3328,8 +3516,7 @@ llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls( // Ignore unnamed fields, but recurse into anonymous records. if (FieldName.empty()) { - const RecordType *RT = dyn_cast<RecordType>(Field->getType()); - if (RT) + if (const auto *RT = dyn_cast<RecordType>(Field->getType())) GV = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName, Var, DContext); continue; @@ -3337,14 +3524,16 @@ llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls( // Use VarDecl's Tag, Scope and Line number. GV = DBuilder.createGlobalVariable(DContext, FieldName, LinkageName, Unit, LineNo, FieldTy, - Var->hasInternalLinkage(), Var, nullptr); + Var->hasLocalLinkage(), Var, nullptr); } return GV; } void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, const VarDecl *D) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); + if (D->hasAttr<NoDebugAttr>()) + return; // Create global variable debug descriptor. 
llvm::DIFile *Unit = nullptr; llvm::DIScope *DContext = nullptr; @@ -3368,21 +3557,23 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, } else { GV = DBuilder.createGlobalVariable( DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasInternalLinkage(), Var, + Var->hasLocalLinkage(), Var, getOrCreateStaticDataMemberDeclarationOrNull(D)); } - DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(GV)); + DeclCache[D->getCanonicalDecl()].reset(GV); } void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, llvm::Constant *Init) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); + if (VD->hasAttr<NoDebugAttr>()) + return; // Create the descriptor for the variable. llvm::DIFile *Unit = getOrCreateFile(VD->getLocation()); StringRef Name = VD->getName(); llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit); - if (const EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(VD)) { - const EnumDecl *ED = cast<EnumDecl>(ECD->getDeclContext()); + if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) { + const auto *ED = cast<EnumDecl>(ECD->getDeclContext()); assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?"); Ty = getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit); } @@ -3400,6 +3591,9 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, auto *RD = cast<RecordDecl>(VarD->getDeclContext()); getDeclContextDescriptor(VarD); // Ensure that the type is retained even though it's otherwise unreferenced. + // + // FIXME: This is probably unnecessary, since Ty should reference RD + // through its scope. RetainedTypes.push_back( CGM.getContext().getRecordType(RD).getAsOpaquePtr()); return; @@ -3423,7 +3617,7 @@ llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) { } void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; const NamespaceDecl *NSDecl = UD.getNominatedNamespace(); if (!NSDecl->isAnonymousNamespace() || @@ -3436,7 +3630,7 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { } void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; assert(UD.shadow_size() && "We shouldn't be codegening an invalid UsingDecl containing no decls"); @@ -3451,6 +3645,8 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { } void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { + if (CGM.getCodeGenOpts().getDebuggerTuning() != llvm::DebuggerKind::LLDB) + return; if (Module *M = ID.getImportedModule()) { auto Info = ExternalASTSource::ASTSourceDescriptor(*M); DBuilder.createImportedDeclaration( @@ -3462,13 +3658,13 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { llvm::DIImportedEntity * CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return nullptr; auto &VH = NamespaceAliasCache[&NA]; if (VH) return cast<llvm::DIImportedEntity>(VH); llvm::DIImportedEntity *R; - if (const NamespaceAliasDecl *Underlying = + if (const auto *Underlying = dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace())) // This could 
cache & dedup here rather than relying on metadata deduping. R = DBuilder.createImportedDeclaration( @@ -3557,7 +3753,7 @@ void CGDebugInfo::finalize() { } void CGDebugInfo::EmitExplicitCastType(QualType Ty) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; if (auto *DieTy = getOrCreateType(Ty, getOrCreateMainFile())) diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index a68dd33fa5fe..366dd81ac812 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -16,6 +16,7 @@ #include "CGBuilder.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExternalASTSource.h" #include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" #include "clang/Frontend/CodeGenOptions.h" @@ -52,28 +53,20 @@ class CGDebugInfo { friend class ApplyDebugLocation; friend class SaveAndRestoreLocation; CodeGenModule &CGM; - const CodeGenOptions::DebugInfoKind DebugKind; + const codegenoptions::DebugInfoKind DebugKind; bool DebugTypeExtRefs; llvm::DIBuilder DBuilder; llvm::DICompileUnit *TheCU = nullptr; ModuleMap *ClangModuleMap = nullptr; + ExternalASTSource::ASTSourceDescriptor PCHDescriptor; SourceLocation CurLoc; llvm::DIType *VTablePtrType = nullptr; llvm::DIType *ClassTy = nullptr; llvm::DICompositeType *ObjTy = nullptr; llvm::DIType *SelTy = nullptr; - llvm::DIType *OCLImage1dDITy = nullptr; - llvm::DIType *OCLImage1dArrayDITy = nullptr; - llvm::DIType *OCLImage1dBufferDITy = nullptr; - llvm::DIType *OCLImage2dDITy = nullptr; - llvm::DIType *OCLImage2dArrayDITy = nullptr; - llvm::DIType *OCLImage2dDepthDITy = nullptr; - llvm::DIType *OCLImage2dArrayDepthDITy = nullptr; - llvm::DIType *OCLImage2dMSAADITy = nullptr; - llvm::DIType *OCLImage2dArrayMSAADITy = nullptr; - llvm::DIType *OCLImage2dMSAADepthDITy = nullptr; - llvm::DIType *OCLImage2dArrayMSAADepthDITy = nullptr; - llvm::DIType *OCLImage3dDITy = nullptr; +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + llvm::DIType *SingletonId = nullptr; +#include "clang/Basic/OpenCLImageTypes.def" llvm::DIType *OCLEventDITy = nullptr; llvm::DIType *OCLClkEventDITy = nullptr; llvm::DIType *OCLQueueDITy = nullptr; @@ -107,7 +100,7 @@ class CGDebugInfo { /// compilation. std::vector<std::pair<const TagType *, llvm::TrackingMDRef>> ReplaceMap; - /// Cache of replaceable forward declarartions (functions and + /// Cache of replaceable forward declarations (functions and /// variables) to RAUW at the end of compilation. std::vector<std::pair<const DeclaratorDecl *, llvm::TrackingMDRef>> FwdDeclReplaceMap; @@ -239,11 +232,16 @@ class CGDebugInfo { llvm::DIFile *F); llvm::DIType *createFieldType(StringRef name, QualType type, - uint64_t sizeInBitsOverride, SourceLocation loc, - AccessSpecifier AS, uint64_t offsetInBits, - llvm::DIFile *tunit, llvm::DIScope *scope, + SourceLocation loc, AccessSpecifier AS, + uint64_t offsetInBits, llvm::DIFile *tunit, + llvm::DIScope *scope, const RecordDecl *RD = nullptr); + /// Create new bit field member. + llvm::DIType *createBitFieldType(const FieldDecl *BitFieldDecl, + llvm::DIScope *RecordTy, + const RecordDecl *RD); + /// Helpers for collecting fields of a record. /// @{ void CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl, @@ -275,6 +273,8 @@ public: void finalize(); + /// Module debugging: Support for building PCMs. + /// @{ /// Set the main CU's DwoId field to \p Signature. 
void setDwoId(uint64_t Signature); @@ -283,6 +283,14 @@ public: /// the module of origin of each Decl. void setModuleMap(ModuleMap &MMap) { ClangModuleMap = &MMap; } + /// When generating debug information for a clang module or + /// precompiled header, this module map will be used to determine + /// the module of origin of each Decl. + void setPCHDescriptor(ExternalASTSource::ASTSourceDescriptor PCH) { + PCHDescriptor = PCH; + } + /// @} + /// Update the current source location. If \arg loc is invalid it is /// ignored. void setLocation(SourceLocation Loc); diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index b78e80d79ddd..89407cd70c3d 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -16,11 +16,13 @@ #include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGOpenCLRuntime.h" +#include "CGOpenMPRuntime.h" #include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenMP.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" @@ -29,10 +31,10 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" + using namespace clang; using namespace CodeGen; - void CodeGenFunction::EmitDecl(const Decl &D) { switch (D.getKind()) { case Decl::BuiltinTemplate: @@ -71,6 +73,8 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::ObjCImplementation: case Decl::ObjCProperty: case Decl::ObjCCompatibleAlias: + case Decl::PragmaComment: + case Decl::PragmaDetectMismatch: case Decl::AccessSpec: case Decl::LinkageSpec: case Decl::ObjCPropertyImpl: @@ -81,6 +85,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Captured: case Decl::ClassScopeFunctionSpecialization: case Decl::UsingShadow: + case Decl::ConstructorUsingShadow: case Decl::ObjCTypeParam: llvm_unreachable("Declaration should not be in declstmts!"); case Decl::Function: // void X(); @@ -92,6 +97,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Label: // __label__ x; case Decl::Import: case Decl::OMPThreadPrivate: + case Decl::OMPCapturedExpr: case Decl::Empty: // None of these decls require codegen support. return; @@ -115,6 +121,9 @@ void CodeGenFunction::EmitDecl(const Decl &D) { return EmitVarDecl(VD); } + case Decl::OMPDeclareReduction: + return CGM.EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(&D), this); + case Decl::Typedef: // typedef int X; case Decl::TypeAlias: { // using X = int; [C++0x] const TypedefNameDecl &TD = cast<TypedefNameDecl>(D); @@ -363,8 +372,15 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, llvm::GlobalVariable *var = cast<llvm::GlobalVariable>(addr->stripPointerCasts()); + + // CUDA's local and local static __shared__ variables should not + // have any non-empty initializers. This is ensured by Sema. + // Whatever initializer such variable may have when it gets here is + // a no-op and should not be emitted. + bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + D.hasAttr<CUDASharedAttr>(); // If this value has an initializer, emit it. - if (D.getInit()) + if (D.getInit() && !isCudaSharedVar) var = AddInitializerToStaticVarDecl(D, var); var->setAlignment(alignment.getQuantity()); @@ -394,7 +410,7 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, // Emit global variable debug descriptor for static vars. 
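The isCudaSharedVar guard added to EmitStaticVarDecl above targets device-side code like the following sketch (illustrative only): the __shared__ variable has static storage on the device, and whatever empty initializer Sema let through must not be turned into an actual store.

    __global__ void reverse32(int *out) {
      __shared__ int tile[32];            // static __shared__ storage; its initializer is never emitted
      tile[threadIdx.x] = threadIdx.x;
      __syncthreads();
      out[threadIdx.x] = tile[31 - threadIdx.x];
    }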
CGDebugInfo *DI = getDebugInfo(); if (DI && - CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) { + CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { DI->setLocation(D.getLocation()); DI->EmitGlobalVariable(var, &D); } @@ -513,20 +529,7 @@ namespace { CGF.EmitCall(FnInfo, CleanupFn, ReturnValueSlot(), Args); } }; - - /// A cleanup to call @llvm.lifetime.end. - class CallLifetimeEnd final : public EHScopeStack::Cleanup { - llvm::Value *Addr; - llvm::Value *Size; - public: - CallLifetimeEnd(Address addr, llvm::Value *size) - : Addr(addr.getPointer()), Size(size) {} - - void Emit(CodeGenFunction &CGF, Flags flags) override { - CGF.EmitLifetimeEnd(Size, Addr); - } - }; -} +} // end anonymous namespace /// EmitAutoVarWithLifetime - Does the setup required for an automatic /// variable with lifetime. @@ -644,7 +647,6 @@ static bool tryEmitARCCopyWeakInit(CodeGenFunction &CGF, } init = castExpr->getSubExpr(); - continue; } return false; } @@ -665,10 +667,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, EmitStoreThroughLValue(RValue::get(value), lvalue, true); return; } - + if (const CXXDefaultInitExpr *DIE = dyn_cast<CXXDefaultInitExpr>(init)) init = DIE->getExpr(); - + // If we're emitting a value with lifetime, we have to do the // initialization *before* we leave the cleanup scopes. if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) { @@ -715,8 +717,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, llvm_unreachable("present but none"); case Qualifiers::OCL_ExplicitNone: - // nothing to do - value = EmitScalarExpr(init); + value = EmitARCUnsafeUnretainedScalarExpr(init); break; case Qualifiers::OCL_Strong: { @@ -819,7 +820,7 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init, } return true; } - + if (llvm::ConstantDataSequential *CDS = dyn_cast<llvm::ConstantDataSequential>(Init)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { @@ -848,9 +849,9 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc, Builder.CreateDefaultAlignedStore(Init, Loc, isVolatile); return; } - - if (llvm::ConstantDataSequential *CDS = - dyn_cast<llvm::ConstantDataSequential>(Init)) { + + if (llvm::ConstantDataSequential *CDS = + dyn_cast<llvm::ConstantDataSequential>(Init)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { llvm::Constant *Elt = CDS->getElementAsConstant(i); @@ -877,7 +878,6 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc, } } - /// shouldUseMemSetPlusStoresToInitialize - Decide whether we should use memset /// plus some stores to initialize a local variable instead of using a memcpy /// from a constant global. It is beneficial to use memset if the global is all @@ -907,18 +907,29 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) { EmitAutoVarCleanups(emission); } +/// shouldEmitLifetimeMarkers - Decide whether we need emit the life-time +/// markers. +static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts, + const LangOptions &LangOpts) { + // Asan uses markers for use-after-scope checks. + if (CGOpts.SanitizeAddressUseAfterScope) + return true; + + // Disable lifetime markers in msan builds. + // FIXME: Remove this when msan works with lifetime markers. + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) + return false; + + // For now, only in optimized builds. 
+ return CGOpts.OptimizationLevel != 0; +} + /// Emit a lifetime.begin marker if some criteria are satisfied. /// \return a pointer to the temporary size Value if a marker was emitted, null /// otherwise llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size, llvm::Value *Addr) { - // For now, only in optimized builds. - if (CGM.getCodeGenOpts().OptimizationLevel == 0) - return nullptr; - - // Disable lifetime markers in msan builds. - // FIXME: Remove this when msan works with lifetime markers. - if (getLangOpts().Sanitize.has(SanitizerKind::Memory)) + if (!shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), getLangOpts())) return nullptr; llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size); @@ -1086,8 +1097,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Emit debug info for local var declaration. if (HaveInsertPoint()) if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { DI->setLocation(D.getLocation()); DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder); } @@ -1163,6 +1174,7 @@ bool CodeGenFunction::isTrivialInitializer(const Expr *Init) { return false; } + void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { assert(emission.Variable && "emission was not valid!"); @@ -1250,7 +1262,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { llvm::GlobalValue::PrivateLinkage, constant, Name); GV->setAlignment(Loc.getAlignment().getQuantity()); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Address SrcPtr = Address(GV, Loc.getAlignment()); if (SrcPtr.getType() != BP) @@ -1381,13 +1393,10 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { // Make sure we call @llvm.lifetime.end. This needs to happen // *last*, so the cleanup needs to be pushed *first*. - if (emission.useLifetimeMarkers()) { - EHStack.pushCleanup<CallLifetimeEnd>(NormalCleanup, + if (emission.useLifetimeMarkers()) + EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, emission.getAllocatedAddress(), emission.getSizeForLifetimeMarkers()); - EHCleanupScope &cleanup = cast<EHCleanupScope>(*EHStack.begin()); - cleanup.setLifetimeMarker(); - } // Check the type for a cleanup. if (QualType::DestructionKind dtorKind = D.getType().isDestructedType()) @@ -1662,7 +1671,7 @@ namespace { ElementType, ElementAlign, Destroyer); } }; -} +} // end anonymous namespace /// pushIrregularPartialArrayCleanup - Push an EH cleanup to destroy /// already-constructed elements of the given array. The cleanup @@ -1731,7 +1740,7 @@ namespace { CGF.EmitARCRelease(Param, Precise); } }; -} +} // end anonymous namespace /// Emit an alloca (or GlobalValue depending on target) /// for the specified parameter and set up LocalDeclMap. @@ -1852,8 +1861,8 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // Emit debug info for param declaration. 
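shouldEmitLifetimeMarkers centralizes the decision that previously lived inline in EmitLifetimeStart: markers stay off under MemorySanitizer, stay on in optimized builds, and are now also forced on when the SanitizeAddressUseAfterScope codegen option is set, because AddressSanitizer needs them to catch cases like this small sketch:

    int use_after_scope() {
      int *p;
      {
        int x = 42;   // llvm.lifetime.start for x
        p = &x;
      }               // llvm.lifetime.end for x, now emitted even at -O0 when use-after-scope checking is on
      return *p;      // AddressSanitizer can flag this as a stack-use-after-scope
    }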
if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { DI->EmitDeclareOfArgVariable(&D, DeclPtr.getPointer(), ArgNo, Builder); } } @@ -1861,3 +1870,10 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, DeclPtr.getPointer()); } + +void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, + CodeGenFunction *CGF) { + if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed())) + return; + getOpenMPRuntime().emitUserDefinedReduction(CGF, D); +} diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index adba73168797..89d142e44b49 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -86,13 +86,21 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, llvm::Constant *function; llvm::Constant *argument; - // Special-case non-array C++ destructors, where there's a function - // with the right signature that we can just call. - const CXXRecordDecl *record = nullptr; - if (dtorKind == QualType::DK_cxx_destructor && - (record = type->getAsCXXRecordDecl())) { - assert(!record->hasTrivialDestructor()); - CXXDestructorDecl *dtor = record->getDestructor(); + // Special-case non-array C++ destructors, if they have the right signature. + // Under some ABIs, destructors return this instead of void, and cannot be + // passed directly to __cxa_atexit if the target does not allow this mismatch. + const CXXRecordDecl *Record = type->getAsCXXRecordDecl(); + bool CanRegisterDestructor = + Record && (!CGM.getCXXABI().HasThisReturn( + GlobalDecl(Record->getDestructor(), Dtor_Complete)) || + CGM.getCXXABI().canCallMismatchedFunctionType()); + // If __cxa_atexit is disabled via a flag, a different helper function is + // generated elsewhere which uses atexit instead, and it takes the destructor + // directly. + bool UsingExternalHelper = !CGM.getCodeGenOpts().CXAAtExit; + if (Record && (CanRegisterDestructor || UsingExternalHelper)) { + assert(!Record->hasTrivialDestructor()); + CXXDestructorDecl *dtor = Record->getDestructor(); function = CGM.getAddrOfCXXStructor(dtor, StructorType::Complete); argument = llvm::ConstantExpr::getBitCast( @@ -304,6 +312,17 @@ void CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, llvm::GlobalVariable *Addr, bool PerformInit) { + + // According to E.2.3.1 in CUDA-7.5 Programming guide: __device__, + // __constant__ and __shared__ variables defined in namespace scope, + // that are of class type, cannot have a non-empty constructor. All + // the checks have been done in Sema by now. Whatever initializers + // are allowed are empty and we just need to ignore them here. + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || + D->hasAttr<CUDASharedAttr>())) + return; + // Check if we've already initialized this decl. 
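For the EmitDeclDestroy change above, the interesting case is a namespace-scope object whose destructor can be handed to __cxa_atexit directly. Conceptually the registration looks like the comment below; the cast is pseudo-code, since taking a destructor's address is not legal C++:

    struct Logger { ~Logger(); };
    Logger global_log;
    //   __cxa_atexit((void (*)(void *))&Logger::~Logger, &global_log, &__dso_handle);
    // Under ABIs where destructors return 'this', that signature mismatch is only
    // acceptable if the C++ ABI says mismatched calls are safe; otherwise codegen
    // falls back to a separate helper that wraps the destructor call, as before.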
auto I = DelayedCXXInitPosition.find(D); if (I != DelayedCXXInitPosition.end() && I->second == ~0U) @@ -587,8 +606,8 @@ llvm::Function *CodeGenFunction::generateDestroyHelper( getContext().VoidPtrTy); args.push_back(&dst); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - getContext().VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction( FTy, "__cxx_global_array_dtor", FI, VD->getLocation()); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index fce2e7581962..4a7dc4205e09 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -686,8 +686,10 @@ llvm::BasicBlock *CodeGenFunction::getInvokeDestImpl() { assert(EHStack.requiresLandingPad()); assert(!EHStack.empty()); - // If exceptions are disabled, there are usually no landingpads. However, when - // SEH is enabled, functions using SEH still get landingpads. + // If exceptions are disabled and SEH is not in use, then there is no invoke + // destination. SEH "works" even if exceptions are off. In practice, this + // means that C++ destructors and other EH cleanups don't run, which is + // consistent with MSVC's behavior. const LangOptions &LO = CGM.getLangOpts(); if (!LO.Exceptions) { if (!LO.Borland && !LO.MicrosoftExt) @@ -1326,11 +1328,13 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() { TerminateHandler = createBasicBlock("terminate.handler"); Builder.SetInsertPoint(TerminateHandler); llvm::Value *Exn = nullptr; + SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad( + CurrentFuncletPad); if (EHPersonality::get(*this).usesFuncletPads()) { llvm::Value *ParentPad = CurrentFuncletPad; if (!ParentPad) ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext()); - Builder.CreateCleanupPad(ParentPad); + CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad); } else { if (getLangOpts().CPlusPlus) Exn = getExceptionFromSlot(); @@ -1422,12 +1426,8 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup { Args.add(RValue::get(FP), ArgTys[1]); // Arrange a two-arg function info and type. 
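The reworded getInvokeDestImpl comment is about code like the following MSVC-style sketch (names made up; requires Microsoft extensions): even with exceptions disabled, __try/__finally still needs invoke destinations and funclets so the outlined finally helper runs.

    void may_fault();
    void cleanup();
    void guarded() {
      __try {
        may_fault();   // emitted as an invoke so the __finally funclet is reachable
      } __finally {
        cleanup();     // outlined into a separate helper by startOutlinedSEHHelper
      }
    }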
- FunctionProtoType::ExtProtoInfo EPI; - const auto *FPT = cast<FunctionProtoType>( - Context.getFunctionType(Context.VoidTy, ArgTys, EPI)); const CGFunctionInfo &FnInfo = - CGM.getTypes().arrangeFreeFunctionCall(Args, FPT, - /*chainCall=*/false); + CGM.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, Args); CGF.EmitCall(FnInfo, OutlinedFinally, ReturnValueSlot(), Args); } @@ -1623,14 +1623,13 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, SmallString<128> Name; { llvm::raw_svector_ostream OS(Name); - const Decl *ParentCodeDecl = ParentCGF.CurCodeDecl; - const NamedDecl *Parent = dyn_cast_or_null<NamedDecl>(ParentCodeDecl); - assert(Parent && "FIXME: handle unnamed decls (lambdas, blocks) with SEH"); + const FunctionDecl *ParentSEHFn = ParentCGF.CurSEHParent; + assert(ParentSEHFn && "No CurSEHParent!"); MangleContext &Mangler = CGM.getCXXABI().getMangleContext(); if (IsFilter) - Mangler.mangleSEHFilterExpression(Parent, OS); + Mangler.mangleSEHFilterExpression(ParentSEHFn, OS); else - Mangler.mangleSEHFinallyBlock(Parent, OS); + Mangler.mangleSEHFinallyBlock(ParentSEHFn, OS); } FunctionArgList Args; @@ -1656,8 +1655,8 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, QualType RetTy = IsFilter ? getContext().LongTy : getContext().VoidTy; llvm::Function *ParentFn = ParentCGF.CurFn; - const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration( - RetTy, Args, FunctionType::ExtInfo(), /*isVariadic=*/false); + const CGFunctionInfo &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args); llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); llvm::Function *Fn = llvm::Function::Create( @@ -1677,6 +1676,7 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args, OutlinedStmt->getLocStart(), OutlinedStmt->getLocStart()); + CurSEHParent = ParentCGF.CurSEHParent; CGM.SetLLVMFunctionAttributes(nullptr, FnInfo, CurFn); EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter); @@ -1708,12 +1708,6 @@ CodeGenFunction::GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF, const Stmt *FinallyBlock = Finally.getBlock(); startOutlinedSEHHelper(ParentCGF, false, FinallyBlock); - // Mark finally block calls as nounwind and noinline to make LLVM's job a - // little easier. - // FIXME: Remove these restrictions in the future. - CurFn->addFnAttr(llvm::Attribute::NoUnwind); - CurFn->addFnAttr(llvm::Attribute::NoInline); - // Emit the original filter expression, convert to i32, and return. EmitStmt(FinallyBlock); diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 507ce3d7d0ce..3e1ae3604f94 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGCXXABI.h" #include "CGCall.h" +#include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" #include "CGRecordLayout.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -32,6 +33,8 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" +#include "llvm/Transforms/Utils/SanitizerStats.h" using namespace clang; using namespace CodeGen; @@ -65,8 +68,6 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, /// block. 
llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, const Twine &Name) { - if (!Builder.isNamePreserving()) - return new llvm::AllocaInst(Ty, nullptr, "", AllocaInsertPt); return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt); } @@ -361,9 +362,16 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { ConvertTypeForMem(E->getType()) ->getPointerTo(Object.getAddressSpace())), Object.getAlignment()); - // We should not have emitted the initializer for this temporary as a - // constant. - assert(!Var->hasInitializer()); + + // createReferenceTemporary will promote the temporary to a global with a + // constant initializer if it can. It can only do this to a value of + // ARC-manageable type if the value is global and therefore "immune" to + // ref-counting operations. Therefore we have no need to emit either a + // dynamic initialization or a cleanup and we can just return the address + // of the temporary. + if (Var->hasInitializer()) + return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl); + Var->setInitializer(CGM.EmitNullConstant(E->getType())); } LValue RefTempDst = MakeAddrLValue(Object, M->getType(), @@ -416,6 +424,23 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true); } } else { + switch (M->getStorageDuration()) { + case SD_Automatic: + case SD_FullExpression: + if (auto *Size = EmitLifetimeStart( + CGM.getDataLayout().getTypeAllocSize(Object.getElementType()), + Object.getPointer())) { + if (M->getStorageDuration() == SD_Automatic) + pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker, + Object, Size); + else + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Object, + Size); + } + break; + default: + break; + } EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true); } pushTemporaryCleanup(*this, M, E, Object); @@ -577,7 +602,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, if (Checks.size() > 0) { llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(Loc), + EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(Ty), llvm::ConstantInt::get(SizeTy, AlignVal), llvm::ConstantInt::get(Int8Ty, TCK) @@ -824,7 +849,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, getNaturalPointeeTypeAlignment(E->getType(), Source)); } - if (SanOpts.has(SanitizerKind::CFIUnrelatedCast)) { + if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) && + CE->getCastKind() == CK_BitCast) { if (auto PT = E->getType()->getAs<PointerType>()) EmitVTablePtrCheckForCast(PT->getPointeeType(), Addr.getPointer(), /*MayBeNull=*/true, @@ -1265,10 +1291,10 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, } // Atomic operations have to be done on integral types. 
- if (Ty->isAtomicType() || typeIsSuitableForInlineAtomic(Ty, Volatile)) { - LValue lvalue = + LValue AtomicLValue = LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo); - return EmitAtomicLoad(lvalue, Loc).getScalarVal(); + if (Ty->isAtomicType() || LValueIsSuitableForInlineAtomic(AtomicLValue)) { + return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal(); } llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile); @@ -1376,12 +1402,11 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, Value = EmitToMemory(Value, Ty); + LValue AtomicLValue = + LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo); if (Ty->isAtomicType() || - (!isInit && typeIsSuitableForInlineAtomic(Ty, Volatile))) { - EmitAtomicStore(RValue::get(Value), - LValue::MakeAddr(Addr, Ty, getContext(), - AlignSource, TBAAInfo), - isInit); + (!isInit && LValueIsSuitableForInlineAtomic(AtomicLValue))) { + EmitAtomicStore(RValue::get(Value), AtomicLValue, isInit); return; } @@ -1733,8 +1758,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) { unsigned NumSrcElts = VTy->getNumElements(); - unsigned NumDstElts = - cast<llvm::VectorType>(Vec->getType())->getNumElements(); + unsigned NumDstElts = Vec->getType()->getVectorNumElements(); if (NumDstElts == NumSrcElts) { // Use shuffle vector is the src and destination are the same number of // elements and restore the vector mask since it is on the side it will be @@ -1947,6 +1971,21 @@ LValue CodeGenFunction::EmitLoadOfReferenceLValue(Address RefAddr, return MakeAddrLValue(Addr, RefTy->getPointeeType(), Source); } +Address CodeGenFunction::EmitLoadOfPointer(Address Ptr, + const PointerType *PtrTy, + AlignmentSource *Source) { + llvm::Value *Addr = Builder.CreateLoad(Ptr); + return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), Source, + /*forPointeeType=*/true)); +} + +LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr, + const PointerType *PtrTy) { + AlignmentSource Source; + Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &Source); + return MakeAddrLValue(Addr, PtrTy->getPointeeType(), Source); +} + static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, const Expr *E, const VarDecl *VD) { QualType T = E->getType(); @@ -2302,7 +2341,7 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { auto *GV = new llvm::GlobalVariable( CGM.getModule(), Descriptor->getType(), /*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, Descriptor); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CGM.getSanitizerMetadata()->disableSanitizerForGlobal(GV); // Remember the descriptor for this type. 
@@ -2352,7 +2391,33 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) { PresumedLoc PLoc = getContext().getSourceManager().getPresumedLoc(Loc); if (PLoc.isValid()) { - auto FilenameGV = CGM.GetAddrOfConstantCString(PLoc.getFilename(), ".src"); + StringRef FilenameString = PLoc.getFilename(); + + int PathComponentsToStrip = + CGM.getCodeGenOpts().EmitCheckPathComponentsToStrip; + if (PathComponentsToStrip < 0) { + assert(PathComponentsToStrip != INT_MIN); + int PathComponentsToKeep = -PathComponentsToStrip; + auto I = llvm::sys::path::rbegin(FilenameString); + auto E = llvm::sys::path::rend(FilenameString); + while (I != E && --PathComponentsToKeep) + ++I; + + FilenameString = FilenameString.substr(I - E); + } else if (PathComponentsToStrip > 0) { + auto I = llvm::sys::path::begin(FilenameString); + auto E = llvm::sys::path::end(FilenameString); + while (I != E && PathComponentsToStrip--) + ++I; + + if (I != E) + FilenameString = + FilenameString.substr(I - llvm::sys::path::begin(FilenameString)); + else + FilenameString = llvm::sys::path::filename(FilenameString); + } + + auto FilenameGV = CGM.GetAddrOfConstantCString(FilenameString, ".src"); CGM.getSanitizerMetadata()->disableSanitizerForGlobal( cast<llvm::GlobalVariable>(FilenameGV.getPointer())); Filename = FilenameGV.getPointer(); @@ -2483,24 +2548,26 @@ void CodeGenFunction::EmitCheck( Branch->setMetadata(llvm::LLVMContext::MD_prof, Node); EmitBlock(Handlers); - // Emit handler arguments and create handler function type. - llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); - auto *InfoPtr = - new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, - llvm::GlobalVariable::PrivateLinkage, Info); - InfoPtr->setUnnamedAddr(true); - CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); - + // Handler functions take an i8* pointing to the (handler-specific) static + // information block, followed by a sequence of intptr_t arguments + // representing operand values. SmallVector<llvm::Value *, 4> Args; SmallVector<llvm::Type *, 4> ArgTypes; Args.reserve(DynamicArgs.size() + 1); ArgTypes.reserve(DynamicArgs.size() + 1); - // Handler functions take an i8* pointing to the (handler-specific) static - // information block, followed by a sequence of intptr_t arguments - // representing operand values. - Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); - ArgTypes.push_back(Int8PtrTy); + // Emit handler arguments and create handler function type. 
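The new EmitCheckPathComponentsToStrip handling is easiest to see with a worked example. For a presumed source location of clang/lib/CodeGen/CGExpr.cpp embedded in the check data, the value roughly maps as follows (positive values strip leading components, negative values keep trailing ones):

    value  2  -> strip the first two components -> "CodeGen/CGExpr.cpp"
    value -2  -> keep the last two components   -> "CodeGen/CGExpr.cpp"
    value -1  -> keep only the filename         -> "CGExpr.cpp"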
+ if (!StaticArgs.empty()) { + llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); + auto *InfoPtr = + new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, + llvm::GlobalVariable::PrivateLinkage, Info); + InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); + Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); + ArgTypes.push_back(Int8PtrTy); + } + for (size_t i = 0, n = DynamicArgs.size(); i != n; ++i) { Args.push_back(EmitCheckValue(DynamicArgs[i])); ArgTypes.push_back(IntPtrTy); @@ -2532,10 +2599,9 @@ void CodeGenFunction::EmitCheck( EmitBlock(Cont); } -void CodeGenFunction::EmitCfiSlowPathCheck(llvm::Value *Cond, - llvm::ConstantInt *TypeId, - llvm::Value *Ptr) { - auto &Ctx = getLLVMContext(); +void CodeGenFunction::EmitCfiSlowPathCheck( + SanitizerMask Kind, llvm::Value *Cond, llvm::ConstantInt *TypeId, + llvm::Value *Ptr, ArrayRef<llvm::Constant *> StaticArgs) { llvm::BasicBlock *Cont = createBasicBlock("cfi.cont"); llvm::BasicBlock *CheckBB = createBasicBlock("cfi.slowpath"); @@ -2547,19 +2613,122 @@ void CodeGenFunction::EmitCfiSlowPathCheck(llvm::Value *Cond, EmitBlock(CheckBB); - llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction( - "__cfi_slowpath", - llvm::FunctionType::get( - llvm::Type::getVoidTy(Ctx), - {llvm::Type::getInt64Ty(Ctx), - llvm::PointerType::getUnqual(llvm::Type::getInt8Ty(Ctx))}, - false)); - llvm::CallInst *CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr}); + bool WithDiag = !CGM.getCodeGenOpts().SanitizeTrap.has(Kind); + + llvm::CallInst *CheckCall; + if (WithDiag) { + llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); + auto *InfoPtr = + new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, + llvm::GlobalVariable::PrivateLinkage, Info); + InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); + + llvm::Constant *SlowPathDiagFn = CGM.getModule().getOrInsertFunction( + "__cfi_slowpath_diag", + llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, + false)); + CheckCall = Builder.CreateCall( + SlowPathDiagFn, + {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)}); + } else { + llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction( + "__cfi_slowpath", + llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy}, false)); + CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr}); + } + CheckCall->setDoesNotThrow(); EmitBlock(Cont); } +// This function is basically a switch over the CFI failure kind, which is +// extracted from CFICheckFailData (1st function argument). Each case is either +// llvm.trap or a call to one of the two runtime handlers, based on +// -fsanitize-trap and -fsanitize-recover settings. Default case (invalid +// failure kind) traps, but this should really never happen. CFICheckFailData +// can be nullptr if the calling module has -fsanitize-trap behavior for this +// check kind; in this case __cfi_check_fail traps as well. 
+void CodeGenFunction::EmitCfiCheckFail() { + SanitizerScope SanScope(this); + FunctionArgList Args; + ImplicitParamDecl ArgData(getContext(), nullptr, SourceLocation(), nullptr, + getContext().VoidPtrTy); + ImplicitParamDecl ArgAddr(getContext(), nullptr, SourceLocation(), nullptr, + getContext().VoidPtrTy); + Args.push_back(&ArgData); + Args.push_back(&ArgAddr); + + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, Args); + + llvm::Function *F = llvm::Function::Create( + llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy}, false), + llvm::GlobalValue::WeakODRLinkage, "__cfi_check_fail", &CGM.getModule()); + F->setVisibility(llvm::GlobalValue::HiddenVisibility); + + StartFunction(GlobalDecl(), CGM.getContext().VoidTy, F, FI, Args, + SourceLocation()); + + llvm::Value *Data = + EmitLoadOfScalar(GetAddrOfLocalVar(&ArgData), /*Volatile=*/false, + CGM.getContext().VoidPtrTy, ArgData.getLocation()); + llvm::Value *Addr = + EmitLoadOfScalar(GetAddrOfLocalVar(&ArgAddr), /*Volatile=*/false, + CGM.getContext().VoidPtrTy, ArgAddr.getLocation()); + + // Data == nullptr means the calling module has trap behaviour for this check. + llvm::Value *DataIsNotNullPtr = + Builder.CreateICmpNE(Data, llvm::ConstantPointerNull::get(Int8PtrTy)); + EmitTrapCheck(DataIsNotNullPtr); + + llvm::StructType *SourceLocationTy = + llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty, nullptr); + llvm::StructType *CfiCheckFailDataTy = + llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy, nullptr); + + llvm::Value *V = Builder.CreateConstGEP2_32( + CfiCheckFailDataTy, + Builder.CreatePointerCast(Data, CfiCheckFailDataTy->getPointerTo(0)), 0, + 0); + Address CheckKindAddr(V, getIntAlign()); + llvm::Value *CheckKind = Builder.CreateLoad(CheckKindAddr); + + llvm::Value *AllVtables = llvm::MetadataAsValue::get( + CGM.getLLVMContext(), + llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); + llvm::Value *ValidVtable = Builder.CreateZExt( + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test), + {Addr, AllVtables}), + IntPtrTy); + + const std::pair<int, SanitizerMask> CheckKinds[] = { + {CFITCK_VCall, SanitizerKind::CFIVCall}, + {CFITCK_NVCall, SanitizerKind::CFINVCall}, + {CFITCK_DerivedCast, SanitizerKind::CFIDerivedCast}, + {CFITCK_UnrelatedCast, SanitizerKind::CFIUnrelatedCast}, + {CFITCK_ICall, SanitizerKind::CFIICall}}; + + SmallVector<std::pair<llvm::Value *, SanitizerMask>, 5> Checks; + for (auto CheckKindMaskPair : CheckKinds) { + int Kind = CheckKindMaskPair.first; + SanitizerMask Mask = CheckKindMaskPair.second; + llvm::Value *Cond = + Builder.CreateICmpNE(CheckKind, llvm::ConstantInt::get(Int8Ty, Kind)); + if (CGM.getLangOpts().Sanitize.has(Mask)) + EmitCheck(std::make_pair(Cond, Mask), "cfi_check_fail", {}, + {Data, Addr, ValidVtable}); + else + EmitTrapCheck(Cond); + } + + FinishFunction(); + // The only reference to this function will be created during LTO link. + // Make sure it survives until then. 
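What feeds this handler is the cfi-icall instrumentation further down in EmitCall: an indirect call site is guarded by an llvm.type.test against the type id of the call's static function type, and on failure the code either traps, reports through the CFI diagnostic handlers, or (in cross-DSO mode) takes the slow path that eventually lands in the __cfi_check_fail defined above. A minimal source-level trigger, assuming -fsanitize=cfi-icall and the LTO setup it requires:

    void invoke(void (*fp)(int)) {
      fp(7);   // guarded: llvm.type.test on fp, then trap / CFI failure reporting on mismatch
    }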
+ CGM.addUsedGlobal(F); +} + void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked) { llvm::BasicBlock *Cont = createBasicBlock("cont"); @@ -2827,22 +2996,55 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, return LV; } +static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, + AlignmentSource &AlignSource, + QualType BaseTy, QualType ElTy, + bool IsLowerBound) { + LValue BaseLVal; + if (auto *ASE = dyn_cast<OMPArraySectionExpr>(Base->IgnoreParenImpCasts())) { + BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound); + if (BaseTy->isArrayType()) { + Address Addr = BaseLVal.getAddress(); + AlignSource = BaseLVal.getAlignmentSource(); + + // If the array type was an incomplete type, we need to make sure + // the decay ends up being the right type. + llvm::Type *NewTy = CGF.ConvertType(BaseTy); + Addr = CGF.Builder.CreateElementBitCast(Addr, NewTy); + + // Note that VLA pointers are always decayed, so we don't need to do + // anything here. + if (!BaseTy->isVariableArrayType()) { + assert(isa<llvm::ArrayType>(Addr.getElementType()) && + "Expected pointer to array"); + Addr = CGF.Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), + "arraydecay"); + } + + return CGF.Builder.CreateElementBitCast(Addr, + CGF.ConvertTypeForMem(ElTy)); + } + CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &AlignSource); + return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align); + } + return CGF.EmitPointerWithAlignment(Base, &AlignSource); +} + LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound) { - LValue Base; + QualType BaseTy; if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->getBase()->IgnoreParenImpCasts())) - Base = EmitOMPArraySectionExpr(ASE, IsLowerBound); + BaseTy = OMPArraySectionExpr::getBaseOriginalType(ASE); else - Base = EmitLValue(E->getBase()); - QualType BaseTy = Base.getType(); - llvm::Value *Idx = nullptr; + BaseTy = E->getBase()->getType(); QualType ResultExprTy; if (auto *AT = getContext().getAsArrayType(BaseTy)) ResultExprTy = AT->getElementType(); else ResultExprTy = BaseTy->getPointeeType(); - if (IsLowerBound || (!IsLowerBound && E->getColonLoc().isInvalid())) { + llvm::Value *Idx = nullptr; + if (IsLowerBound || E->getColonLoc().isInvalid()) { // Requesting lower bound or upper bound, but without provided length and // without ':' symbol for the default length -> length = 1. // Idx = LowerBound ?: 0; @@ -2853,9 +3055,9 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, } else Idx = llvm::ConstantInt::getNullValue(IntPtrTy); } else { - // Try to emit length or lower bound as constant. If this is possible, 1 is - // subtracted from constant length or lower bound. Otherwise, emit LLVM IR - // (LB + Len) - 1. + // Try to emit length or lower bound as constant. If this is possible, 1 + // is subtracted from constant length or lower bound. Otherwise, emit LLVM + // IR (LB + Len) - 1. auto &C = CGM.getContext(); auto *Length = E->getLength(); llvm::APSInt ConstLength; @@ -2901,12 +3103,15 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, Idx = llvm::ConstantInt::get(IntPtrTy, ConstLength + ConstLowerBound); } else { // Idx = ArraySize - 1; - if (auto *VAT = C.getAsVariableArrayType(BaseTy)) { + QualType ArrayTy = BaseTy->isPointerType() + ? 
E->getBase()->IgnoreParenImpCasts()->getType() + : BaseTy; + if (auto *VAT = C.getAsVariableArrayType(ArrayTy)) { Length = VAT->getSizeExpr(); if (Length->isIntegerConstantExpr(ConstLength, C)) Length = nullptr; } else { - auto *CAT = C.getAsConstantArrayType(BaseTy); + auto *CAT = C.getAsConstantArrayType(ArrayTy); ConstLength = CAT->getSize(); } if (Length) { @@ -2925,52 +3130,56 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, } assert(Idx); - llvm::Value *EltPtr; - QualType FixedSizeEltType = ResultExprTy; + Address EltPtr = Address::invalid(); + AlignmentSource AlignSource; if (auto *VLA = getContext().getAsVariableArrayType(ResultExprTy)) { + // The base must be a pointer, which is not an aggregate. Emit + // it. It needs to be emitted first in case it's what captures + // the VLA bounds. + Address Base = + emitOMPArraySectionBase(*this, E->getBase(), AlignSource, BaseTy, + VLA->getElementType(), IsLowerBound); // The element count here is the total number of non-VLA elements. - llvm::Value *numElements = getVLASize(VLA).first; - FixedSizeEltType = getFixedSizeElementType(getContext(), VLA); + llvm::Value *NumElements = getVLASize(VLA).first; // Effectively, the multiply by the VLA size is part of the GEP. // GEP indexes are signed, and scaling an index isn't permitted to // signed-overflow, so we use the same semantics for our explicit // multiply. We suppress this if overflow is not undefined behavior. - if (getLangOpts().isSignedOverflowDefined()) { - Idx = Builder.CreateMul(Idx, numElements); - EltPtr = Builder.CreateGEP(Base.getPointer(), Idx, "arrayidx"); - } else { - Idx = Builder.CreateNSWMul(Idx, numElements); - EltPtr = Builder.CreateInBoundsGEP(Base.getPointer(), Idx, "arrayidx"); - } - } else if (BaseTy->isConstantArrayType()) { - llvm::Value *ArrayPtr = Base.getPointer(); - llvm::Value *Zero = llvm::ConstantInt::getNullValue(IntPtrTy); - llvm::Value *Args[] = {Zero, Idx}; - if (getLangOpts().isSignedOverflowDefined()) - EltPtr = Builder.CreateGEP(ArrayPtr, Args, "arrayidx"); + Idx = Builder.CreateMul(Idx, NumElements); else - EltPtr = Builder.CreateInBoundsGEP(ArrayPtr, Args, "arrayidx"); - } else { - // The base must be a pointer, which is not an aggregate. Emit it. - if (getLangOpts().isSignedOverflowDefined()) - EltPtr = Builder.CreateGEP(Base.getPointer(), Idx, "arrayidx"); + Idx = Builder.CreateNSWMul(Idx, NumElements); + EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(), + !getLangOpts().isSignedOverflowDefined()); + } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { + // If this is A[i] where A is an array, the frontend will have decayed the + // base to be a ArrayToPointerDecay implicit cast. While correct, it is + // inefficient at -O0 to emit a "gep A, 0, 0" when codegen'ing it, then a + // "gep x, i" here. Emit one "gep A, 0, i". + assert(Array->getType()->isArrayType() && + "Array to pointer decay must have array source type!"); + LValue ArrayLV; + // For simple multidimensional array indexing, set the 'accessed' flag for + // better bounds-checking of the base expression. + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Array)) + ArrayLV = EmitArraySubscriptExpr(ASE, /*Accessed*/ true); else - EltPtr = Builder.CreateInBoundsGEP(Base.getPointer(), Idx, "arrayidx"); - } - - CharUnits EltAlign = - Base.getAlignment().alignmentOfArrayElement( - getContext().getTypeSizeInChars(FixedSizeEltType)); - - // Limit the alignment to that of the result type. 
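The reworked EmitOMPArraySectionExpr deals with source-level OpenMP array sections; a minimal example of the base/lower-bound/length triple it has to address (illustrative only):

    void scale(float *a, int n) {
      // 'a[0:n]' is an array section: base 'a', lower bound 0, length 'n'.
      // Codegen computes the address of the requested element from the original
      // base type, which is why that type is threaded through emitOMPArraySectionBase.
      #pragma omp target map(tofrom: a[0:n])
      for (int i = 0; i < n; ++i)
        a[i] *= 2.0f;
    }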
- LValue LV = MakeAddrLValue(Address(EltPtr, EltAlign), ResultExprTy, - Base.getAlignmentSource()); + ArrayLV = EmitLValue(Array); - LV.getQuals().setAddressSpace(BaseTy.getAddressSpace()); + // Propagate the alignment from the array itself to the result. + EltPtr = emitArraySubscriptGEP( + *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, + ResultExprTy, !getLangOpts().isSignedOverflowDefined()); + AlignSource = ArrayLV.getAlignmentSource(); + } else { + Address Base = emitOMPArraySectionBase(*this, E->getBase(), AlignSource, + BaseTy, ResultExprTy, IsLowerBound); + EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, + !getLangOpts().isSignedOverflowDefined()); + } - return LV; + return MakeAddrLValue(EltPtr, ResultExprTy, AlignSource); } LValue CodeGenFunction:: @@ -3508,6 +3717,10 @@ RValue CodeGenFunction::EmitRValueForField(LValue LV, case TEK_Aggregate: return FieldLV.asAggregateRValue(); case TEK_Scalar: + // This routine is used to load fields one-by-one to perform a copy, so + // don't load reference fields. + if (FD->getType()->isReferenceType()) + return RValue::get(FieldLV.getPointer()); return EmitLoadOfLValue(FieldLV, Loc); } llvm_unreachable("bad evaluation kind"); @@ -3851,25 +4064,28 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, llvm::Value *Callee, if (SanOpts.has(SanitizerKind::CFIICall) && (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) { SanitizerScope SanScope(this); + EmitSanitizerStatReport(llvm::SanStat_CFI_ICall); llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0)); - llvm::Value *BitSetName = llvm::MetadataAsValue::get(getLLVMContext(), MD); + llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); llvm::Value *CastedCallee = Builder.CreateBitCast(Callee, Int8PtrTy); - llvm::Value *BitSetTest = - Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::bitset_test), - {CastedCallee, BitSetName}); + llvm::Value *TypeTest = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedCallee, TypeId}); - auto TypeId = CGM.CreateCfiIdForTypeMetadata(MD); - if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && TypeId) { - EmitCfiSlowPathCheck(BitSetTest, TypeId, CastedCallee); + auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD); + llvm::Constant *StaticData[] = { + llvm::ConstantInt::get(Int8Ty, CFITCK_ICall), + EmitCheckSourceLocation(E->getLocStart()), + EmitCheckTypeDescriptor(QualType(FnType, 0)), + }; + if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) { + EmitCfiSlowPathCheck(SanitizerKind::CFIICall, TypeTest, CrossDsoTypeId, + CastedCallee, StaticData); } else { - llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(E->getLocStart()), - EmitCheckTypeDescriptor(QualType(FnType, 0)), - }; - EmitCheck(std::make_pair(BitSetTest, SanitizerKind::CFIICall), - "cfi_bad_icall", StaticData, CastedCallee); + EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIICall), + "cfi_check_fail", StaticData, + {CastedCallee, llvm::UndefValue::get(IntPtrTy)}); } } diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index a4547a9982be..6d18843591f3 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -175,6 +175,7 @@ public: } void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E); void VisitCXXConstructExpr(const CXXConstructExpr *E); + void VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E); void VisitLambdaExpr(LambdaExpr *E); void VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E); void 
VisitExprWithCleanups(ExprWithCleanups *E); @@ -967,12 +968,9 @@ void AggExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { Address ArgValue = Address::invalid(); Address ArgPtr = CGF.EmitVAArg(VE, ArgValue); + // If EmitVAArg fails, emit an error. if (!ArgPtr.isValid()) { - // If EmitVAArg fails, we fall back to the LLVM instruction. - llvm::Value *Val = Builder.CreateVAArg(ArgValue.getPointer(), - CGF.ConvertType(VE->getType())); - if (!Dest.isIgnored()) - Builder.CreateStore(Val, Dest.getAddress()); + CGF.ErrorUnsupported(VE, "aggregate va_arg expression"); return; } @@ -1001,6 +999,14 @@ AggExprEmitter::VisitCXXConstructExpr(const CXXConstructExpr *E) { CGF.EmitCXXConstructExpr(E, Slot); } +void AggExprEmitter::VisitCXXInheritedCtorInitExpr( + const CXXInheritedCtorInitExpr *E) { + AggValueSlot Slot = EnsureSlot(E->getType()); + CGF.EmitInheritedCXXConstructorCall( + E->getConstructor(), E->constructsVBase(), Slot.getAddress(), + E->inheritedFromVBase(), E); +} + void AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { AggValueSlot Slot = EnsureSlot(E->getType()); @@ -1174,6 +1180,38 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { unsigned NumInitElements = E->getNumInits(); RecordDecl *record = E->getType()->castAs<RecordType>()->getDecl(); + // We'll need to enter cleanup scopes in case any of the element + // initializers throws an exception. + SmallVector<EHScopeStack::stable_iterator, 16> cleanups; + llvm::Instruction *cleanupDominator = nullptr; + + unsigned curInitIndex = 0; + + // Emit initialization of base classes. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(record)) { + assert(E->getNumInits() >= CXXRD->getNumBases() && + "missing initializer for base class"); + for (auto &Base : CXXRD->bases()) { + assert(!Base.isVirtual() && "should not see vbases here"); + auto *BaseRD = Base.getType()->getAsCXXRecordDecl(); + Address V = CGF.GetAddressOfDirectBaseInCompleteClass( + Dest.getAddress(), CXXRD, BaseRD, + /*isBaseVirtual*/ false); + AggValueSlot AggSlot = + AggValueSlot::forAddr(V, Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased); + CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot); + + if (QualType::DestructionKind dtorKind = + Base.getType().isDestructedType()) { + CGF.pushDestroy(dtorKind, V, Base.getType()); + cleanups.push_back(CGF.EHStack.stable_begin()); + } + } + } + // Prepare a 'this' for CXXDefaultInitExprs. CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress()); @@ -1207,14 +1245,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { return; } - // We'll need to enter cleanup scopes in case any of the member - // initializers throw an exception. - SmallVector<EHScopeStack::stable_iterator, 16> cleanups; - llvm::Instruction *cleanupDominator = nullptr; - // Here we iterate over the fields; this makes it simpler to both // default-initialize fields and skip over unnamed fields. - unsigned curInitIndex = 0; for (const auto *field : record->fields()) { // We're done once we hit the flexible array member. 
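The new base-class loop in VisitInitListExpr (and the matching bail-outs in the constant emitter later in this diff) exist for C++1z aggregate initialization with base classes, for example:

    struct Base { int b; };
    struct Derived : Base { int d; };
    Derived make() { return Derived{{1}, 2}; }  // the Base subobject is emitted from the first
                                                // element, then the fields, with a destructor
                                                // cleanup pushed if Base needed one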
if (field->getType()->isIncompleteArrayType()) @@ -1320,6 +1352,10 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) { CharUnits NumNonZeroBytes = CharUnits::Zero(); unsigned ILEElement = 0; + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(SD)) + while (ILEElement != CXXRD->getNumBases()) + NumNonZeroBytes += + GetNumNonZeroBytesInInit(ILE->getInit(ILEElement++), CGF); for (const auto *Field : SD->fields()) { // We're done once we hit the flexible array member or run out of // InitListExpr elements. diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 604cde76a7b1..eec2aceb88a2 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -24,10 +24,11 @@ using namespace clang; using namespace CodeGen; -static RequiredArgs commonEmitCXXMemberOrOperatorCall( - CodeGenFunction &CGF, const CXXMethodDecl *MD, llvm::Value *Callee, - ReturnValueSlot ReturnValue, llvm::Value *This, llvm::Value *ImplicitParam, - QualType ImplicitParamTy, const CallExpr *CE, CallArgList &Args) { +static RequiredArgs +commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, + llvm::Value *This, llvm::Value *ImplicitParam, + QualType ImplicitParamTy, const CallExpr *CE, + CallArgList &Args) { assert(CE == nullptr || isa<CXXMemberCallExpr>(CE) || isa<CXXOperatorCallExpr>(CE)); assert(MD->isInstance() && @@ -53,7 +54,7 @@ static RequiredArgs commonEmitCXXMemberOrOperatorCall( } const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); - RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size()); + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD); // And the rest of the call args. if (CE) { @@ -76,21 +77,20 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); CallArgList Args; RequiredArgs required = commonEmitCXXMemberOrOperatorCall( - *this, MD, Callee, ReturnValue, This, ImplicitParam, ImplicitParamTy, CE, - Args); + *this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args); return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required), Callee, ReturnValue, Args, MD); } -RValue CodeGenFunction::EmitCXXStructorCall( - const CXXMethodDecl *MD, llvm::Value *Callee, ReturnValueSlot ReturnValue, - llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, - const CallExpr *CE, StructorType Type) { +RValue CodeGenFunction::EmitCXXDestructorCall( + const CXXDestructorDecl *DD, llvm::Value *Callee, llvm::Value *This, + llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE, + StructorType Type) { CallArgList Args; - commonEmitCXXMemberOrOperatorCall(*this, MD, Callee, ReturnValue, This, - ImplicitParam, ImplicitParamTy, CE, Args); - return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(MD, Type), - Callee, ReturnValue, Args, MD); + commonEmitCXXMemberOrOperatorCall(*this, DD, This, ImplicitParam, + ImplicitParamTy, CE, Args); + return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(DD, Type), + Callee, ReturnValueSlot(), Args, DD); } static CXXRecordDecl *getCXXRecord(const Expr *E) { @@ -259,7 +259,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (SanOpts.has(SanitizerKind::CFINVCall) && MD->getParent()->isDynamicClass()) { llvm::Value *VTable = GetVTablePtr(This, Int8PtrTy, MD->getParent()); - EmitVTablePtrCheckForCall(MD, VTable, CFITCK_NVCall, CE->getLocStart()); + EmitVTablePtrCheckForCall(MD->getParent(), VTable, 
CFITCK_NVCall, + CE->getLocStart()); } if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier) @@ -273,7 +274,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (MD->isVirtual()) { This = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall( - *this, MD, This, UseVirtualCall); + *this, CalleeDecl, This, UseVirtualCall); } return EmitCXXMemberOrOperatorCall(MD, Callee, ReturnValue, This.getPointer(), @@ -323,10 +324,11 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // Push the this ptr. Args.add(RValue::get(ThisPtrForCall), ThisType); - RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, 1); - + RequiredArgs required = + RequiredArgs::forPrototypePlus(FPT, 1, /*FD=*/nullptr); + // And the rest of the call args - EmitCallArgs(Args, FPT, E->arguments(), E->getDirectCallee()); + EmitCallArgs(Args, FPT, E->arguments()); return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required), Callee, ReturnValue, Args); } @@ -369,6 +371,9 @@ static void EmitNullBaseClassInitialization(CodeGenFunction &CGF, std::vector<CharUnits> VBPtrOffsets = CGF.CGM.getCXXABI().getVBPtrOffsets(Base); for (CharUnits VBPtrOffset : VBPtrOffsets) { + // Stop before we hit any virtual base pointers located in virtual bases. + if (VBPtrOffset >= NVSize) + break; std::pair<CharUnits, CharUnits> LastStore = Stores.pop_back_val(); CharUnits LastStoreOffset = LastStore.first; CharUnits LastStoreSize = LastStore.second; @@ -471,8 +476,8 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, } } - if (const ConstantArrayType *arrayType - = getContext().getAsConstantArrayType(E->getType())) { + if (const ArrayType *arrayType + = getContext().getAsArrayType(E->getType())) { EmitCXXAggrConstructorCall(CD, arrayType, Dest.getAddress(), E); } else { CXXCtorType Type = Ctor_Complete; @@ -1010,15 +1015,18 @@ void CodeGenFunction::EmitNewArrayInitializer( if (auto *ILE = dyn_cast<InitListExpr>(Init)) { if (const RecordType *RType = ILE->getType()->getAs<RecordType>()) { if (RType->getDecl()->isStruct()) { - unsigned NumFields = 0; + unsigned NumElements = 0; + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RType->getDecl())) + NumElements = CXXRD->getNumBases(); for (auto *Field : RType->getDecl()->fields()) if (!Field->isUnnamedBitfield()) - ++NumFields; - if (ILE->getNumInits() == NumFields) + ++NumElements; + // FIXME: Recurse into nested InitListExprs. + if (ILE->getNumInits() == NumElements) for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i) if (!isa<ImplicitValueInitExpr>(ILE->getInit(i))) - --NumFields; - if (ILE->getNumInits() == NumFields && TryMemsetInitialization()) + --NumElements; + if (ILE->getNumInits() == NumElements && TryMemsetInitialization()) return; } } diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index ee049f1810a2..803b39907dd7 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -111,7 +111,7 @@ AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst) { // Round up the field offset to the alignment of the field type. CharUnits AlignedNextFieldOffsetInChars = - NextFieldOffsetInChars.RoundUpToAlignment(FieldAlignment); + NextFieldOffsetInChars.alignTo(FieldAlignment); if (AlignedNextFieldOffsetInChars < FieldOffsetInChars) { // We need to append padding. 
@@ -121,7 +121,7 @@ AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst) { "Did not add enough padding!"); AlignedNextFieldOffsetInChars = - NextFieldOffsetInChars.RoundUpToAlignment(FieldAlignment); + NextFieldOffsetInChars.alignTo(FieldAlignment); } if (AlignedNextFieldOffsetInChars > FieldOffsetInChars) { @@ -162,8 +162,8 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field, if (FieldOffset > NextFieldOffsetInBits) { // We need to add padding. CharUnits PadSize = Context.toCharUnitsFromBits( - llvm::RoundUpToAlignment(FieldOffset - NextFieldOffsetInBits, - Context.getTargetInfo().getCharAlign())); + llvm::alignTo(FieldOffset - NextFieldOffsetInBits, + Context.getTargetInfo().getCharAlign())); AppendPadding(PadSize); } @@ -334,7 +334,7 @@ void ConstStructBuilder::ConvertStructToPacked() { CharUnits ElementAlign = CharUnits::fromQuantity( CGM.getDataLayout().getABITypeAlignment(C->getType())); CharUnits AlignedElementOffsetInChars = - ElementOffsetInChars.RoundUpToAlignment(ElementAlign); + ElementOffsetInChars.alignTo(ElementAlign); if (AlignedElementOffsetInChars > ElementOffsetInChars) { // We need some padding. @@ -368,7 +368,14 @@ bool ConstStructBuilder::Build(InitListExpr *ILE) { unsigned FieldNo = 0; unsigned ElementNo = 0; - + + // Bail out if we have base classes. We could support these, but they only + // arise in C++1z where we will have already constant folded most interesting + // cases. FIXME: There are still a few more cases we can handle this way. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->getNumBases()) + return false; + for (RecordDecl::field_iterator Field = RD->field_begin(), FieldEnd = RD->field_end(); Field != FieldEnd; ++Field, ++FieldNo) { // If this is a union, skip all the fields that aren't being initialized. @@ -508,13 +515,12 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { } else { // Append tail padding if necessary. CharUnits LLVMSizeInChars = - NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment); + NextFieldOffsetInChars.alignTo(LLVMStructAlignment); if (LLVMSizeInChars != LayoutSizeInChars) AppendTailPadding(LayoutSizeInChars); - LLVMSizeInChars = - NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment); + LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment); // Check if we need to convert the struct to a packed struct. 
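Aside (not part of the patch): the hunks above only rename CharUnits::RoundUpToAlignment / llvm::RoundUpToAlignment to alignTo; the semantics stay "round the offset up to the next multiple of the alignment". A minimal standalone check, assuming the llvm/Support/MathExtras.h from the same LLVM revision:

    #include <cassert>
    #include "llvm/Support/MathExtras.h"

    int main() {
      // alignTo rounds its first argument up to the next multiple of the
      // second, exactly what the old RoundUpToAlignment name described.
      assert(llvm::alignTo(10, 8) == 16);
      assert(llvm::alignTo(16, 8) == 16);
      return 0;
    }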
if (NextFieldOffsetInChars <= LayoutSizeInChars && @@ -526,8 +532,7 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { "Converting to packed did not help!"); } - LLVMSizeInChars = - NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment); + LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment); assert(LayoutSizeInChars == LLVMSizeInChars && "Tail padding mismatch!"); @@ -546,8 +551,9 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { llvm::Constant *Result = llvm::ConstantStruct::get(STy, Elements); - assert(NextFieldOffsetInChars.RoundUpToAlignment(getAlignment(Result)) == - getSizeInChars(Result) && "Size mismatch!"); + assert(NextFieldOffsetInChars.alignTo(getAlignment(Result)) == + getSizeInChars(Result) && + "Size mismatch!"); return Result; } @@ -758,6 +764,12 @@ public: return Visit(DIE->getExpr()); } + llvm::Constant *VisitExprWithCleanups(ExprWithCleanups *E) { + if (!E->cleanupsHaveSideEffects()) + return Visit(E->getSubExpr()); + return nullptr; + } + llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E) { return Visit(E->GetTemporaryExpr()); } @@ -1125,6 +1137,13 @@ bool ConstStructBuilder::Build(ConstExprEmitter *Emitter, unsigned FieldNo = -1; unsigned ElementNo = 0; + // Bail out if we have base classes. We could support these, but they only + // arise in C++1z where we will have already constant folded most interesting + // cases. FIXME: There are still a few more cases we can handle this way. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->getNumBases()) + return false; + for (FieldDecl *Field : RD->fields()) { ++FieldNo; @@ -1301,8 +1320,14 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, // Convert to the appropriate type; this could be an lvalue for // an integer. - if (isa<llvm::PointerType>(DestTy)) + if (isa<llvm::PointerType>(DestTy)) { + // Convert the integer to a pointer-sized integer before converting it + // to a pointer. + C = llvm::ConstantExpr::getIntegerCast( + C, getDataLayout().getIntPtrType(DestTy), + /*isSigned=*/false); return llvm::ConstantExpr::getIntToPtr(C, DestTy); + } // If the types don't match this should only be a truncate. 
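A minimal standalone sketch of the two-step constant conversion the EmitConstantValue hunk above introduces; the helper name and surrounding setup are hypothetical, only the ConstantExpr calls mirror the patch:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"

    // Widen (or narrow) the integer to the pointer-sized integer type first,
    // then convert it to a pointer, rather than handing a mismatched-width
    // integer straight to getIntToPtr.
    static llvm::Constant *intToPointerConstant(const llvm::DataLayout &DL,
                                                llvm::Constant *C,
                                                llvm::PointerType *DestTy) {
      llvm::Constant *AsIntPtr = llvm::ConstantExpr::getIntegerCast(
          C, DL.getIntPtrType(DestTy), /*isSigned=*/false);
      return llvm::ConstantExpr::getIntToPtr(AsIntPtr, DestTy);
    }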
if (C->getType() != DestTy) diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 268e7967b808..064bc9532a6d 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -818,7 +818,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, "Splatted expr doesn't match with vector element type?"); // Splat the element across to all elements - unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); + unsigned NumElements = DstTy->getVectorNumElements(); return Builder.CreateVectorSplat(NumElements, Src, "splat"); } @@ -984,8 +984,7 @@ Value *ScalarExprEmitter::VisitExpr(Expr *E) { Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { // Vector Mask Case - if (E->getNumSubExprs() == 2 || - (E->getNumSubExprs() == 3 && E->getExpr(2)->getType()->isVectorType())) { + if (E->getNumSubExprs() == 2) { Value *LHS = CGF.EmitScalarExpr(E->getExpr(0)); Value *RHS = CGF.EmitScalarExpr(E->getExpr(1)); Value *Mask; @@ -993,22 +992,7 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { llvm::VectorType *LTy = cast<llvm::VectorType>(LHS->getType()); unsigned LHSElts = LTy->getNumElements(); - if (E->getNumSubExprs() == 3) { - Mask = CGF.EmitScalarExpr(E->getExpr(2)); - - // Shuffle LHS & RHS into one input vector. - SmallVector<llvm::Constant*, 32> concat; - for (unsigned i = 0; i != LHSElts; ++i) { - concat.push_back(Builder.getInt32(2*i)); - concat.push_back(Builder.getInt32(2*i+1)); - } - - Value* CV = llvm::ConstantVector::get(concat); - LHS = Builder.CreateShuffleVector(LHS, RHS, CV, "concat"); - LHSElts *= 2; - } else { - Mask = RHS; - } + Mask = RHS; llvm::VectorType *MTy = cast<llvm::VectorType>(Mask->getType()); @@ -1366,8 +1350,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { QualType DestTy = CE->getType(); CastKind Kind = CE->getCastKind(); - if (!DestTy->isVoidType()) - TestAndClearIgnoreResultAssign(); + // These cases are generally not written to ignore the result of + // evaluating their sub-expressions, so we clear this now. + bool Ignored = TestAndClearIgnoreResultAssign(); // Since almost all cast kinds apply to scalars, this switch doesn't have // a default case, so the compiler will warn on a missing case. The cases @@ -1410,7 +1395,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } case CK_AddressSpaceConversion: { Value *Src = Visit(const_cast<Expr*>(E)); - return Builder.CreateAddrSpaceCast(Src, ConvertType(DestTy)); + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. 
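The helper used on the next added line picks between the two cast forms based on the LLVM-level address spaces of the operand and the destination type. A rough sketch of that decision, as an illustration of the behaviour rather than LLVM's implementation:

    #include "llvm/IR/IRBuilder.h"

    // If the AST-level conversion lands in the same LLVM address space, a
    // plain bitcast suffices; otherwise a real addrspacecast is emitted.
    static llvm::Value *castToOtherAddressSpace(llvm::IRBuilder<> &B,
                                                llvm::Value *Src,
                                                llvm::PointerType *DstTy) {
      auto *SrcTy = llvm::cast<llvm::PointerType>(Src->getType());
      if (SrcTy->getAddressSpace() == DstTy->getAddressSpace())
        return B.CreateBitCast(Src, DstTy);
      return B.CreateAddrSpaceCast(Src, DstTy);
    }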
+ return Builder.CreatePointerBitCastOrAddrSpaceCast(Src, + ConvertType(DestTy)); } case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: @@ -1494,11 +1482,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return CGF.EmitARCRetainScalarExpr(E); case CK_ARCConsumeObject: return CGF.EmitObjCConsumeObject(E->getType(), Visit(E)); - case CK_ARCReclaimReturnedObject: { - llvm::Value *value = Visit(E); - value = CGF.EmitARCRetainAutoreleasedReturnValue(value); - return CGF.EmitObjCConsumeObject(E->getType(), value); - } + case CK_ARCReclaimReturnedObject: + return CGF.EmitARCReclaimReturnedObject(E, /*allowUnsafe*/ Ignored); case CK_ARCExtendBlockObject: return CGF.EmitARCExtendBlockObject(E); @@ -1544,7 +1529,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { llvm::Type *DstTy = ConvertType(DestTy); Value *Elt = Visit(const_cast<Expr*>(E)); // Splat the element across to all elements - unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); + unsigned NumElements = DstTy->getVectorNumElements(); return Builder.CreateVectorSplat(NumElements, Elt, "splat"); } @@ -1654,13 +1639,14 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type); if (isPre) { Builder.CreateStore(True, LV.getAddress(), LV.isVolatileQualified()) - ->setAtomic(llvm::SequentiallyConsistent); + ->setAtomic(llvm::AtomicOrdering::SequentiallyConsistent); return Builder.getTrue(); } // For atomic bool increment, we just store true and return it for // preincrement, do an atomic swap with true for postincrement - return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - LV.getPointer(), True, llvm::SequentiallyConsistent); + return Builder.CreateAtomicRMW( + llvm::AtomicRMWInst::Xchg, LV.getPointer(), True, + llvm::AtomicOrdering::SequentiallyConsistent); } // Special case for atomic increment / decrement on integers, emit // atomicrmw instructions. We skip this if we want to be doing overflow @@ -1677,7 +1663,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *amt = CGF.EmitToMemory( llvm::ConstantInt::get(ConvertType(type), 1, true), type); llvm::Value *old = Builder.CreateAtomicRMW(aop, - LV.getPointer(), amt, llvm::SequentiallyConsistent); + LV.getPointer(), amt, llvm::AtomicOrdering::SequentiallyConsistent); return isPre ? Builder.CreateBinOp(op, old, amt) : old; } value = EmitLoadOfLValue(LV, E->getExprLoc()); @@ -1794,15 +1780,19 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, amt = llvm::ConstantFP::get(VMContext, llvm::APFloat(static_cast<double>(amount))); else { - // Remaining types are either Half or LongDouble. Convert from float. + // Remaining types are Half, LongDouble or __float128. Convert from float. llvm::APFloat F(static_cast<float>(amount)); bool ignored; + const llvm::fltSemantics *FS; // Don't use getFloatTypeSemantics because Half isn't // necessarily represented using the "half" LLVM type. - F.convert(value->getType()->isHalfTy() - ? CGF.getTarget().getHalfFormat() - : CGF.getTarget().getLongDoubleFormat(), - llvm::APFloat::rmTowardZero, &ignored); + if (value->getType()->isFP128Ty()) + FS = &CGF.getTarget().getFloat128Format(); + else if (value->getType()->isHalfTy()) + FS = &CGF.getTarget().getHalfFormat(); + else + FS = &CGF.getTarget().getLongDoubleFormat(); + F.convert(*FS, llvm::APFloat::rmTowardZero, &ignored); amt = llvm::ConstantFP::get(VMContext, F); } value = Builder.CreateFAdd(value, amt, isInc ? 
"inc" : "dec"); @@ -2159,7 +2149,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( E->getExprLoc()), LHSTy); Builder.CreateAtomicRMW(aop, LHSLV.getPointer(), amt, - llvm::SequentiallyConsistent); + llvm::AtomicOrdering::SequentiallyConsistent); return LHSLV; } } @@ -2993,15 +2983,17 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { std::tie(LHS, RHS) = CGF.EmitARCStoreAutoreleasing(E); break; + case Qualifiers::OCL_ExplicitNone: + std::tie(LHS, RHS) = CGF.EmitARCStoreUnsafeUnretained(E, Ignore); + break; + case Qualifiers::OCL_Weak: RHS = Visit(E->getRHS()); LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); RHS = CGF.EmitARCStoreWeak(LHS.getAddress(), RHS, Ignore); break; - // No reason to do any of these differently. case Qualifiers::OCL_None: - case Qualifiers::OCL_ExplicitNone: // __block variables need to have the rhs evaluated first, plus // this should improve codegen just a little. RHS = Visit(E->getRHS()); @@ -3366,9 +3358,11 @@ Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { llvm::Type *ArgTy = ConvertType(VE->getType()); - // If EmitVAArg fails, we fall back to the LLVM instruction. - if (!ArgPtr.isValid()) - return Builder.CreateVAArg(ArgValue.getPointer(), ArgTy); + // If EmitVAArg fails, emit an error. + if (!ArgPtr.isValid()) { + CGF.ErrorUnsupported(VE, "va_arg expression"); + return llvm::UndefValue::get(ArgTy); + } // FIXME Volatility. llvm::Value *Val = Builder.CreateLoad(ArgPtr); @@ -3388,50 +3382,48 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) { return CGF.EmitBlockLiteral(block); } +// Convert a vec3 to vec4, or vice versa. +static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF, + Value *Src, unsigned NumElementsDst) { + llvm::Value *UnV = llvm::UndefValue::get(Src->getType()); + SmallVector<llvm::Constant*, 4> Args; + Args.push_back(Builder.getInt32(0)); + Args.push_back(Builder.getInt32(1)); + Args.push_back(Builder.getInt32(2)); + if (NumElementsDst == 4) + Args.push_back(llvm::UndefValue::get(CGF.Int32Ty)); + llvm::Constant *Mask = llvm::ConstantVector::get(Args); + return Builder.CreateShuffleVector(Src, UnV, Mask); +} + Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { Value *Src = CGF.EmitScalarExpr(E->getSrcExpr()); llvm::Type *DstTy = ConvertType(E->getType()); - // Going from vec4->vec3 or vec3->vec4 is a special case and requires - // a shuffle vector instead of a bitcast. llvm::Type *SrcTy = Src->getType(); - if (isa<llvm::VectorType>(DstTy) && isa<llvm::VectorType>(SrcTy)) { - unsigned numElementsDst = cast<llvm::VectorType>(DstTy)->getNumElements(); - unsigned numElementsSrc = cast<llvm::VectorType>(SrcTy)->getNumElements(); - if ((numElementsDst == 3 && numElementsSrc == 4) - || (numElementsDst == 4 && numElementsSrc == 3)) { - - - // In the case of going from int4->float3, a bitcast is needed before - // doing a shuffle. - llvm::Type *srcElemTy = - cast<llvm::VectorType>(SrcTy)->getElementType(); - llvm::Type *dstElemTy = - cast<llvm::VectorType>(DstTy)->getElementType(); - - if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy()) - || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) { - // Create a float type of the same size as the source or destination. 
- llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy, - numElementsSrc); - - Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast"); - } - - llvm::Value *UnV = llvm::UndefValue::get(Src->getType()); - - SmallVector<llvm::Constant*, 3> Args; - Args.push_back(Builder.getInt32(0)); - Args.push_back(Builder.getInt32(1)); - Args.push_back(Builder.getInt32(2)); - - if (numElementsDst == 4) - Args.push_back(llvm::UndefValue::get(CGF.Int32Ty)); - - llvm::Constant *Mask = llvm::ConstantVector::get(Args); + unsigned NumElementsSrc = isa<llvm::VectorType>(SrcTy) ? + cast<llvm::VectorType>(SrcTy)->getNumElements() : 0; + unsigned NumElementsDst = isa<llvm::VectorType>(DstTy) ? + cast<llvm::VectorType>(DstTy)->getNumElements() : 0; + + // Going from vec3 to non-vec3 is a special case and requires a shuffle + // vector to get a vec4, then a bitcast if the target type is different. + if (NumElementsSrc == 3 && NumElementsDst != 3) { + Src = ConvertVec3AndVec4(Builder, CGF, Src, 4); + Src = Builder.CreateBitCast(Src, DstTy); + Src->setName("astype"); + return Src; + } - return Builder.CreateShuffleVector(Src, UnV, Mask, "astype"); - } + // Going from non-vec3 to vec3 is a special case and requires a bitcast + // to vec4 if the original type is not vec4, then a shuffle vector to + // get a vec3. + if (NumElementsSrc != 3 && NumElementsDst == 3) { + auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); + Src = Builder.CreateBitCast(Src, Vec4Ty); + Src = ConvertVec3AndVec4(Builder, CGF, Src, 3); + Src->setName("astype"); + return Src; } return Builder.CreateBitCast(Src, DstTy, "astype"); diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp index 0afe7dbb9f1d..51474f16a018 100644 --- a/lib/CodeGen/CGLoopInfo.cpp +++ b/lib/CodeGen/CGLoopInfo.cpp @@ -19,12 +19,15 @@ using namespace clang::CodeGen; using namespace llvm; -static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { +static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, + llvm::DebugLoc Location) { if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && Attrs.VectorizeEnable == LoopAttributes::Unspecified && - Attrs.UnrollEnable == LoopAttributes::Unspecified) + Attrs.UnrollEnable == LoopAttributes::Unspecified && + Attrs.DistributeEnable == LoopAttributes::Unspecified && + !Location) return nullptr; SmallVector<Metadata *, 4> Args; @@ -32,6 +35,10 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { auto TempNode = MDNode::getTemporary(Ctx, None); Args.push_back(TempNode.get()); + // If we have a valid debug location for the loop, add it. + if (Location) + Args.push_back(Location.getAsMDNode()); + // Setting vectorize.width if (Attrs.VectorizeWidth > 0) { Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"), @@ -78,6 +85,14 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { Args.push_back(MDNode::get(Ctx, Vals)); } + if (Attrs.DistributeEnable != LoopAttributes::Unspecified) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), + ConstantAsMetadata::get(ConstantInt::get( + Type::getInt1Ty(Ctx), (Attrs.DistributeEnable == + LoopAttributes::Enable)))}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + // Set the first operand to itself. 
MDNode *LoopID = MDNode::get(Ctx, Args); LoopID->replaceOperandWith(0, LoopID); @@ -87,7 +102,8 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { LoopAttributes::LoopAttributes(bool IsParallel) : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), VectorizeWidth(0), - InterleaveCount(0), UnrollCount(0) {} + InterleaveCount(0), UnrollCount(0), + DistributeEnable(LoopAttributes::Unspecified) {} void LoopAttributes::clear() { IsParallel = false; @@ -98,37 +114,60 @@ void LoopAttributes::clear() { UnrollEnable = LoopAttributes::Unspecified; } -LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs) +LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, + llvm::DebugLoc Location) : LoopID(nullptr), Header(Header), Attrs(Attrs) { - LoopID = createMetadata(Header->getContext(), Attrs); + LoopID = createMetadata(Header->getContext(), Attrs, Location); } -void LoopInfoStack::push(BasicBlock *Header) { - Active.push_back(LoopInfo(Header, StagedAttrs)); +void LoopInfoStack::push(BasicBlock *Header, llvm::DebugLoc Location) { + Active.push_back(LoopInfo(Header, StagedAttrs, Location)); // Clear the attributes so nested loops do not inherit them. StagedAttrs.clear(); } void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, - ArrayRef<const clang::Attr *> Attrs) { + ArrayRef<const clang::Attr *> Attrs, + llvm::DebugLoc Location) { // Identify loop hint attributes from Attrs. for (const auto *Attr : Attrs) { const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr); + const OpenCLUnrollHintAttr *OpenCLHint = + dyn_cast<OpenCLUnrollHintAttr>(Attr); // Skip non loop hint attributes - if (!LH) + if (!LH && !OpenCLHint) { continue; + } - auto *ValueExpr = LH->getValue(); + LoopHintAttr::OptionType Option = LoopHintAttr::Unroll; + LoopHintAttr::LoopHintState State = LoopHintAttr::Disable; unsigned ValueInt = 1; - if (ValueExpr) { - llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx); - ValueInt = ValueAPS.getSExtValue(); - } + // Translate opencl_unroll_hint attribute argument to + // equivalent LoopHintAttr enums. + // OpenCL v2.0 s6.11.5: + // 0 - full unroll (no argument). + // 1 - disable unroll. + // other positive integer n - unroll by n. 
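As a compact restatement of the mapping spelled out in the comment above (the helper name is hypothetical; the LoopHintAttr enumerators are the ones used by the code that follows):

    #include <utility>
    #include "clang/AST/Attr.h"

    // Maps the opencl_unroll_hint argument N onto the option/state pair the
    // LoopHintAttr handling below expects; N == 0 stands for "no argument".
    static std::pair<clang::LoopHintAttr::OptionType,
                     clang::LoopHintAttr::LoopHintState>
    classifyOpenCLUnrollHint(unsigned N) {
      using LH = clang::LoopHintAttr;
      if (N == 0)
        return {LH::Unroll, LH::Full};         // full unroll
      if (N == 1)
        return {LH::Unroll, LH::Disable};      // disable unrolling
      return {LH::UnrollCount, LH::Numeric};   // unroll by N
    }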
+ if (OpenCLHint) { + ValueInt = OpenCLHint->getUnrollHint(); + if (ValueInt == 0) { + State = LoopHintAttr::Full; + } else if (ValueInt != 1) { + Option = LoopHintAttr::UnrollCount; + State = LoopHintAttr::Numeric; + } + } else if (LH) { + auto *ValueExpr = LH->getValue(); + if (ValueExpr) { + llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx); + ValueInt = ValueAPS.getSExtValue(); + } - LoopHintAttr::OptionType Option = LH->getOption(); - LoopHintAttr::LoopHintState State = LH->getState(); + Option = LH->getOption(); + State = LH->getState(); + } switch (State) { case LoopHintAttr::Disable: switch (Option) { @@ -143,6 +182,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: setUnrollState(LoopAttributes::Disable); break; + case LoopHintAttr::Distribute: + setDistributeState(false); + break; case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: @@ -159,6 +201,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: setUnrollState(LoopAttributes::Enable); break; + case LoopHintAttr::Distribute: + setDistributeState(true); + break; case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: @@ -178,6 +223,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: + case LoopHintAttr::Distribute: llvm_unreachable("Options cannot be used to assume mem safety."); break; } @@ -192,6 +238,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: + case LoopHintAttr::Distribute: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -210,6 +257,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: + case LoopHintAttr::Distribute: llvm_unreachable("Options cannot be assigned a value."); break; } @@ -218,7 +266,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, } /// Stage the attributes. - push(Header); + push(Header, Location); } void LoopInfoStack::pop() { @@ -237,7 +285,7 @@ void LoopInfoStack::InsertHelper(Instruction *I) const { if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) { for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i) if (TI->getSuccessor(i) == L.getHeader()) { - TI->setMetadata("llvm.loop", L.getLoopID()); + TI->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID()); break; } return; diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h index ec3390677fa9..a0111edde5de 100644 --- a/lib/CodeGen/CGLoopInfo.h +++ b/lib/CodeGen/CGLoopInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Value.h" #include "llvm/Support/Compiler.h" @@ -57,13 +58,17 @@ struct LoopAttributes { /// \brief llvm.unroll. unsigned UnrollCount; + + /// \brief Value for llvm.loop.distribute.enable metadata. + LVEnableState DistributeEnable; }; /// \brief Information used when generating a structured loop. class LoopInfo { public: /// \brief Construct a new LoopInfo for the loop with entry Header. 
- LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs); + LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs, + llvm::DebugLoc Location); /// \brief Get the loop id metadata for this loop. llvm::MDNode *getLoopID() const { return LoopID; } @@ -95,12 +100,14 @@ public: /// \brief Begin a new structured loop. The set of staged attributes will be /// applied to the loop and then cleared. - void push(llvm::BasicBlock *Header); + void push(llvm::BasicBlock *Header, + llvm::DebugLoc Location = llvm::DebugLoc()); /// \brief Begin a new structured loop. Stage attributes from the Attrs list. /// The staged attributes are applied to the loop and then cleared. void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx, - llvm::ArrayRef<const Attr *> Attrs); + llvm::ArrayRef<const Attr *> Attrs, + llvm::DebugLoc Location = llvm::DebugLoc()); /// \brief End the current loop. void pop(); @@ -126,6 +133,12 @@ public: Enable ? LoopAttributes::Enable : LoopAttributes::Disable; } + /// \brief Set the next pushed loop as a distribution candidate. + void setDistributeState(bool Enable = true) { + StagedAttrs.DistributeEnable = + Enable ? LoopAttributes::Enable : LoopAttributes::Disable; + } + /// \brief Set the next pushed loop unroll state. void setUnrollState(const LoopAttributes::LVEnableState &State) { StagedAttrs.UnrollEnable = State; diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 2d5991b71fca..db894ce67470 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -590,9 +590,7 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar, args.add(RValue::get(CGF.Builder.getInt1(hasStrong)), Context.BoolTy); llvm::Value *fn = CGF.CGM.getObjCRuntime().GetGetStructFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(Context.VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall(CGF.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, args), fn, ReturnValueSlot(), args); } @@ -856,10 +854,8 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, llvm::Value *copyCppAtomicObjectFn = CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, - args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall( + CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), copyCppAtomicObjectFn, ReturnValueSlot(), args); } @@ -901,21 +897,29 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // Currently, all atomic accesses have to be through integer // types, so there's no point in trying to pick a prettier type. - llvm::Type *bitcastType = - llvm::Type::getIntNTy(getLLVMContext(), - getContext().toBits(strategy.getIvarSize())); + uint64_t ivarSize = getContext().toBits(strategy.getIvarSize()); + llvm::Type *bitcastType = llvm::Type::getIntNTy(getLLVMContext(), ivarSize); bitcastType = bitcastType->getPointerTo(); // addrspace 0 okay // Perform an atomic load. This does not impose ordering constraints. Address ivarAddr = LV.getAddress(); ivarAddr = Builder.CreateBitCast(ivarAddr, bitcastType); llvm::LoadInst *load = Builder.CreateLoad(ivarAddr, "load"); - load->setAtomic(llvm::Unordered); + load->setAtomic(llvm::AtomicOrdering::Unordered); // Store that value into the return address. Doing this with a // bitcast is likely to produce some pretty ugly IR, but it's not // the *most* terrible thing in the world. 
- Builder.CreateStore(load, Builder.CreateBitCast(ReturnValue, bitcastType)); + llvm::Type *retTy = ConvertType(getterMethod->getReturnType()); + uint64_t retTySize = CGM.getDataLayout().getTypeSizeInBits(retTy); + llvm::Value *ivarVal = load; + if (ivarSize > retTySize) { + llvm::Type *newTy = llvm::Type::getIntNTy(getLLVMContext(), retTySize); + ivarVal = Builder.CreateTrunc(load, newTy); + bitcastType = newTy->getPointerTo(); + } + Builder.CreateStore(ivarVal, + Builder.CreateBitCast(ReturnValue, bitcastType)); // Make sure we don't do an autorelease. AutoreleaseResult = false; @@ -950,8 +954,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // runtime already should have computed it to build the function. llvm::Instruction *CallInstruction; RValue RV = EmitCall( - getTypes().arrangeFreeFunctionCall( - propType, args, FunctionType::ExtInfo(), RequiredArgs::All), + getTypes().arrangeBuiltinFunctionCall(propType, args), getPropertyFn, ReturnValueSlot(), args, CGCalleeInfo(), &CallInstruction); if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(CallInstruction)) @@ -1015,7 +1018,6 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, AutoreleaseResult = false; } - value = Builder.CreateBitCast(value, ConvertType(propType)); value = Builder.CreateBitCast( value, ConvertType(GetterMethodDecl->getReturnType())); } @@ -1067,10 +1069,8 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, args.add(RValue::get(CGF.Builder.getFalse()), CGF.getContext().BoolTy); llvm::Value *copyStructFn = CGF.CGM.getObjCRuntime().GetSetStructFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, - args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall( + CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), copyStructFn, ReturnValueSlot(), args); } @@ -1105,10 +1105,8 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, llvm::Value *copyCppAtomicObjectFn = CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, - args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall( + CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), copyCppAtomicObjectFn, ReturnValueSlot(), args); } @@ -1192,7 +1190,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, // Perform an atomic store. There are no memory ordering requirements. 
llvm::StoreInst *store = Builder.CreateStore(load, ivarAddr); - store->setAtomic(llvm::Unordered); + store->setAtomic(llvm::AtomicOrdering::Unordered); return; } @@ -1238,9 +1236,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, if (setOptimizedPropertyFn) { args.add(RValue::get(arg), getContext().getObjCIdType()); args.add(RValue::get(ivarOffset), getContext().getPointerDiffType()); - EmitCall(getTypes().arrangeFreeFunctionCall(getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall(getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, args), setOptimizedPropertyFn, ReturnValueSlot(), args); } else { args.add(RValue::get(ivarOffset), getContext().getPointerDiffType()); @@ -1251,9 +1247,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, getContext().BoolTy); // FIXME: We shouldn't need to get the function info here, the runtime // already should have computed it to build the function. - EmitCall(getTypes().arrangeFreeFunctionCall(getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall(getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, args), setPropertyFn, ReturnValueSlot(), args); } @@ -1498,6 +1492,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ ArrayType::Normal, 0); Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr"); + RunCleanupsScope ForScope(*this); + // Emit the collection pointer. In ARC, we do a retain. llvm::Value *Collection; if (getLangOpts().ObjCAutoRefCount) { @@ -1610,9 +1606,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ Args2.add(RValue::get(V), getContext().getObjCIdType()); // FIXME: We shouldn't need to get the function info here, the runtime already // should have computed it to build the function. - EmitCall(CGM.getTypes().arrangeFreeFunctionCall(getContext().VoidTy, Args2, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall( + CGM.getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, Args2), EnumerationMutationFn, ReturnValueSlot(), Args2); // Otherwise, or if the mutation function returns, just continue. @@ -1739,10 +1734,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ if (DI) DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); - // Leave the cleanup we entered in ARC. - if (getLangOpts().ObjCAutoRefCount) - PopCleanupBlock(); - + ForScope.ForceCleanup(); EmitBlock(LoopEnd.getBlock()); } @@ -1980,20 +1972,14 @@ llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value, return result; } -/// Retain the given object which is the result of a function call. -/// call i8* \@objc_retainAutoreleasedReturnValue(i8* %value) -/// -/// Yes, this function name is one character away from a different -/// call with completely different semantics. -llvm::Value * -CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { +static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { // Fetch the void(void) inline asm which marks that we're going to - // retain the autoreleased return value. + // do something with the autoreleased return value. 
llvm::InlineAsm *&marker - = CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker; + = CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker; if (!marker) { StringRef assembly - = CGM.getTargetCodeGenInfo() + = CGF.CGM.getTargetCodeGenInfo() .getARCRetainAutoreleasedReturnValueMarker(); // If we have an empty assembly string, there's nothing to do. @@ -2001,9 +1987,9 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { // Otherwise, at -O0, build an inline asm that we're going to call // in a moment. - } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) { + } else if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0) { llvm::FunctionType *type = - llvm::FunctionType::get(VoidTy, /*variadic*/false); + llvm::FunctionType::get(CGF.VoidTy, /*variadic*/false); marker = llvm::InlineAsm::get(type, assembly, "", /*sideeffects*/ true); @@ -2012,25 +1998,50 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { // optimizer to pick up. } else { llvm::NamedMDNode *metadata = - CGM.getModule().getOrInsertNamedMetadata( + CGF.CGM.getModule().getOrInsertNamedMetadata( "clang.arc.retainAutoreleasedReturnValueMarker"); assert(metadata->getNumOperands() <= 1); if (metadata->getNumOperands() == 0) { - metadata->addOperand(llvm::MDNode::get( - getLLVMContext(), llvm::MDString::get(getLLVMContext(), assembly))); + auto &ctx = CGF.getLLVMContext(); + metadata->addOperand(llvm::MDNode::get(ctx, + llvm::MDString::get(ctx, assembly))); } } } // Call the marker asm if we made one, which we do only at -O0. if (marker) - Builder.CreateCall(marker); + CGF.Builder.CreateCall(marker); +} +/// Retain the given object which is the result of a function call. +/// call i8* \@objc_retainAutoreleasedReturnValue(i8* %value) +/// +/// Yes, this function name is one character away from a different +/// call with completely different semantics. +llvm::Value * +CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { + emitAutoreleasedReturnValueMarker(*this); return emitARCValueOperation(*this, value, - CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, + CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, "objc_retainAutoreleasedReturnValue"); } +/// Claim a possibly-autoreleased return value at +0. This is only +/// valid to do in contexts which do not rely on the retain to keep +/// the object valid for for all of its uses; for example, when +/// the value is ignored, or when it is being assigned to an +/// __unsafe_unretained variable. +/// +/// call i8* \@objc_unsafeClaimAutoreleasedReturnValue(i8* %value) +llvm::Value * +CodeGenFunction::EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value) { + emitAutoreleasedReturnValueMarker(*this); + return emitARCValueOperation(*this, value, + CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue, + "objc_unsafeClaimAutoreleasedReturnValue"); +} + /// Release the given object. /// call void \@objc_release(i8* %value) void CodeGenFunction::EmitARCRelease(llvm::Value *value, @@ -2446,25 +2457,22 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, return tryEmitARCRetainLoadOfScalar(CGF, CGF.EmitLValue(e), type); } -static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, - llvm::Value *value); +typedef llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + llvm::Value *value)> + ValueTransform; -/// Given that the given expression is some sort of call (which does -/// not return retained), emit a retain following it. 
-static llvm::Value *emitARCRetainCall(CodeGenFunction &CGF, const Expr *e) { - llvm::Value *value = CGF.EmitScalarExpr(e); - return emitARCRetainAfterCall(CGF, value); -} - -static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, - llvm::Value *value) { +/// Insert code immediately after a call. +static llvm::Value *emitARCOperationAfterCall(CodeGenFunction &CGF, + llvm::Value *value, + ValueTransform doAfterCall, + ValueTransform doFallback) { if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(value)) { CGBuilderTy::InsertPoint ip = CGF.Builder.saveIP(); // Place the retain immediately following the call. CGF.Builder.SetInsertPoint(call->getParent(), ++llvm::BasicBlock::iterator(call)); - value = CGF.EmitARCRetainAutoreleasedReturnValue(value); + value = doAfterCall(CGF, value); CGF.Builder.restoreIP(ip); return value; @@ -2474,7 +2482,7 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, // Place the retain at the beginning of the normal destination block. llvm::BasicBlock *BB = invoke->getNormalDest(); CGF.Builder.SetInsertPoint(BB, BB->begin()); - value = CGF.EmitARCRetainAutoreleasedReturnValue(value); + value = doAfterCall(CGF, value); CGF.Builder.restoreIP(ip); return value; @@ -2483,7 +2491,7 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, // the operand. } else if (llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(value)) { llvm::Value *operand = bitcast->getOperand(0); - operand = emitARCRetainAfterCall(CGF, operand); + operand = emitARCOperationAfterCall(CGF, operand, doAfterCall, doFallback); bitcast->setOperand(0, operand); return bitcast; @@ -2491,7 +2499,46 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, } else { // Retain using the non-block variant: we never need to do a copy // of a block that's been returned to us. - return CGF.EmitARCRetainNonBlock(value); + return doFallback(CGF, value); + } +} + +/// Given that the given expression is some sort of call (which does +/// not return retained), emit a retain following it. +static llvm::Value *emitARCRetainCallResult(CodeGenFunction &CGF, + const Expr *e) { + llvm::Value *value = CGF.EmitScalarExpr(e); + return emitARCOperationAfterCall(CGF, value, + [](CodeGenFunction &CGF, llvm::Value *value) { + return CGF.EmitARCRetainAutoreleasedReturnValue(value); + }, + [](CodeGenFunction &CGF, llvm::Value *value) { + return CGF.EmitARCRetainNonBlock(value); + }); +} + +/// Given that the given expression is some sort of call (which does +/// not return retained), perform an unsafeClaim following it. +static llvm::Value *emitARCUnsafeClaimCallResult(CodeGenFunction &CGF, + const Expr *e) { + llvm::Value *value = CGF.EmitScalarExpr(e); + return emitARCOperationAfterCall(CGF, value, + [](CodeGenFunction &CGF, llvm::Value *value) { + return CGF.EmitARCUnsafeClaimAutoreleasedReturnValue(value); + }, + [](CodeGenFunction &CGF, llvm::Value *value) { + return value; + }); +} + +llvm::Value *CodeGenFunction::EmitARCReclaimReturnedObject(const Expr *E, + bool allowUnsafeClaim) { + if (allowUnsafeClaim && + CGM.getLangOpts().ObjCRuntime.hasARCUnsafeClaimAutoreleasedReturnValue()) { + return emitARCUnsafeClaimCallResult(*this, E); + } else { + llvm::Value *value = emitARCRetainCallResult(*this, E); + return EmitObjCConsumeObject(E->getType(), value); } } @@ -2531,17 +2578,52 @@ static bool shouldEmitSeparateBlockRetain(const Expr *e) { return true; } -/// Try to emit a PseudoObjectExpr at +1. 
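The replacement below introduces a CRTP base class; a minimal, self-contained illustration of the dispatch pattern it relies on (all names here are invented for the example):

    #include <cassert>

    // The base class holds the shared traversal logic and reaches the derived
    // class's handlers through asImpl(), with no virtual dispatch involved.
    template <typename Impl, typename Result> class EmitterBase {
    protected:
      Impl &asImpl() { return *static_cast<Impl *>(this); }

    public:
      Result visit(int kind) {
        if (kind == 0)
          return asImpl().visitSimple();   // handled by the derived emitter
        return asImpl().visitFallback();   // likewise
      }
    };

    struct PlusOneEmitter : EmitterBase<PlusOneEmitter, int> {
      int visitSimple() { return 1; }
      int visitFallback() { return 0; }
    };

    int main() {
      PlusOneEmitter E;
      assert(E.visit(0) == 1 && E.visit(7) == 0);
      return 0;
    }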
+namespace { +/// A CRTP base class for emitting expressions of retainable object +/// pointer type in ARC. +template <typename Impl, typename Result> class ARCExprEmitter { +protected: + CodeGenFunction &CGF; + Impl &asImpl() { return *static_cast<Impl*>(this); } + + ARCExprEmitter(CodeGenFunction &CGF) : CGF(CGF) {} + +public: + Result visit(const Expr *e); + Result visitCastExpr(const CastExpr *e); + Result visitPseudoObjectExpr(const PseudoObjectExpr *e); + Result visitBinaryOperator(const BinaryOperator *e); + Result visitBinAssign(const BinaryOperator *e); + Result visitBinAssignUnsafeUnretained(const BinaryOperator *e); + Result visitBinAssignAutoreleasing(const BinaryOperator *e); + Result visitBinAssignWeak(const BinaryOperator *e); + Result visitBinAssignStrong(const BinaryOperator *e); + + // Minimal implementation: + // Result visitLValueToRValue(const Expr *e) + // Result visitConsumeObject(const Expr *e) + // Result visitExtendBlockObject(const Expr *e) + // Result visitReclaimReturnedObject(const Expr *e) + // Result visitCall(const Expr *e) + // Result visitExpr(const Expr *e) + // + // Result emitBitCast(Result result, llvm::Type *resultType) + // llvm::Value *getValueOfResult(Result result) +}; +} + +/// Try to emit a PseudoObjectExpr under special ARC rules. /// /// This massively duplicates emitPseudoObjectRValue. -static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, - const PseudoObjectExpr *E) { +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitPseudoObjectExpr(const PseudoObjectExpr *E) { SmallVector<CodeGenFunction::OpaqueValueMappingData, 4> opaques; // Find the result expression. const Expr *resultExpr = E->getResultExpr(); assert(resultExpr); - TryEmitResult result; + Result result; for (PseudoObjectExpr::const_semantics_iterator i = E->semantics_begin(), e = E->semantics_end(); i != e; ++i) { @@ -2557,8 +2639,9 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, // expression, try to evaluate the source as +1. if (ov == resultExpr) { assert(!OVMA::shouldBindAsLValue(ov)); - result = tryEmitARCRetainScalarExpr(CGF, ov->getSourceExpr()); - opaqueData = OVMA::bind(CGF, ov, RValue::get(result.getPointer())); + result = asImpl().visit(ov->getSourceExpr()); + opaqueData = OVMA::bind(CGF, ov, + RValue::get(asImpl().getValueOfResult(result))); // Otherwise, just bind it. } else { @@ -2569,7 +2652,7 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, // Otherwise, if the expression is the result, evaluate it // and remember the result. } else if (semantic == resultExpr) { - result = tryEmitARCRetainScalarExpr(CGF, semantic); + result = asImpl().visit(semantic); // Otherwise, evaluate the expression in an ignored context. } else { @@ -2584,146 +2667,240 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, return result; } -static TryEmitResult -tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e) { +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>::visitCastExpr(const CastExpr *e) { + switch (e->getCastKind()) { + + // No-op casts don't change the type, so we just ignore them. + case CK_NoOp: + return asImpl().visit(e->getSubExpr()); + + // These casts can change the type. 
+ case CK_CPointerToObjCPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_AnyPointerToBlockPointerCast: + case CK_BitCast: { + llvm::Type *resultType = CGF.ConvertType(e->getType()); + assert(e->getSubExpr()->getType()->hasPointerRepresentation()); + Result result = asImpl().visit(e->getSubExpr()); + return asImpl().emitBitCast(result, resultType); + } + + // Handle some casts specially. + case CK_LValueToRValue: + return asImpl().visitLValueToRValue(e->getSubExpr()); + case CK_ARCConsumeObject: + return asImpl().visitConsumeObject(e->getSubExpr()); + case CK_ARCExtendBlockObject: + return asImpl().visitExtendBlockObject(e->getSubExpr()); + case CK_ARCReclaimReturnedObject: + return asImpl().visitReclaimReturnedObject(e->getSubExpr()); + + // Otherwise, use the default logic. + default: + return asImpl().visitExpr(e); + } +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinaryOperator(const BinaryOperator *e) { + switch (e->getOpcode()) { + case BO_Comma: + CGF.EmitIgnoredExpr(e->getLHS()); + CGF.EnsureInsertPoint(); + return asImpl().visit(e->getRHS()); + + case BO_Assign: + return asImpl().visitBinAssign(e); + + default: + return asImpl().visitExpr(e); + } +} + +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>::visitBinAssign(const BinaryOperator *e) { + switch (e->getLHS()->getType().getObjCLifetime()) { + case Qualifiers::OCL_ExplicitNone: + return asImpl().visitBinAssignUnsafeUnretained(e); + + case Qualifiers::OCL_Weak: + return asImpl().visitBinAssignWeak(e); + + case Qualifiers::OCL_Autoreleasing: + return asImpl().visitBinAssignAutoreleasing(e); + + case Qualifiers::OCL_Strong: + return asImpl().visitBinAssignStrong(e); + + case Qualifiers::OCL_None: + return asImpl().visitExpr(e); + } + llvm_unreachable("bad ObjC ownership qualifier"); +} + +/// The default rule for __unsafe_unretained emits the RHS recursively, +/// stores into the unsafe variable, and propagates the result outward. +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>:: + visitBinAssignUnsafeUnretained(const BinaryOperator *e) { + // Recursively emit the RHS. + // For __block safety, do this before emitting the LHS. + Result result = asImpl().visit(e->getRHS()); + + // Perform the store. + LValue lvalue = + CGF.EmitCheckedLValue(e->getLHS(), CodeGenFunction::TCK_Store); + CGF.EmitStoreThroughLValue(RValue::get(asImpl().getValueOfResult(result)), + lvalue); + + return result; +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinAssignAutoreleasing(const BinaryOperator *e) { + return asImpl().visitExpr(e); +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinAssignWeak(const BinaryOperator *e) { + return asImpl().visitExpr(e); +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinAssignStrong(const BinaryOperator *e) { + return asImpl().visitExpr(e); +} + +/// The general expression-emission logic. +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>::visit(const Expr *e) { // We should *never* see a nested full-expression here, because if // we fail to emit at +1, our caller must not retain after we close - // out the full-expression. + // out the full-expression. This isn't as important in the unsafe + // emitter. assert(!isa<ExprWithCleanups>(e)); - // The desired result type, if it differs from the type of the - // ultimate opaque expression. 
- llvm::Type *resultType = nullptr; - - while (true) { - e = e->IgnoreParens(); - - // There's a break at the end of this if-chain; anything - // that wants to keep looping has to explicitly continue. - if (const CastExpr *ce = dyn_cast<CastExpr>(e)) { - switch (ce->getCastKind()) { - // No-op casts don't change the type, so we just ignore them. - case CK_NoOp: - e = ce->getSubExpr(); - continue; - - case CK_LValueToRValue: { - TryEmitResult loadResult - = tryEmitARCRetainLoadOfScalar(CGF, ce->getSubExpr()); - if (resultType) { - llvm::Value *value = loadResult.getPointer(); - value = CGF.Builder.CreateBitCast(value, resultType); - loadResult.setPointer(value); - } - return loadResult; - } + // Look through parens, __extension__, generic selection, etc. + e = e->IgnoreParens(); - // These casts can change the type, so remember that and - // soldier on. We only need to remember the outermost such - // cast, though. - case CK_CPointerToObjCPointerCast: - case CK_BlockPointerToObjCPointerCast: - case CK_AnyPointerToBlockPointerCast: - case CK_BitCast: - if (!resultType) - resultType = CGF.ConvertType(ce->getType()); - e = ce->getSubExpr(); - assert(e->getType()->hasPointerRepresentation()); - continue; - - // For consumptions, just emit the subexpression and thus elide - // the retain/release pair. - case CK_ARCConsumeObject: { - llvm::Value *result = CGF.EmitScalarExpr(ce->getSubExpr()); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } + // Handle certain kinds of casts. + if (const CastExpr *ce = dyn_cast<CastExpr>(e)) { + return asImpl().visitCastExpr(ce); - // Block extends are net +0. Naively, we could just recurse on - // the subexpression, but actually we need to ensure that the - // value is copied as a block, so there's a little filter here. - case CK_ARCExtendBlockObject: { - llvm::Value *result; // will be a +0 value + // Handle the comma operator. + } else if (auto op = dyn_cast<BinaryOperator>(e)) { + return asImpl().visitBinaryOperator(op); - // If we can't safely assume the sub-expression will produce a - // block-copied value, emit the sub-expression at +0. - if (shouldEmitSeparateBlockRetain(ce->getSubExpr())) { - result = CGF.EmitScalarExpr(ce->getSubExpr()); + // TODO: handle conditional operators here - // Otherwise, try to emit the sub-expression at +1 recursively. - } else { - TryEmitResult subresult - = tryEmitARCRetainScalarExpr(CGF, ce->getSubExpr()); - result = subresult.getPointer(); - - // If that produced a retained value, just use that, - // possibly casting down. - if (subresult.getInt()) { - if (resultType) - result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } + // For calls and message sends, use the retained-call logic. + // Delegate inits are a special case in that they're the only + // returns-retained expression that *isn't* surrounded by + // a consume. + } else if (isa<CallExpr>(e) || + (isa<ObjCMessageExpr>(e) && + !cast<ObjCMessageExpr>(e)->isDelegateInitCall())) { + return asImpl().visitCall(e); - // Otherwise it's +0. - } + // Look through pseudo-object expressions. + } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) { + return asImpl().visitPseudoObjectExpr(pseudo); + } - // Retain the object as a block, then cast down. 
- result = CGF.EmitARCRetainBlock(result, /*mandatory*/ true); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } + return asImpl().visitExpr(e); +} - // For reclaims, emit the subexpression as a retained call and - // skip the consumption. - case CK_ARCReclaimReturnedObject: { - llvm::Value *result = emitARCRetainCall(CGF, ce->getSubExpr()); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } +namespace { - default: - break; - } +/// An emitter for +1 results. +struct ARCRetainExprEmitter : + public ARCExprEmitter<ARCRetainExprEmitter, TryEmitResult> { - // Skip __extension__. - } else if (const UnaryOperator *op = dyn_cast<UnaryOperator>(e)) { - if (op->getOpcode() == UO_Extension) { - e = op->getSubExpr(); - continue; - } + ARCRetainExprEmitter(CodeGenFunction &CGF) : ARCExprEmitter(CGF) {} + + llvm::Value *getValueOfResult(TryEmitResult result) { + return result.getPointer(); + } - // For calls and message sends, use the retained-call logic. - // Delegate inits are a special case in that they're the only - // returns-retained expression that *isn't* surrounded by - // a consume. - } else if (isa<CallExpr>(e) || - (isa<ObjCMessageExpr>(e) && - !cast<ObjCMessageExpr>(e)->isDelegateInitCall())) { - llvm::Value *result = emitARCRetainCall(CGF, e); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - - // Look through pseudo-object expressions. - } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) { - TryEmitResult result - = tryEmitARCRetainPseudoObject(CGF, pseudo); - if (resultType) { - llvm::Value *value = result.getPointer(); - value = CGF.Builder.CreateBitCast(value, resultType); - result.setPointer(value); + TryEmitResult emitBitCast(TryEmitResult result, llvm::Type *resultType) { + llvm::Value *value = result.getPointer(); + value = CGF.Builder.CreateBitCast(value, resultType); + result.setPointer(value); + return result; + } + + TryEmitResult visitLValueToRValue(const Expr *e) { + return tryEmitARCRetainLoadOfScalar(CGF, e); + } + + /// For consumptions, just emit the subexpression and thus elide + /// the retain/release pair. + TryEmitResult visitConsumeObject(const Expr *e) { + llvm::Value *result = CGF.EmitScalarExpr(e); + return TryEmitResult(result, true); + } + + /// Block extends are net +0. Naively, we could just recurse on + /// the subexpression, but actually we need to ensure that the + /// value is copied as a block, so there's a little filter here. + TryEmitResult visitExtendBlockObject(const Expr *e) { + llvm::Value *result; // will be a +0 value + + // If we can't safely assume the sub-expression will produce a + // block-copied value, emit the sub-expression at +0. + if (shouldEmitSeparateBlockRetain(e)) { + result = CGF.EmitScalarExpr(e); + + // Otherwise, try to emit the sub-expression at +1 recursively. + } else { + TryEmitResult subresult = asImpl().visit(e); + + // If that produced a retained value, just use that. + if (subresult.getInt()) { + return subresult; } - return result; + + // Otherwise it's +0. + result = subresult.getPointer(); } - // Conservatively halt the search at any other expression kind. - break; + // Retain the object as a block. 
+ result = CGF.EmitARCRetainBlock(result, /*mandatory*/ true); + return TryEmitResult(result, true); } - // We didn't find an obvious production, so emit what we've got and - // tell the caller that we didn't manage to retain. - llvm::Value *result = CGF.EmitScalarExpr(e); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, false); + /// For reclaims, emit the subexpression as a retained call and + /// skip the consumption. + TryEmitResult visitReclaimReturnedObject(const Expr *e) { + llvm::Value *result = emitARCRetainCallResult(CGF, e); + return TryEmitResult(result, true); + } + + /// When we have an undecorated call, retroactively do a claim. + TryEmitResult visitCall(const Expr *e) { + llvm::Value *result = emitARCRetainCallResult(CGF, e); + return TryEmitResult(result, true); + } + + // TODO: maybe special-case visitBinAssignWeak? + + TryEmitResult visitExpr(const Expr *e) { + // We didn't find an obvious production, so emit what we've got and + // tell the caller that we didn't manage to retain. + llvm::Value *result = CGF.EmitScalarExpr(e); + return TryEmitResult(result, false); + } +}; +} + +static TryEmitResult +tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e) { + return ARCRetainExprEmitter(CGF).visit(e); } static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF, @@ -2807,6 +2984,96 @@ llvm::Value *CodeGenFunction::EmitObjCThrowOperand(const Expr *expr) { return EmitScalarExpr(expr); } +namespace { + +/// An emitter for assigning into an __unsafe_unretained context. +struct ARCUnsafeUnretainedExprEmitter : + public ARCExprEmitter<ARCUnsafeUnretainedExprEmitter, llvm::Value*> { + + ARCUnsafeUnretainedExprEmitter(CodeGenFunction &CGF) : ARCExprEmitter(CGF) {} + + llvm::Value *getValueOfResult(llvm::Value *value) { + return value; + } + + llvm::Value *emitBitCast(llvm::Value *value, llvm::Type *resultType) { + return CGF.Builder.CreateBitCast(value, resultType); + } + + llvm::Value *visitLValueToRValue(const Expr *e) { + return CGF.EmitScalarExpr(e); + } + + /// For consumptions, just emit the subexpression and perform the + /// consumption like normal. + llvm::Value *visitConsumeObject(const Expr *e) { + llvm::Value *value = CGF.EmitScalarExpr(e); + return CGF.EmitObjCConsumeObject(e->getType(), value); + } + + /// No special logic for block extensions. (This probably can't + /// actually happen in this emitter, though.) + llvm::Value *visitExtendBlockObject(const Expr *e) { + return CGF.EmitARCExtendBlockObject(e); + } + + /// For reclaims, perform an unsafeClaim if that's enabled. + llvm::Value *visitReclaimReturnedObject(const Expr *e) { + return CGF.EmitARCReclaimReturnedObject(e, /*unsafe*/ true); + } + + /// When we have an undecorated call, just emit it without adding + /// the unsafeClaim. + llvm::Value *visitCall(const Expr *e) { + return CGF.EmitScalarExpr(e); + } + + /// Just do normal scalar emission in the default case. + llvm::Value *visitExpr(const Expr *e) { + return CGF.EmitScalarExpr(e); + } +}; +} + +static llvm::Value *emitARCUnsafeUnretainedScalarExpr(CodeGenFunction &CGF, + const Expr *e) { + return ARCUnsafeUnretainedExprEmitter(CGF).visit(e); +} + +/// EmitARCUnsafeUnretainedScalarExpr - Semantically equivalent to +/// immediately releasing the resut of EmitARCRetainScalarExpr, but +/// avoiding any spurious retains, including by performing reclaims +/// with objc_unsafeClaimAutoreleasedReturnValue. 
+llvm::Value *CodeGenFunction::EmitARCUnsafeUnretainedScalarExpr(const Expr *e) { + // Look through full-expressions. + if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) { + enterFullExpression(cleanups); + RunCleanupsScope scope(*this); + return emitARCUnsafeUnretainedScalarExpr(*this, cleanups->getSubExpr()); + } + + return emitARCUnsafeUnretainedScalarExpr(*this, e); +} + +std::pair<LValue,llvm::Value*> +CodeGenFunction::EmitARCStoreUnsafeUnretained(const BinaryOperator *e, + bool ignored) { + // Evaluate the RHS first. If we're ignoring the result, assume + // that we can emit at an unsafe +0. + llvm::Value *value; + if (ignored) { + value = EmitARCUnsafeUnretainedScalarExpr(e->getRHS()); + } else { + value = EmitScalarExpr(e->getRHS()); + } + + // Emit the LHS and perform the store. + LValue lvalue = EmitLValue(e->getLHS()); + EmitStoreOfScalar(value, lvalue); + + return std::pair<LValue,llvm::Value*>(std::move(lvalue), value); +} + std::pair<LValue,llvm::Value*> CodeGenFunction::EmitARCStoreStrong(const BinaryOperator *e, bool ignored) { @@ -2935,8 +3202,8 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); @@ -3016,8 +3283,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index f0af3e924c09..caafef84c333 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -35,11 +35,9 @@ #include "llvm/Support/Compiler.h" #include <cstdarg> - using namespace clang; using namespace CodeGen; - namespace { /// Class that lazily initialises the runtime function. Avoids inserting the /// types and the function declaration into a module if they're not used, and @@ -161,6 +159,7 @@ protected: /// runtime provides some LLVM passes that can use this to do things like /// automatic IMP caching and speculative inlining. unsigned msgSendMDKind; + /// Helper function that generates a constant string and returns a pointer to /// the start of the string. The result of this function can be used anywhere /// where the C code specifies const char*. @@ -170,6 +169,7 @@ protected: return llvm::ConstantExpr::getGetElementPtr(Array.getElementType(), Array.getPointer(), Zeros); } + /// Emits a linkonce_odr string, whose name is the prefix followed by the /// string value. This allows the linker to combine the strings between /// different modules. Used for EH typeinfo names, selector strings, and a @@ -186,6 +186,7 @@ protected: return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(), ConstStr, Zeros); } + /// Generates a global structure, initialized by the elements in the vector. /// The element types must match the types of the structure elements in the /// first argument. 
@@ -201,6 +202,7 @@ protected: GV->setAlignment(Align.getQuantity()); return GV; } + /// Generates a global array. The vector must contain the same number of /// elements that the array type declares, of the type specified as the array /// element type. @@ -216,6 +218,7 @@ protected: GV->setAlignment(Align.getQuantity()); return GV; } + /// Generates a global array, inferring the array type from the specified /// element type and the size of the initialiser. llvm::GlobalVariable *MakeGlobalArray(llvm::Type *Ty, @@ -227,6 +230,7 @@ protected: llvm::ArrayType *ArrayTy = llvm::ArrayType::get(Ty, V.size()); return MakeGlobal(ArrayTy, V, Align, Name, linkage); } + /// Returns a property name and encoding string. llvm::Constant *MakePropertyEncodingString(const ObjCPropertyDecl *PD, const Decl *Container) { @@ -245,6 +249,7 @@ protected: } return MakeConstantString(PD->getNameAsString()); } + /// Push the property attributes into two structure fields. void PushPropertyAttributes(std::vector<llvm::Constant*> &Fields, ObjCPropertyDecl *property, bool isSynthesized=true, bool @@ -273,6 +278,7 @@ protected: Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0)); Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0)); } + /// Ensures that the value has the required type, by inserting a bitcast if /// required. This function lets us avoid inserting bitcasts that are /// redundant. @@ -284,12 +290,14 @@ protected: if (V.getType() == Ty) return V; return B.CreateBitCast(V, Ty); } + // Some zeros used for GEPs in lots of places. llvm::Constant *Zeros[2]; /// Null pointer value. Mainly used as a terminator in various arrays. llvm::Constant *NULLPtr; /// LLVM context. llvm::LLVMContext &VMContext; + private: /// Placeholder for the class. Lots of things refer to the class before we've /// actually emitted it. We use this alias as a placeholder, and then replace @@ -360,7 +368,6 @@ protected: LazyRuntimeFunction SyncExitFn; private: - /// Function called if fast enumeration detects that the collection is /// modified during the update. LazyRuntimeFunction EnumerationMutationFn; @@ -385,7 +392,7 @@ private: /// Objective-C 1 property structures when targeting the GCC runtime or it /// will abort. const int ProtocolVersion; -private: + /// Generates an instance variable list structure. This is a structure /// containing a size and an array of structures containing instance variable /// metadata. This is used purely for introspection in the fragile ABI. In @@ -393,6 +400,7 @@ private: llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, ArrayRef<llvm::Constant *> IvarTypes, ArrayRef<llvm::Constant *> IvarOffsets); + /// Generates a method list structure. This is a structure containing a size /// and an array of structures containing method metadata. /// @@ -403,23 +411,28 @@ private: ArrayRef<Selector> MethodSels, ArrayRef<llvm::Constant *> MethodTypes, bool isClassMethodList); + /// Emits an empty protocol. This is used for \@protocol() where no protocol /// is found. The runtime will (hopefully) fix up the pointer to refer to the /// real protocol. llvm::Constant *GenerateEmptyProtocol(const std::string &ProtocolName); + /// Generates a list of property metadata structures. This follows the same /// pattern as method and instance variable metadata lists. llvm::Constant *GeneratePropertyList(const ObjCImplementationDecl *OID, SmallVectorImpl<Selector> &InstanceMethodSels, SmallVectorImpl<llvm::Constant*> &InstanceMethodTypes); + /// Generates a list of referenced protocols. 
Classes, categories, and /// protocols all use this structure. llvm::Constant *GenerateProtocolList(ArrayRef<std::string> Protocols); + /// To ensure that all protocols are seen by the runtime, we add a category on /// a class defined in the runtime, declaring no methods, but adopting the /// protocols. This is a horribly ugly hack, but it allows us to collect all /// of the protocols without changing the ABI. void GenerateProtocolHolderCategory(); + /// Generates a class structure. llvm::Constant *GenerateClassStructure( llvm::Constant *MetaClass, @@ -436,25 +449,31 @@ private: llvm::Constant *StrongIvarBitmap, llvm::Constant *WeakIvarBitmap, bool isMeta=false); + /// Generates a method list. This is used by protocols to define the required /// and optional methods. llvm::Constant *GenerateProtocolMethodList( ArrayRef<llvm::Constant *> MethodNames, ArrayRef<llvm::Constant *> MethodTypes); + /// Returns a selector with the specified type encoding. An empty string is /// used to return an untyped selector (with the types field set to NULL). llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding); + /// Returns the variable used to store the offset of an instance variable. llvm::GlobalVariable *ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar); /// Emits a reference to a class. This allows the linker to object if there /// is no class of the matching name. + protected: void EmitClassRef(const std::string &className); + /// Emits a pointer to the named class virtual llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, bool isWeak); + /// Looks up the method for sending a message to the specified object. This /// mechanism differs between the GCC and GNU runtimes, so this method must be /// overridden in subclasses. @@ -463,6 +482,7 @@ protected: llvm::Value *cmd, llvm::MDNode *node, MessageSendInfo &MSI) = 0; + /// Looks up the method for sending a message to a superclass. This /// mechanism differs between the GCC and GNU runtimes, so this method must /// be overridden in subclasses. @@ -470,6 +490,7 @@ protected: Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) = 0; + /// Libobjc2 uses a bitfield representation where small(ish) bitfields are /// stored in a 64-bit value with the low bit set to 1 and the remaining 63 /// bits set to their values, LSB first, while larger ones are stored in a @@ -482,6 +503,7 @@ protected: /// a bitfield with the 64th bit set will be (int64_t)&{ 2, [0, 1<<31] }, /// while a bitfield / with the 63rd bit set will be 1<<64. llvm::Constant *MakeBitField(ArrayRef<bool> bits); + public: CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, unsigned protocolClassVersion); @@ -569,11 +591,12 @@ public: return NULLPtr; } - llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) override { return nullptr; } }; + /// Class representing the legacy GCC Objective-C ABI. This is the default when /// -fobjc-nonfragile-abi is not specified. /// @@ -590,6 +613,7 @@ class CGObjCGCC : public CGObjCGNU { /// structure describing the receiver and the class, and a selector as /// arguments. Returns the IMP for the corresponding method. 
LazyRuntimeFunction MsgLookupSuperFn; + protected: llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, llvm::Value *cmd, llvm::MDNode *node, @@ -602,23 +626,26 @@ protected: imp->setMetadata(msgSendMDKind, node); return imp.getInstruction(); } + llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { - CGBuilderTy &Builder = CGF.Builder; - llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, - PtrToObjCSuperTy).getPointer(), cmd}; - return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); - } - public: - CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { - // IMP objc_msg_lookup(id, SEL); - MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, - nullptr); - // IMP objc_msg_lookup_super(struct objc_super*, SEL); - MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, - PtrToObjCSuperTy, SelectorTy, nullptr); - } + CGBuilderTy &Builder = CGF.Builder; + llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, + PtrToObjCSuperTy).getPointer(), cmd}; + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); + } + +public: + CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { + // IMP objc_msg_lookup(id, SEL); + MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, + nullptr); + // IMP objc_msg_lookup_super(struct objc_super*, SEL); + MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, + PtrToObjCSuperTy, SelectorTy, nullptr); + } }; + /// Class used when targeting the new GNUstep runtime ABI. class CGObjCGNUstep : public CGObjCGNU { /// The slot lookup function. Returns a pointer to a cacheable structure @@ -646,8 +673,10 @@ class CGObjCGNUstep : public CGObjCGNU { /// Type of an slot structure pointer. This is returned by the various /// lookup functions. llvm::Type *SlotTy; + public: llvm::Constant *GetEHType(QualType T) override; + protected: llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, llvm::Value *cmd, llvm::MDNode *node, @@ -689,6 +718,7 @@ class CGObjCGNUstep : public CGObjCGNU { Receiver = Builder.CreateLoad(ReceiverPtr, true); return imp; } + llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { @@ -702,6 +732,7 @@ class CGObjCGNUstep : public CGObjCGNU { return Builder.CreateAlignedLoad(Builder.CreateStructGEP(nullptr, slot, 4), CGF.getPointerAlign()); } + public: CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) { const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; @@ -753,6 +784,7 @@ class CGObjCGNUstep : public CGObjCGNU { CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy, PtrTy, PtrTy, nullptr); } + llvm::Constant *GetCppAtomicObjectGetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. @@ -760,6 +792,7 @@ class CGObjCGNUstep : public CGObjCGNU { VersionTuple(1, 7)); return CxxAtomicObjectGetFn; } + llvm::Constant *GetCppAtomicObjectSetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. 
@@ -767,6 +800,7 @@ class CGObjCGNUstep : public CGObjCGNU { VersionTuple(1, 7)); return CxxAtomicObjectSetFn; } + llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, bool copy) override { // The optimised property functions omit the GC check, and so are not @@ -821,32 +855,29 @@ protected: llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { - CGBuilderTy &Builder = CGF.Builder; - llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper.getPointer(), - PtrToObjCSuperTy), cmd}; + CGBuilderTy &Builder = CGF.Builder; + llvm::Value *lookupArgs[] = { + EnforceType(Builder, ObjCSuper.getPointer(), PtrToObjCSuperTy), cmd, + }; - if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) - return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFnSRet, lookupArgs); - else - return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); - } + if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFnSRet, lookupArgs); + else + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); + } - llvm::Value *GetClassNamed(CodeGenFunction &CGF, - const std::string &Name, bool isWeak) override { + llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, + bool isWeak) override { if (isWeak) return CGObjCGNU::GetClassNamed(CGF, Name, isWeak); EmitClassRef(Name); - std::string SymbolName = "_OBJC_CLASS_" + Name; - llvm::GlobalVariable *ClassSymbol = TheModule.getGlobalVariable(SymbolName); - if (!ClassSymbol) ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, SymbolName); - return ClassSymbol; } @@ -865,7 +896,6 @@ public: }; } // end anonymous namespace - /// Emits a reference to a dummy variable which is emitted with each class. /// This ensures that a linker error will be generated when trying to link /// together modules where a referenced class is not defined. @@ -1021,8 +1051,7 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, } llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF, - const std::string &Name, - bool isWeak) { + const std::string &Name, bool isWeak) { llvm::Constant *ClassName = MakeConstantString(Name); // With the incompatible ABI, this will need to be replaced with a direct // reference to the class symbol. For the compatible nonfragile ABI we are @@ -1044,15 +1073,48 @@ llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF, // techniques can modify the name -> class mapping. 
llvm::Value *CGObjCGNU::GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *OID) { - return GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported()); + auto *Value = + GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) { + if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) { + auto DLLStorage = llvm::GlobalValue::DefaultStorageClass; + if (OID->hasAttr<DLLExportAttr>()) + DLLStorage = llvm::GlobalValue::DLLExportStorageClass; + else if (OID->hasAttr<DLLImportAttr>()) + DLLStorage = llvm::GlobalValue::DLLImportStorageClass; + ClassSymbol->setDLLStorageClass(DLLStorage); + } + } + return Value; } + llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) { - return GetClassNamed(CGF, "NSAutoreleasePool", false); + auto *Value = GetClassNamed(CGF, "NSAutoreleasePool", false); + if (CGM.getTriple().isOSBinFormatCOFF()) { + if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) { + IdentifierInfo &II = CGF.CGM.getContext().Idents.get("NSAutoreleasePool"); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + const VarDecl *VD = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((VD = dyn_cast<VarDecl>(Result))) + break; + + auto DLLStorage = llvm::GlobalValue::DefaultStorageClass; + if (!VD || VD->hasAttr<DLLImportAttr>()) + DLLStorage = llvm::GlobalValue::DLLImportStorageClass; + else if (VD->hasAttr<DLLExportAttr>()) + DLLStorage = llvm::GlobalValue::DLLExportStorageClass; + + ClassSymbol->setDLLStorageClass(DLLStorage); + } + } + return Value; } llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding) { - SmallVectorImpl<TypedSelector> &Types = SelectorTable[Sel]; llvm::GlobalAlias *SelValue = nullptr; @@ -1247,8 +1309,6 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, } llvm::Value *cmd = GetSelector(CGF, Sel); - - CallArgList ActualArgs; ActualArgs.add(RValue::get(EnforceType(Builder, Receiver, IdTy)), ASTIdTy); @@ -1497,21 +1557,17 @@ GenerateMethodList(StringRef ClassName, IMPTy, //Method pointer nullptr); std::vector<llvm::Constant*> Methods; - std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = MethodTypes.size(); i < e; ++i) { - Elements.clear(); llvm::Constant *Method = TheModule.getFunction(SymbolNameForMethod(ClassName, CategoryName, MethodSels[i], isClassMethodList)); assert(Method && "Can't generate metadata for method that doesn't exist"); llvm::Constant *C = MakeConstantString(MethodSels[i].getAsString()); - Elements.push_back(C); - Elements.push_back(MethodTypes[i]); Method = llvm::ConstantExpr::getBitCast(Method, IMPTy); - Elements.push_back(Method); - Methods.push_back(llvm::ConstantStruct::get(ObjCMethodTy, Elements)); + Methods.push_back( + llvm::ConstantStruct::get(ObjCMethodTy, {C, MethodTypes[i], Method})); } // Array of method structures @@ -1554,23 +1610,18 @@ GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, IntTy, nullptr); std::vector<llvm::Constant*> Ivars; - std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = IvarNames.size() ; i < e ; i++) { - Elements.clear(); - Elements.push_back(IvarNames[i]); - Elements.push_back(IvarTypes[i]); - Elements.push_back(IvarOffsets[i]); - Ivars.push_back(llvm::ConstantStruct::get(ObjCIvarTy, Elements)); + Ivars.push_back(llvm::ConstantStruct::get( + ObjCIvarTy, {IvarNames[i], IvarTypes[i], IvarOffsets[i]})); } // Array of 
method structures llvm::ArrayType *ObjCIvarArrayTy = llvm::ArrayType::get(ObjCIvarTy, IvarNames.size()); - - Elements.clear(); - Elements.push_back(llvm::ConstantInt::get(IntTy, (int)IvarNames.size())); - Elements.push_back(llvm::ConstantArray::get(ObjCIvarArrayTy, Ivars)); + llvm::Constant *Elements[] = { + llvm::ConstantInt::get(IntTy, (int)IvarNames.size()), + llvm::ConstantArray::get(ObjCIvarArrayTy, Ivars)}; // Structure containing array and array count llvm::StructType *ObjCIvarListTy = llvm::StructType::get(IntTy, ObjCIvarArrayTy, @@ -1682,12 +1733,9 @@ GenerateProtocolMethodList(ArrayRef<llvm::Constant *> MethodNames, PtrToInt8Ty, nullptr); std::vector<llvm::Constant*> Methods; - std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = MethodTypes.size() ; i < e ; i++) { - Elements.clear(); - Elements.push_back(MethodNames[i]); - Elements.push_back(MethodTypes[i]); - Methods.push_back(llvm::ConstantStruct::get(ObjCMethodDescTy, Elements)); + Methods.push_back(llvm::ConstantStruct::get( + ObjCMethodDescTy, {MethodNames[i], MethodTypes[i]})); } llvm::ArrayType *ObjCMethodArrayTy = llvm::ArrayType::get(ObjCMethodDescTy, MethodNames.size()); @@ -1762,17 +1810,13 @@ llvm::Constant *CGObjCGNU::GenerateEmptyProtocol( MethodList->getType(), MethodList->getType(), nullptr); - std::vector<llvm::Constant*> Elements; // The isa pointer must be set to a magic number so the runtime knows it's // the correct layout. - Elements.push_back(llvm::ConstantExpr::getIntToPtr( - llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy)); - Elements.push_back(MakeConstantString(ProtocolName, ".objc_protocol_name")); - Elements.push_back(ProtocolList); - Elements.push_back(MethodList); - Elements.push_back(MethodList); - Elements.push_back(MethodList); - Elements.push_back(MethodList); + llvm::Constant *Elements[] = { + llvm::ConstantExpr::getIntToPtr( + llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy), + MakeConstantString(ProtocolName, ".objc_protocol_name"), ProtocolList, + MethodList, MethodList, MethodList, MethodList}; return MakeGlobal(ProtocolTy, Elements, CGM.getPointerAlign(), ".objc_protocol"); } @@ -1849,7 +1893,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { // Add all of the property methods need adding to the method list and to the // property metadata list. - for (auto *property : PD->properties()) { + for (auto *property : PD->instance_properties()) { std::vector<llvm::Constant*> Fields; Fields.push_back(MakePropertyEncodingString(property, nullptr)); @@ -1920,19 +1964,14 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { PropertyList->getType(), OptionalPropertyList->getType(), nullptr); - std::vector<llvm::Constant*> Elements; // The isa pointer must be set to a magic number so the runtime knows it's // the correct layout. 
- Elements.push_back(llvm::ConstantExpr::getIntToPtr( - llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy)); - Elements.push_back(MakeConstantString(ProtocolName, ".objc_protocol_name")); - Elements.push_back(ProtocolList); - Elements.push_back(InstanceMethodList); - Elements.push_back(ClassMethodList); - Elements.push_back(OptionalInstanceMethodList); - Elements.push_back(OptionalClassMethodList); - Elements.push_back(PropertyList); - Elements.push_back(OptionalPropertyList); + llvm::Constant *Elements[] = { + llvm::ConstantExpr::getIntToPtr( + llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy), + MakeConstantString(ProtocolName, ".objc_protocol_name"), ProtocolList, + InstanceMethodList, ClassMethodList, OptionalInstanceMethodList, + OptionalClassMethodList, PropertyList, OptionalPropertyList}; ExistingProtocols[ProtocolName] = llvm::ConstantExpr::getBitCast(MakeGlobal(ProtocolTy, Elements, CGM.getPointerAlign(), ".objc_protocol"), IdTy); @@ -2058,20 +2097,20 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) { E = Protos.end(); I != E; ++I) Protocols.push_back((*I)->getNameAsString()); - std::vector<llvm::Constant*> Elements; - Elements.push_back(MakeConstantString(CategoryName)); - Elements.push_back(MakeConstantString(ClassName)); - // Instance method list - Elements.push_back(llvm::ConstantExpr::getBitCast(GenerateMethodList( - ClassName, CategoryName, InstanceMethodSels, InstanceMethodTypes, - false), PtrTy)); - // Class method list - Elements.push_back(llvm::ConstantExpr::getBitCast(GenerateMethodList( - ClassName, CategoryName, ClassMethodSels, ClassMethodTypes, true), - PtrTy)); - // Protocol list - Elements.push_back(llvm::ConstantExpr::getBitCast( - GenerateProtocolList(Protocols), PtrTy)); + llvm::Constant *Elements[] = { + MakeConstantString(CategoryName), MakeConstantString(ClassName), + // Instance method list + llvm::ConstantExpr::getBitCast( + GenerateMethodList(ClassName, CategoryName, InstanceMethodSels, + InstanceMethodTypes, false), + PtrTy), + // Class method list + llvm::ConstantExpr::getBitCast(GenerateMethodList(ClassName, CategoryName, + ClassMethodSels, + ClassMethodTypes, true), + PtrTy), + // Protocol list + llvm::ConstantExpr::getBitCast(GenerateProtocolList(Protocols), PtrTy)}; Categories.push_back(llvm::ConstantExpr::getBitCast( MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, PtrTy, PtrTy, PtrTy, nullptr), Elements, CGM.getPointerAlign()), @@ -2167,18 +2206,19 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // Get the class name ObjCInterfaceDecl *ClassDecl = - const_cast<ObjCInterfaceDecl *>(OID->getClassInterface()); + const_cast<ObjCInterfaceDecl *>(OID->getClassInterface()); std::string ClassName = ClassDecl->getNameAsString(); + // Emit the symbol that is used to generate linker errors if this class is // referenced in other modules but not declared. std::string classSymbolName = "__objc_class_name_" + ClassName; - if (llvm::GlobalVariable *symbol = - TheModule.getGlobalVariable(classSymbolName)) { + if (auto *symbol = TheModule.getGlobalVariable(classSymbolName)) { symbol->setInitializer(llvm::ConstantInt::get(LongTy, 0)); } else { new llvm::GlobalVariable(TheModule, LongTy, false, - llvm::GlobalValue::ExternalLinkage, llvm::ConstantInt::get(LongTy, 0), - classSymbolName); + llvm::GlobalValue::ExternalLinkage, + llvm::ConstantInt::get(LongTy, 0), + classSymbolName); } // Get the size of instances. 
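The last hunk above keeps the __objc_class_name_<ClassName> guard-symbol idiom: GenerateClass first looks the global up, and only when an earlier class reference has not already created it as an extern declaration does it emit a fresh definition; either way the symbol ends up with external linkage and a zero initializer, so a module that references an undefined class produces a link error. A rough sketch of that look-up-or-create step against the LLVM C++ API follows; using i64 for LongTy is an assumption of the sketch, the real code uses the target's long type.

// Illustrative sketch only (assumes LLVM development headers).
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include <string>

llvm::GlobalVariable *getOrCreateClassNameSymbol(llvm::Module &M,
                                                 llvm::StringRef ClassName) {
  std::string SymbolName = ("__objc_class_name_" + ClassName).str();
  llvm::Type *LongTy = llvm::Type::getInt64Ty(M.getContext());
  llvm::Constant *Zero = llvm::ConstantInt::get(LongTy, 0);

  if (llvm::GlobalVariable *Existing = M.getGlobalVariable(SymbolName)) {
    // An earlier class reference created this as an extern declaration;
    // giving it an initializer turns it into the definition.
    Existing->setInitializer(Zero);
    return Existing;
  }
  // Never referenced yet: emit the definition directly.
  return new llvm::GlobalVariable(M, LongTy, /*isConstant=*/false,
                                  llvm::GlobalValue::ExternalLinkage, Zero,
                                  SymbolName);
}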
@@ -2256,7 +2296,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { MakeGlobalArray(PtrToIntTy, IvarOffsetValues, CGM.getPointerAlign(), ".ivar.offsets"); - // Collect information about instance methods SmallVector<Selector, 16> InstanceMethodSels; SmallVector<llvm::Constant*, 16> InstanceMethodTypes; @@ -2270,7 +2309,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { llvm::Constant *Properties = GeneratePropertyList(OID, InstanceMethodSels, InstanceMethodTypes); - // Collect information about class methods SmallVector<Selector, 16> ClassMethodSels; SmallVector<llvm::Constant*, 16> ClassMethodTypes; @@ -2343,19 +2381,35 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { ++ivarIndex; } llvm::Constant *ZeroPtr = llvm::ConstantInt::get(IntPtrTy, 0); + //Generate metaclass for class methods - llvm::Constant *MetaClassStruct = GenerateClassStructure(NULLPtr, - NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], GenerateIvarList( - empty, empty, empty), ClassMethodList, NULLPtr, - NULLPtr, NULLPtr, ZeroPtr, ZeroPtr, true); + llvm::Constant *MetaClassStruct = GenerateClassStructure( + NULLPtr, NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], + GenerateIvarList(empty, empty, empty), ClassMethodList, NULLPtr, NULLPtr, + NULLPtr, ZeroPtr, ZeroPtr, true); + if (CGM.getTriple().isOSBinFormatCOFF()) { + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + cast<llvm::GlobalValue>(MetaClassStruct)->setDLLStorageClass(Storage); + } // Generate the class structure - llvm::Constant *ClassStruct = - GenerateClassStructure(MetaClassStruct, SuperClass, 0x11L, - ClassName.c_str(), nullptr, - llvm::ConstantInt::get(LongTy, instanceSize), IvarList, - MethodList, GenerateProtocolList(Protocols), IvarOffsetArray, - Properties, StrongIvarBitmap, WeakIvarBitmap); + llvm::Constant *ClassStruct = GenerateClassStructure( + MetaClassStruct, SuperClass, 0x11L, ClassName.c_str(), nullptr, + llvm::ConstantInt::get(LongTy, instanceSize), IvarList, MethodList, + GenerateProtocolList(Protocols), IvarOffsetArray, Properties, + StrongIvarBitmap, WeakIvarBitmap); + if (CGM.getTriple().isOSBinFormatCOFF()) { + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + cast<llvm::GlobalValue>(ClassStruct)->setDLLStorageClass(Storage); + } // Resolve the class aliases, if they exist. 
if (ClassPtrAlias) { @@ -2376,7 +2430,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { Classes.push_back(ClassStruct); } - llvm::Function *CGObjCGNU::ModuleInitFunction() { // Only emit an ObjC load function if no Objective-C stuff has been called if (Classes.empty() && Categories.empty() && ConstantStrings.empty() && @@ -2651,12 +2704,15 @@ llvm::Constant *CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic, llvm::Constant *CGObjCGNU::GetGetStructFunction() { return GetStructPropertyFn; } + llvm::Constant *CGObjCGNU::GetSetStructFunction() { return SetStructPropertyFn; } + llvm::Constant *CGObjCGNU::GetCppAtomicObjectGetFunction() { return nullptr; } + llvm::Constant *CGObjCGNU::GetCppAtomicObjectSetFunction() { return nullptr; } @@ -2685,7 +2741,6 @@ void CGObjCGNU::EmitTryStmt(CodeGenFunction &CGF, // In Objective-C++ mode, we actually emit something equivalent to the C++ // exception handler. EmitTryCatchStmt(CGF, S, EnterCatchFn, ExitCatchFn, ExceptionReThrowFn); - return ; } void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, @@ -2800,7 +2855,7 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable( // to replace it with the real version for a library. In non-PIC code you // must compile with the fragile ABI if you want to use ivars from a // GCC-compiled class. - if (CGM.getLangOpts().PICLevel || CGM.getLangOpts().PIELevel) { + if (CGM.getLangOpts().PICLevel) { llvm::GlobalVariable *IvarOffsetGV = new llvm::GlobalVariable(TheModule, Int32Ty, false, llvm::GlobalValue::PrivateLinkage, OffsetGuess, Name+".guess"); @@ -2848,7 +2903,12 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, const ObjCIvarDecl *Ivar) { if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) { Interface = FindIvarInterface(CGM.getContext(), Interface, Ivar); - if (RuntimeVersion < 10) + + // The MSVC linker cannot have a single global defined as LinkOnceAnyLinkage + // and ExternalLinkage, so create a reference to the ivar global and rely on + // the definition being created as part of GenerateClass. 
+ if (RuntimeVersion < 10 || + CGF.CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment()) return CGF.Builder.CreateZExtOrBitCast( CGF.Builder.CreateDefaultAlignedLoad(CGF.Builder.CreateAlignedLoad( ObjCIvarOffsetVariable(Interface, Ivar), diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index e30b2875f209..5ab9fc4f9710 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -236,17 +236,14 @@ public: CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // id objc_getProperty (id, SEL, ptrdiff_t, bool) - SmallVector<CanQualType,4> Params; CanQualType IdType = Ctx.getCanonicalParamType(Ctx.getObjCIdType()); CanQualType SelType = Ctx.getCanonicalParamType(Ctx.getObjCSelType()); - Params.push_back(IdType); - Params.push_back(SelType); - Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); - Params.push_back(Ctx.BoolTy); + CanQualType Params[] = { + IdType, SelType, + Ctx.getPointerDiffType()->getCanonicalTypeUnqualified(), Ctx.BoolTy}; llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - IdType, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(IdType, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_getProperty"); } @@ -254,19 +251,18 @@ public: CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_setProperty (id, SEL, ptrdiff_t, id, bool, bool) - SmallVector<CanQualType,6> Params; CanQualType IdType = Ctx.getCanonicalParamType(Ctx.getObjCIdType()); CanQualType SelType = Ctx.getCanonicalParamType(Ctx.getObjCSelType()); - Params.push_back(IdType); - Params.push_back(SelType); - Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); - Params.push_back(IdType); - Params.push_back(Ctx.BoolTy); - Params.push_back(Ctx.BoolTy); + CanQualType Params[] = { + IdType, + SelType, + Ctx.getPointerDiffType()->getCanonicalTypeUnqualified(), + IdType, + Ctx.BoolTy, + Ctx.BoolTy}; llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_setProperty"); } @@ -290,9 +286,8 @@ public: Params.push_back(IdType); Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); const char *name; if (atomic && copy) name = "objc_setProperty_atomic_copy"; @@ -317,9 +312,8 @@ public: Params.push_back(Ctx.BoolTy); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_copyStruct"); } @@ -336,10 +330,8 @@ public: Params.push_back(Ctx.VoidPtrTy); Params.push_back(Ctx.VoidPtrTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false, false, - Params, - FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + 
Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_copyCppObjectAtomic"); } @@ -350,12 +342,25 @@ public: SmallVector<CanQualType,1> Params; Params.push_back(Ctx.getCanonicalParamType(Ctx.getObjCIdType())); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_enumerationMutation"); } + llvm::Constant *getLookUpClassFn() { + CodeGen::CodeGenTypes &Types = CGM.getTypes(); + ASTContext &Ctx = CGM.getContext(); + // Class objc_lookUpClass (const char *) + SmallVector<CanQualType,1> Params; + Params.push_back( + Ctx.getCanonicalType(Ctx.getPointerType(Ctx.CharTy.withConst()))); + llvm::FunctionType *FTy = + Types.GetFunctionType(Types.arrangeBuiltinFunctionDeclaration( + Ctx.getCanonicalType(Ctx.getObjCClassType()), + Params)); + return CGM.CreateRuntimeFunction(FTy, "objc_lookUpClass"); + } + /// GcReadWeakFn -- LLVM objc_read_weak (id *src) function. llvm::Constant *getGcReadWeakFn() { // id objc_read_weak (id *) @@ -576,7 +581,6 @@ public: return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.Int32Ty, params, false), "objc_exception_match"); - } /// SetJmpFn - LLVM _setjmp function. @@ -600,7 +604,6 @@ public: /// modern abi class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper { public: - // MethodListnfABITy - LLVM for struct _method_list_t llvm::StructType *MethodListnfABITy; @@ -967,7 +970,8 @@ protected: llvm::Constant *EmitPropertyList(Twine Name, const Decl *Container, const ObjCContainerDecl *OCD, - const ObjCCommonTypesHelper &ObjCTypes); + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty); /// EmitProtocolMethodTypes - Generate the array of extended method type /// strings. The return value has type Int8PtrPtrTy. @@ -981,13 +985,20 @@ protected: SmallVectorImpl<llvm::Constant*> &Properties, const Decl *Container, const ObjCProtocolDecl *Proto, - const ObjCCommonTypesHelper &ObjCTypes); + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty); /// GetProtocolRef - Return a reference to the internal protocol /// description, creating an empty one if it has not been /// defined. The return value has type ProtocolPtrTy. llvm::Constant *GetProtocolRef(const ObjCProtocolDecl *PD); + /// Return a reference to the given Class using runtime calls rather than + /// by a symbol reference. + llvm::Value *EmitClassRefViaRuntime(CodeGenFunction &CGF, + const ObjCInterfaceDecl *ID, + ObjCCommonTypesHelper &ObjCTypes); + public: /// CreateMetadataVar - Create a global variable with internal /// linkage for use by the Objective-C runtime. @@ -1079,7 +1090,8 @@ private: /// has type ClassExtensionPtrTy. llvm::Constant *EmitClassExtension(const ObjCImplementationDecl *ID, CharUnits instanceSize, - bool hasMRCWeakIvars); + bool hasMRCWeakIvars, + bool isClassProperty); /// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, /// for the given class. @@ -1119,9 +1131,8 @@ private: /// EmitMethodList - Emit the method list for the given /// implementation. The return value has type MethodListPtrTy. 
- llvm::Constant *EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods); + llvm::Constant *EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods); /// EmitMethodDescList - Emit a method description list for a list of /// method declarations. @@ -1134,9 +1145,8 @@ private: /// - begin, end: The method list to output. /// /// The return value has type MethodDescriptionListPtrTy. - llvm::Constant *EmitMethodDescList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods); + llvm::Constant *EmitMethodDescList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods); /// GetOrEmitProtocol - Get the protocol object for the given /// declaration, emitting it if necessary. The return value has type @@ -1255,7 +1265,7 @@ public: /// GetClassGlobal - Return the global variable for the Objective-C /// class of the given name. - llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) override { llvm_unreachable("CGObjCMac::GetClassGlobal"); } @@ -1293,9 +1303,8 @@ private: /// AddModuleClassList - Add the given list of class pointers to the /// module with the provided symbol and section names. - void AddModuleClassList(ArrayRef<llvm::GlobalValue*> Container, - const char *SymbolName, - const char *SectionName); + void AddModuleClassList(ArrayRef<llvm::GlobalValue *> Container, + StringRef SymbolName, StringRef SectionName); llvm::GlobalVariable * BuildClassRoTInitializer(unsigned flags, unsigned InstanceStart, @@ -1314,9 +1323,8 @@ private: /// EmitMethodList - Emit the method list for the given /// implementation. The return value has type MethodListnfABITy. - llvm::Constant *EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods); + llvm::Constant *EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods); /// EmitIvarList - Emit the ivar list for the given /// implementation. If ForClass is true the list of class ivars /// (i.e. metaclass ivars) is emitted, otherwise the list of @@ -1357,7 +1365,7 @@ private: /// GetClassGlobal - Return the global variable for the Objective-C /// class of the given name. 
- llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) override; /// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, @@ -1398,13 +1406,9 @@ private: llvm::Constant *GetInterfaceEHType(const ObjCInterfaceDecl *ID, bool ForDefinition); - const char *getMetaclassSymbolPrefix() const { - return "OBJC_METACLASS_$_"; - } + StringRef getMetaclassSymbolPrefix() const { return "OBJC_METACLASS_$_"; } - const char *getClassSymbolPrefix() const { - return "OBJC_CLASS_$_"; - } + StringRef getClassSymbolPrefix() const { return "OBJC_CLASS_$_"; } void GetClassSizeInfo(const ObjCImplementationDecl *OID, uint32_t &InstanceStart, @@ -1506,12 +1510,15 @@ public: llvm::Constant *GetSetStructFunction() override { return ObjCTypes.getCopyStructFn(); } + llvm::Constant *GetGetStructFunction() override { return ObjCTypes.getCopyStructFn(); } + llvm::Constant *GetCppAtomicObjectSetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } + llvm::Constant *GetCppAtomicObjectGetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } @@ -1934,7 +1941,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, // Emit a null-check if there's a consumed argument other than the receiver. bool RequiresNullCheck = false; if (ReceiverCanBeNull && CGM.getLangOpts().ObjCAutoRefCount && Method) { - for (const auto *ParamDecl : Method->params()) { + for (const auto *ParamDecl : Method->parameters()) { if (ParamDecl->hasAttr<NSConsumedAttr>()) { if (!nullReturn.NullBB) nullReturn.init(CGF, Arg0); @@ -2027,6 +2034,7 @@ namespace { bool IsDisordered = false; llvm::SmallVector<IvarInfo, 8> IvarsInfo; + public: IvarLayoutBuilder(CodeGenModule &CGM, CharUnits instanceBegin, CharUnits instanceEnd, bool forStrongLayout) @@ -2062,7 +2070,7 @@ namespace { printf("\n"); } }; -} +} // end anonymous namespace llvm::Constant *CGObjCCommonMac::BuildGCBlockLayout(CodeGenModule &CGM, const CGBlockInfo &blockInfo) { @@ -2141,7 +2149,6 @@ void IvarLayoutBuilder::visitBlock(const CGBlockInfo &blockInfo) { } } - /// getBlockCaptureLifetime - This routine returns life time of the captured /// block variable for the purpose of block layout meta-data generation. FQT is /// the type of the variable captured in the block. 
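Several of the ObjCTypesHelper getters rewritten earlier in this file's diff (getGetPropertyFn, getSetPropertyFn, getEnumerationMutationFn, and the newly added getLookUpClassFn) now share one recipe: build the canonical parameter types, let arrangeBuiltinFunctionDeclaration produce the CGFunctionInfo, lower that to an llvm::FunctionType, and hand it to CGM.CreateRuntimeFunction. Stripped of the clang type machinery, the end result for objc_lookUpClass is roughly the module-level declaration sketched below; modelling both Class and const char * as i8* is an assumption of this sketch, not necessarily what the target lowering produces.

// Illustrative sketch only (LLVM 3.9-era C++ API).
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

llvm::Function *declareLookUpClass(llvm::Module &M) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::Type *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
  // Class objc_lookUpClass(const char *name);
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(Int8PtrTy, {Int8PtrTy}, /*isVarArg=*/false);
  return llvm::Function::Create(FTy, llvm::Function::ExternalLinkage,
                                "objc_lookUpClass", &M);
}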
@@ -2629,7 +2636,6 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, return getBitmapBlockLayout(false); } - llvm::Constant *CGObjCCommonMac::BuildByrefLayout(CodeGen::CodeGenModule &CGM, QualType T) { assert(CGM.getLangOpts().getGC() == LangOptions::NonGC); @@ -2677,6 +2683,25 @@ llvm::Constant *CGObjCCommonMac::GetProtocolRef(const ObjCProtocolDecl *PD) { return GetOrEmitProtocolRef(PD); } +llvm::Value *CGObjCCommonMac::EmitClassRefViaRuntime( + CodeGenFunction &CGF, + const ObjCInterfaceDecl *ID, + ObjCCommonTypesHelper &ObjCTypes) { + llvm::Constant *lookUpClassFn = ObjCTypes.getLookUpClassFn(); + + llvm::Value *className = + CGF.CGM.GetAddrOfConstantCString(ID->getObjCRuntimeNameAsString()) + .getPointer(); + ASTContext &ctx = CGF.CGM.getContext(); + className = + CGF.Builder.CreateBitCast(className, + CGF.ConvertType( + ctx.getPointerType(ctx.CharTy.withConst()))); + llvm::CallInst *call = CGF.Builder.CreateCall(lookUpClassFn, className); + call->setDoesNotThrow(); + return call; +} + /* // Objective-C 1.0 extensions struct _objc_protocol { @@ -2798,6 +2823,7 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) { struct objc_method_description_list *optional_class_methods; struct objc_property_list *instance_properties; const char ** extendedMethodTypes; + struct objc_property_list *class_properties; }; */ llvm::Constant * @@ -2816,13 +2842,16 @@ CGObjCMac::EmitProtocolExtension(const ObjCProtocolDecl *PD, "__OBJC,__cat_cls_meth,regular,no_dead_strip", OptClassMethods), EmitPropertyList("OBJC_$_PROP_PROTO_LIST_" + PD->getName(), nullptr, PD, - ObjCTypes), + ObjCTypes, false), EmitProtocolMethodTypes("OBJC_PROTOCOL_METHOD_TYPES_" + PD->getName(), - MethodTypesExt, ObjCTypes)}; + MethodTypesExt, ObjCTypes), + EmitPropertyList("OBJC_$_CLASS_PROP_PROTO_LIST_" + PD->getName(), nullptr, + PD, ObjCTypes, true)}; // Return null if no extension bits are used. if (Values[1]->isNullValue() && Values[2]->isNullValue() && - Values[3]->isNullValue() && Values[4]->isNullValue()) + Values[3]->isNullValue() && Values[4]->isNullValue() && + Values[5]->isNullValue()) return llvm::Constant::getNullValue(ObjCTypes.ProtocolExtensionPtrTy); llvm::Constant *Init = @@ -2878,10 +2907,15 @@ PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet, SmallVectorImpl<llvm::Constant *> &Properties, const Decl *Container, const ObjCProtocolDecl *Proto, - const ObjCCommonTypesHelper &ObjCTypes) { + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty) { for (const auto *P : Proto->protocols()) - PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes, + IsClassProperty); + for (const auto *PD : Proto->properties()) { + if (IsClassProperty != PD->isClassProperty()) + continue; if (!PropertySet.insert(PD->getIdentifier()).second) continue; llvm::Constant *Prop[] = { @@ -2907,7 +2941,17 @@ PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet, llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, const Decl *Container, const ObjCContainerDecl *OCD, - const ObjCCommonTypesHelper &ObjCTypes) { + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty) { + if (IsClassProperty) { + // Make this entry NULL for OS X with deployment target < 10.11, for iOS + // with deployment target < 9.0. 
+ const llvm::Triple &Triple = CGM.getTarget().getTriple(); + if ((Triple.isMacOSX() && Triple.isMacOSXVersionLT(10, 11)) || + (Triple.isiOS() && Triple.isOSVersionLT(9))) + return llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + } + SmallVector<llvm::Constant *, 16> Properties; llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet; @@ -2919,10 +2963,15 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) for (const ObjCCategoryDecl *ClassExt : OID->known_extensions()) for (auto *PD : ClassExt->properties()) { + if (IsClassProperty != PD->isClassProperty()) + continue; PropertySet.insert(PD->getIdentifier()); AddProperty(PD); } + for (const auto *PD : OCD->properties()) { + if (IsClassProperty != PD->isClassProperty()) + continue; // Don't emit duplicate metadata for properties that were already in a // class extension. if (!PropertySet.insert(PD->getIdentifier()).second) @@ -2932,11 +2981,13 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) { for (const auto *P : OID->all_referenced_protocols()) - PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes, + IsClassProperty); } else if (const ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(OCD)) { for (const auto *P : CD->protocols()) - PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes, + IsClassProperty); } // Return null for empty list. @@ -3001,8 +3052,8 @@ CGObjCMac::GetMethodDescriptionConstant(const ObjCMethodDecl *MD) { } llvm::Constant * -CGObjCMac::EmitMethodDescList(Twine Name, const char *Section, - ArrayRef<llvm::Constant*> Methods) { +CGObjCMac::EmitMethodDescList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods) { // Return null for empty list. if (Methods.empty()) return llvm::Constant::getNullValue(ObjCTypes.MethodDescriptionListPtrTy); @@ -3029,6 +3080,7 @@ CGObjCMac::EmitMethodDescList(Twine Name, const char *Section, struct _objc_protocol_list *protocols; uint32_t size; // <rdar://4585769> struct _objc_property_list *instance_properties; + struct _objc_property_list *class_properties; }; */ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { @@ -3055,7 +3107,7 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { // Class methods should always be defined. ClassMethods.push_back(GetMethodConstant(I)); - llvm::Constant *Values[7]; + llvm::Constant *Values[8]; Values[0] = GetClassName(OCD->getName()); Values[1] = GetClassName(Interface->getObjCRuntimeNameAsString()); LazySymbols.insert(Interface->getIdentifier()); @@ -3077,9 +3129,12 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { // If there is no category @interface then there can be no properties. 
if (Category) { Values[6] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(), - OCD, Category, ObjCTypes); + OCD, Category, ObjCTypes, false); + Values[7] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), + OCD, Category, ObjCTypes, true); } else { Values[6] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + Values[7] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); } llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.CategoryTy, @@ -3274,7 +3329,8 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { Values[ 8] = llvm::Constant::getNullValue(ObjCTypes.CachePtrTy); Values[ 9] = Protocols; Values[10] = BuildStrongIvarLayout(ID, CharUnits::Zero(), Size); - Values[11] = EmitClassExtension(ID, Size, hasMRCWeak); + Values[11] = EmitClassExtension(ID, Size, hasMRCWeak, + false/*isClassProperty*/); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassTy, Values); std::string Name("OBJC_CLASS_"); @@ -3338,8 +3394,9 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID, Values[ 9] = Protocols; // ivar_layout for metaclass is always NULL. Values[10] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy); - // The class extension is always unused for metaclasses. - Values[11] = llvm::Constant::getNullValue(ObjCTypes.ClassExtensionPtrTy); + // The class extension is used to store class properties for metaclasses. + Values[11] = EmitClassExtension(ID, CharUnits::Zero(), false/*hasMRCWeak*/, + true/*isClassProperty*/); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassTy, Values); @@ -3413,19 +3470,28 @@ llvm::Value *CGObjCMac::EmitSuperClassRef(const ObjCInterfaceDecl *ID) { */ llvm::Constant * CGObjCMac::EmitClassExtension(const ObjCImplementationDecl *ID, - CharUnits InstanceSize, bool hasMRCWeakIvars) { + CharUnits InstanceSize, bool hasMRCWeakIvars, + bool isClassProperty) { uint64_t Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ClassExtensionTy); llvm::Constant *Values[3]; Values[0] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size); - Values[1] = BuildWeakIvarLayout(ID, CharUnits::Zero(), InstanceSize, - hasMRCWeakIvars); - Values[2] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getName(), - ID, ID->getClassInterface(), ObjCTypes); + if (isClassProperty) { + llvm::Type *PtrTy = CGM.Int8PtrTy; + Values[1] = llvm::Constant::getNullValue(PtrTy); + } else + Values[1] = BuildWeakIvarLayout(ID, CharUnits::Zero(), InstanceSize, + hasMRCWeakIvars); + if (isClassProperty) + Values[2] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ID->getName(), + ID, ID->getClassInterface(), ObjCTypes, true); + else + Values[2] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getName(), + ID, ID->getClassInterface(), ObjCTypes, false); // Return null if no extension bits are used. - if (Values[1]->isNullValue() && Values[2]->isNullValue()) + if ((!Values[1] || Values[1]->isNullValue()) && Values[2]->isNullValue()) return llvm::Constant::getNullValue(ObjCTypes.ClassExtensionPtrTy); llvm::Constant *Init = @@ -3530,9 +3596,8 @@ llvm::Constant *CGObjCMac::GetMethodConstant(const ObjCMethodDecl *MD) { return llvm::ConstantStruct::get(ObjCTypes.MethodTy, Method); } -llvm::Constant *CGObjCMac::EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods) { +llvm::Constant *CGObjCMac::EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods) { // Return null for empty list. 
if (Methods.empty()) return llvm::Constant::getNullValue(ObjCTypes.MethodListPtrTy); @@ -3607,6 +3672,7 @@ llvm::Constant *CGObjCMac::GetOptimizedPropertySetFunction(bool atomic, llvm::Constant *CGObjCMac::GetGetStructFunction() { return ObjCTypes.getCopyStructFn(); } + llvm::Constant *CGObjCMac::GetSetStructFunction() { return ObjCTypes.getCopyStructFn(); } @@ -3614,6 +3680,7 @@ llvm::Constant *CGObjCMac::GetSetStructFunction() { llvm::Constant *CGObjCMac::GetCppAtomicObjectGetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } + llvm::Constant *CGObjCMac::GetCppAtomicObjectSetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } @@ -3711,7 +3778,7 @@ namespace { void emitWriteHazard(); void emitHazardsInNewBlocks(); }; -} +} // end anonymous namespace /// Create the fragile-ABI read and write hazards based on the current /// state of the function, which is presumed to be immediately prior @@ -4332,7 +4399,6 @@ void CGObjCMac::EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, llvm::Value *args[] = { src, dst.getPointer() }; CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignWeakFn(), args, "weakassign"); - return; } /// EmitObjCGlobalAssign - Code gen for assigning to a __strong object. @@ -4358,7 +4424,6 @@ void CGObjCMac::EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, else CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignThreadLocalFn(), args, "threadlocalassign"); - return; } /// EmitObjCIvarAssign - Code gen for assigning to a __strong object. @@ -4380,7 +4445,6 @@ void CGObjCMac::EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF, dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); llvm::Value *args[] = { src, dst.getPointer(), ivarOffset }; CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignIvarFn(), args); - return; } /// EmitObjCStrongCastAssign - Code gen for assigning to a __strong cast object. @@ -4401,7 +4465,6 @@ void CGObjCMac::EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF, llvm::Value *args[] = { src, dst.getPointer() }; CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignStrongCastFn(), args, "strongassign"); - return; } void CGObjCMac::EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, @@ -4455,7 +4518,8 @@ enum ImageInfoFlags { // A flag indicating that the module has no instances of a @synthesize of a // superclass variable. <rdar://problem/6803242> eImageInfo_CorrectedSynthesize = (1 << 4), // This flag is no longer set by clang. - eImageInfo_ImageIsSimulated = (1 << 5) + eImageInfo_ImageIsSimulated = (1 << 5), + eImageInfo_ClassProperties = (1 << 6) }; void CGObjCCommonMac::EmitImageInfo() { @@ -4507,6 +4571,10 @@ void CGObjCCommonMac::EmitImageInfo() { Triple.getArch() == llvm::Triple::x86_64)) Mod.addModuleFlag(llvm::Module::Error, "Objective-C Is Simulated", eImageInfo_ImageIsSimulated); + + // Indicate whether we are generating class properties. + Mod.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties", + eImageInfo_ClassProperties); } // struct objc_module { @@ -4602,6 +4670,11 @@ llvm::Value *CGObjCMac::EmitClassRefFromId(CodeGenFunction &CGF, llvm::Value *CGObjCMac::EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { + // If the class has the objc_runtime_visible attribute, we need to + // use the Objective-C runtime to get the class. 
+ if (ID->hasAttr<ObjCRuntimeVisibleAttr>()) + return EmitClassRefViaRuntime(CGF, ID, ObjCTypes); + return EmitClassRefFromId(CGF, ID->getIdentifier()); } @@ -4954,7 +5027,7 @@ CGObjCCommonMac::BuildIvarLayout(const ObjCImplementationDecl *OMD, baseOffset = CharUnits::Zero(); } - baseOffset = baseOffset.RoundUpToAlignment(CGM.getPointerAlign()); + baseOffset = baseOffset.alignTo(CGM.getPointerAlign()); } else { CGM.getContext().DeepCollectObjCIvars(OI, true, ivars); @@ -5131,9 +5204,8 @@ void CGObjCMac::FinishModule() { } CGObjCNonFragileABIMac::CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm) - : CGObjCCommonMac(cgm), - ObjCTypes(cgm) { - ObjCEmptyCacheVar = ObjCEmptyVtableVar = nullptr; + : CGObjCCommonMac(cgm), ObjCTypes(cgm), ObjCEmptyCacheVar(nullptr), + ObjCEmptyVtableVar(nullptr) { ObjCABI = 2; } @@ -5223,7 +5295,6 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_cache * CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache"); CachePtrTy = llvm::PointerType::getUnqual(CacheTy); - } ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) @@ -5256,12 +5327,13 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_method_description_list *optional_class_methods; // struct _objc_property_list *instance_properties; // const char ** extendedMethodTypes; + // struct _objc_property_list *class_properties; // } ProtocolExtensionTy = llvm::StructType::create("struct._objc_protocol_extension", IntTy, MethodDescriptionListPtrTy, MethodDescriptionListPtrTy, PropertyListPtrTy, - Int8PtrPtrTy, nullptr); + Int8PtrPtrTy, PropertyListPtrTy, nullptr); // struct _objc_protocol_extension * ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy); @@ -5359,14 +5431,17 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // char *class_name; // struct _objc_method_list *instance_method; // struct _objc_method_list *class_method; + // struct _objc_protocol_list *protocols; // uint32_t size; // sizeof(struct _objc_category) // struct _objc_property_list *instance_properties;// category's @property + // struct _objc_property_list *class_properties; // } CategoryTy = llvm::StructType::create("struct._objc_category", Int8PtrTy, Int8PtrTy, MethodListPtrTy, MethodListPtrTy, ProtocolListPtrTy, - IntTy, PropertyListPtrTy, nullptr); + IntTy, PropertyListPtrTy, PropertyListPtrTy, + nullptr); // Global metadata structures @@ -5405,7 +5480,6 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) llvm::StructType::create("struct._objc_exception_data", llvm::ArrayType::get(CGM.Int32Ty,SetJmpBufferSize), StackPtrTy, nullptr); - } ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModule &cgm) @@ -5434,6 +5508,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const uint32_t flags; // = 0 // const char ** extendedMethodTypes; // const char *demangledName; + // const struct _prop_list_t * class_properties; // } // Holder for struct _protocol_list_t * @@ -5446,7 +5521,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, - Int8PtrTy, + Int8PtrTy, PropertyListPtrTy, nullptr); // struct _protocol_t* @@ -5539,6 +5614,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const struct _method_list_t * const class_methods; // const struct 
_protocol_list_t * const protocols; // const struct _prop_list_t * const properties; + // const struct _prop_list_t * const class_properties; + // const uint32_t size; // } CategorynfABITy = llvm::StructType::create("struct._category_t", Int8PtrTy, ClassnfABIPtrTy, @@ -5546,6 +5623,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, PropertyListPtrTy, + PropertyListPtrTy, + IntTy, nullptr); // New types for nonfragile abi messaging. @@ -5609,10 +5688,9 @@ llvm::Function *CGObjCNonFragileABIMac::ModuleInitFunction() { return nullptr; } -void CGObjCNonFragileABIMac:: -AddModuleClassList(ArrayRef<llvm::GlobalValue*> Container, - const char *SymbolName, - const char *SectionName) { +void CGObjCNonFragileABIMac::AddModuleClassList( + ArrayRef<llvm::GlobalValue *> Container, StringRef SymbolName, + StringRef SectionName) { unsigned NumClasses = Container.size(); if (!NumClasses) @@ -5814,13 +5892,16 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( if (flags & NonFragileABI_Class_Meta) { Values[ 7] = llvm::Constant::getNullValue(ObjCTypes.IvarListnfABIPtrTy); Values[ 8] = GetIvarLayoutName(nullptr, ObjCTypes); - Values[ 9] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + Values[ 9] = EmitPropertyList( + "\01l_OBJC_$_CLASS_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), + ID, ID->getClassInterface(), ObjCTypes, true); } else { Values[ 7] = EmitIvarList(ID); Values[ 8] = BuildWeakIvarLayout(ID, beginInstance, endInstance, hasMRCWeak); - Values[ 9] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), - ID, ID->getClassInterface(), ObjCTypes); + Values[ 9] = EmitPropertyList( + "\01l_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), + ID, ID->getClassInterface(), ObjCTypes, false); } llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassRonfABITy, Values); @@ -5870,8 +5951,9 @@ llvm::GlobalVariable *CGObjCNonFragileABIMac::BuildClassMetaData( GV->setSection("__DATA, __objc_data"); GV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy)); - if (HiddenVisibility) - GV->setVisibility(llvm::GlobalValue::HiddenVisibility); + if (!CGM.getTriple().isOSBinFormatCOFF()) + if (HiddenVisibility) + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); return GV; } @@ -5896,49 +5978,60 @@ void CGObjCNonFragileABIMac::GetClassSizeInfo(const ObjCImplementationDecl *OID, InstanceStart = RL.getFieldOffset(0) / CGM.getContext().getCharWidth(); } +static llvm::GlobalValue::DLLStorageClassTypes getStorage(CodeGenModule &CGM, + StringRef Name) { + IdentifierInfo &II = CGM.getContext().Idents.get(Name); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + const VarDecl *VD = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((VD = dyn_cast<VarDecl>(Result))) + break; + + if (!VD) + return llvm::GlobalValue::DLLImportStorageClass; + if (VD->hasAttr<DLLExportAttr>()) + return llvm::GlobalValue::DLLExportStorageClass; + if (VD->hasAttr<DLLImportAttr>()) + return llvm::GlobalValue::DLLImportStorageClass; + return llvm::GlobalValue::DefaultStorageClass; +} + void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { - std::string ClassName = ID->getObjCRuntimeNameAsString(); if (!ObjCEmptyCacheVar) { - ObjCEmptyCacheVar = new llvm::GlobalVariable( - CGM.getModule(), - ObjCTypes.CacheTy, - false, - 
llvm::GlobalValue::ExternalLinkage, - nullptr, - "_objc_empty_cache"); - - // Make this entry NULL for any iOS device target, any iOS simulator target, - // OS X with deployment target 10.9 or later. + ObjCEmptyCacheVar = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.CacheTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + "_objc_empty_cache"); + if (CGM.getTriple().isOSBinFormatCOFF()) + ObjCEmptyCacheVar->setDLLStorageClass(getStorage(CGM, "_objc_empty_cache")); + + // Only OS X with deployment version <10.9 use the empty vtable symbol const llvm::Triple &Triple = CGM.getTarget().getTriple(); - if (Triple.isiOS() || Triple.isWatchOS() || - (Triple.isMacOSX() && !Triple.isMacOSXVersionLT(10, 9))) - // This entry will be null. - ObjCEmptyVtableVar = nullptr; - else - ObjCEmptyVtableVar = new llvm::GlobalVariable( - CGM.getModule(), - ObjCTypes.ImpnfABITy, - false, - llvm::GlobalValue::ExternalLinkage, - nullptr, - "_objc_empty_vtable"); - } - assert(ID->getClassInterface() && - "CGObjCNonFragileABIMac::GenerateClass - class is 0"); + if (Triple.isMacOSX() && Triple.isMacOSXVersionLT(10, 9)) + ObjCEmptyVtableVar = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ImpnfABITy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + "_objc_empty_vtable"); + } + // FIXME: Is this correct (that meta class size is never computed)? uint32_t InstanceStart = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ClassnfABITy); uint32_t InstanceSize = InstanceStart; uint32_t flags = NonFragileABI_Class_Meta; - llvm::SmallString<64> ObjCMetaClassName(getMetaclassSymbolPrefix()); - llvm::SmallString<64> ObjCClassName(getClassSymbolPrefix()); - llvm::SmallString<64> TClassName; llvm::GlobalVariable *SuperClassGV, *IsAGV; + StringRef ClassName = ID->getObjCRuntimeNameAsString(); + const auto *CI = ID->getClassInterface(); + assert(CI && "CGObjCNonFragileABIMac::GenerateClass - class is 0"); + // Build the flags for the metaclass. - bool classIsHidden = - ID->getClassInterface()->getVisibility() == HiddenVisibility; + bool classIsHidden = (CGM.getTriple().isOSBinFormatCOFF()) + ? 
!CI->hasAttr<DLLExportAttr>() + : CI->getVisibility() == HiddenVisibility; if (classIsHidden) flags |= NonFragileABI_Class_Hidden; @@ -5947,45 +6040,59 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { if (ID->hasNonZeroConstructors() || ID->hasDestructors()) { flags |= NonFragileABI_Class_HasCXXStructors; if (!ID->hasNonZeroConstructors()) - flags |= NonFragileABI_Class_HasCXXDestructorOnly; + flags |= NonFragileABI_Class_HasCXXDestructorOnly; } - if (!ID->getClassInterface()->getSuperClass()) { + if (!CI->getSuperClass()) { // class is root flags |= NonFragileABI_Class_Root; - TClassName = ObjCClassName; - TClassName += ClassName; - SuperClassGV = GetClassGlobal(TClassName.str(), - ID->getClassInterface()->isWeakImported()); - TClassName = ObjCMetaClassName; - TClassName += ClassName; - IsAGV = GetClassGlobal(TClassName.str(), - ID->getClassInterface()->isWeakImported()); + + SuperClassGV = GetClassGlobal((getClassSymbolPrefix() + ClassName).str(), + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLImportAttr>()) + SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + + IsAGV = GetClassGlobal((getMetaclassSymbolPrefix() + ClassName).str(), + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLImportAttr>()) + IsAGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); } else { // Has a root. Current class is not a root. const ObjCInterfaceDecl *Root = ID->getClassInterface(); while (const ObjCInterfaceDecl *Super = Root->getSuperClass()) Root = Super; - TClassName = ObjCMetaClassName ; - TClassName += Root->getObjCRuntimeNameAsString(); - IsAGV = GetClassGlobal(TClassName.str(), + + const auto *Super = CI->getSuperClass(); + StringRef RootClassName = Root->getObjCRuntimeNameAsString(); + StringRef SuperClassName = Super->getObjCRuntimeNameAsString(); + + IsAGV = GetClassGlobal((getMetaclassSymbolPrefix() + RootClassName).str(), Root->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (Root->hasAttr<DLLImportAttr>()) + IsAGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); // work on super class metadata symbol. 
- TClassName = ObjCMetaClassName; - TClassName += ID->getClassInterface()->getSuperClass()->getObjCRuntimeNameAsString(); - SuperClassGV = GetClassGlobal( - TClassName.str(), - ID->getClassInterface()->getSuperClass()->isWeakImported()); - } - llvm::GlobalVariable *CLASS_RO_GV = BuildClassRoTInitializer(flags, - InstanceStart, - InstanceSize,ID); - TClassName = ObjCMetaClassName; - TClassName += ClassName; - llvm::GlobalVariable *MetaTClass = BuildClassMetaData( - TClassName.str(), IsAGV, SuperClassGV, CLASS_RO_GV, classIsHidden, - ID->getClassInterface()->isWeakImported()); + SuperClassGV = + GetClassGlobal((getMetaclassSymbolPrefix() + SuperClassName).str(), + Super->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (Super->hasAttr<DLLImportAttr>()) + SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + + llvm::GlobalVariable *CLASS_RO_GV = + BuildClassRoTInitializer(flags, InstanceStart, InstanceSize, ID); + + llvm::GlobalVariable *MetaTClass = + BuildClassMetaData((getMetaclassSymbolPrefix() + ClassName).str(), IsAGV, + SuperClassGV, CLASS_RO_GV, classIsHidden, + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLExportAttr>()) + MetaTClass->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); DefinedMetaClasses.push_back(MetaTClass); // Metadata for the class @@ -6006,34 +6113,38 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { flags |= NonFragileABI_Class_HasCXXDestructorOnly; } - if (hasObjCExceptionAttribute(CGM.getContext(), ID->getClassInterface())) + if (hasObjCExceptionAttribute(CGM.getContext(), CI)) flags |= NonFragileABI_Class_Exception; - if (!ID->getClassInterface()->getSuperClass()) { + if (!CI->getSuperClass()) { flags |= NonFragileABI_Class_Root; SuperClassGV = nullptr; } else { // Has a root. Current class is not a root. - TClassName = ObjCClassName; - TClassName += ID->getClassInterface()->getSuperClass()->getObjCRuntimeNameAsString(); - SuperClassGV = GetClassGlobal( - TClassName.str(), - ID->getClassInterface()->getSuperClass()->isWeakImported()); + const auto *Super = CI->getSuperClass(); + StringRef SuperClassName = Super->getObjCRuntimeNameAsString(); + + SuperClassGV = + GetClassGlobal((getClassSymbolPrefix() + SuperClassName).str(), + Super->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (Super->hasAttr<DLLImportAttr>()) + SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); } + GetClassSizeInfo(ID, InstanceStart, InstanceSize); - CLASS_RO_GV = BuildClassRoTInitializer(flags, - InstanceStart, - InstanceSize, - ID); + CLASS_RO_GV = + BuildClassRoTInitializer(flags, InstanceStart, InstanceSize, ID); - TClassName = ObjCClassName; - TClassName += ClassName; llvm::GlobalVariable *ClassMD = - BuildClassMetaData(TClassName.str(), MetaTClass, SuperClassGV, CLASS_RO_GV, - classIsHidden, - ID->getClassInterface()->isWeakImported()); + BuildClassMetaData((getClassSymbolPrefix() + ClassName).str(), MetaTClass, + SuperClassGV, CLASS_RO_GV, classIsHidden, + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLExportAttr>()) + ClassMD->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); DefinedClasses.push_back(ClassMD); - ImplementedClasses.push_back(ID->getClassInterface()); + ImplementedClasses.push_back(CI); // Determine if this class is also "non-lazy". 
if (ImplementationIsNonLazy(ID)) @@ -6041,7 +6152,7 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { // Force the definition of the EHType if necessary. if (flags & NonFragileABI_Class_Exception) - GetInterfaceEHType(ID->getClassInterface(), true); + GetInterfaceEHType(CI, true); // Make sure method definition entries are all clear for next implementation. MethodDefinitions.clear(); } @@ -6093,6 +6204,8 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, /// const struct _method_list_t * const class_methods; /// const struct _protocol_list_t * const protocols; /// const struct _prop_list_t * const properties; +/// const struct _prop_list_t * const class_properties; +/// const uint32_t size; /// } /// void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { @@ -6107,7 +6220,7 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { llvm::SmallString<64> ExtClassName(getClassSymbolPrefix()); ExtClassName += Interface->getObjCRuntimeNameAsString(); - llvm::Constant *Values[6]; + llvm::Constant *Values[8]; Values[0] = GetClassName(OCD->getIdentifier()->getName()); // meta-class entry symbol llvm::GlobalVariable *ClassGV = @@ -6156,12 +6269,18 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { Category->protocol_begin(), Category->protocol_end()); Values[5] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(), - OCD, Category, ObjCTypes); + OCD, Category, ObjCTypes, false); + Values[6] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), + OCD, Category, ObjCTypes, true); } else { Values[4] = llvm::Constant::getNullValue(ObjCTypes.ProtocolListnfABIPtrTy); Values[5] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + Values[6] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); } + unsigned Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.CategorynfABITy); + Values[7] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size); + llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.CategorynfABITy, Values); @@ -6210,9 +6329,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetMethodConstant( /// } /// llvm::Constant * -CGObjCNonFragileABIMac::EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods) { +CGObjCNonFragileABIMac::EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods) { // Return null for empty list. 
if (Methods.empty()) return llvm::Constant::getNullValue(ObjCTypes.MethodListnfABIPtrTy); @@ -6242,18 +6360,28 @@ CGObjCNonFragileABIMac::EmitMethodList(Twine Name, llvm::GlobalVariable * CGObjCNonFragileABIMac::ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar) { - const ObjCInterfaceDecl *Container = Ivar->getContainingInterface(); llvm::SmallString<64> Name("OBJC_IVAR_$_"); Name += Container->getObjCRuntimeNameAsString(); Name += "."; Name += Ivar->getName(); - llvm::GlobalVariable *IvarOffsetGV = - CGM.getModule().getGlobalVariable(Name); - if (!IvarOffsetGV) - IvarOffsetGV = new llvm::GlobalVariable( - CGM.getModule(), ObjCTypes.IvarOffsetVarTy, false, - llvm::GlobalValue::ExternalLinkage, nullptr, Name.str()); + llvm::GlobalVariable *IvarOffsetGV = CGM.getModule().getGlobalVariable(Name); + if (!IvarOffsetGV) { + IvarOffsetGV = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.IvarOffsetVarTy, + false, llvm::GlobalValue::ExternalLinkage, + nullptr, Name.str()); + if (CGM.getTriple().isOSBinFormatCOFF()) { + bool IsPrivateOrPackage = + Ivar->getAccessControl() == ObjCIvarDecl::Private || + Ivar->getAccessControl() == ObjCIvarDecl::Package; + + if (ID->hasAttr<DLLExportAttr>() && !IsPrivateOrPackage) + IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + else if (ID->hasAttr<DLLImportAttr>()) + IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + } return IvarOffsetGV; } @@ -6267,14 +6395,17 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID, IvarOffsetGV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy)); - // FIXME: This matches gcc, but shouldn't the visibility be set on the use as - // well (i.e., in ObjCIvarOffsetVariable). - if (Ivar->getAccessControl() == ObjCIvarDecl::Private || - Ivar->getAccessControl() == ObjCIvarDecl::Package || - ID->getVisibility() == HiddenVisibility) - IvarOffsetGV->setVisibility(llvm::GlobalValue::HiddenVisibility); - else - IvarOffsetGV->setVisibility(llvm::GlobalValue::DefaultVisibility); + if (!CGM.getTriple().isOSBinFormatCOFF()) { + // FIXME: This matches gcc, but shouldn't the visibility be set on the use + // as well (i.e., in ObjCIvarOffsetVariable). + if (Ivar->getAccessControl() == ObjCIvarDecl::Private || + Ivar->getAccessControl() == ObjCIvarDecl::Package || + ID->getVisibility() == HiddenVisibility) + IvarOffsetGV->setVisibility(llvm::GlobalValue::HiddenVisibility); + else + IvarOffsetGV->setVisibility(llvm::GlobalValue::DefaultVisibility); + } + IvarOffsetGV->setSection("__DATA, __objc_ivar"); return IvarOffsetGV; } @@ -6361,7 +6492,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( const ObjCProtocolDecl *PD) { llvm::GlobalVariable *&Entry = Protocols[PD->getIdentifier()]; - if (!Entry) { + if (!Entry) // We use the initializer as a marker of whether this is a forward // reference or not. At module finalization we add the empty // contents for protocols which were referenced but never defined. 
@@ -6370,8 +6501,6 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( false, llvm::GlobalValue::ExternalLinkage, nullptr, "\01l_OBJC_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString()); - Entry->setSection("__DATA,__datacoal_nt,coalesced"); - } return Entry; } @@ -6391,6 +6520,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( /// const uint32_t flags; // = 0 /// const char ** extendedMethodTypes; /// const char *demangledName; +/// const struct _prop_list_t * class_properties; /// } /// @endcode /// @@ -6442,7 +6572,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( MethodTypesExt.insert(MethodTypesExt.end(), OptMethodTypesExt.begin(), OptMethodTypesExt.end()); - llvm::Constant *Values[12]; + llvm::Constant *Values[13]; // isa is NULL Values[0] = llvm::Constant::getNullValue(ObjCTypes.ObjectPtrTy); Values[1] = GetClassName(PD->getObjCRuntimeNameAsString()); @@ -6466,8 +6596,9 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( + PD->getObjCRuntimeNameAsString(), "__DATA, __objc_const", OptClassMethods); - Values[7] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), - nullptr, PD, ObjCTypes); + Values[7] = EmitPropertyList( + "\01l_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), + nullptr, PD, ObjCTypes, false); uint32_t Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ProtocolnfABITy); Values[8] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size); @@ -6477,6 +6608,10 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( MethodTypesExt, ObjCTypes); // const char *demangledName; Values[11] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy); + + Values[12] = EmitPropertyList( + "\01l_OBJC_$_CLASS_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), + nullptr, PD, ObjCTypes, true); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ProtocolnfABITy, Values); @@ -6492,7 +6627,6 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( "\01l_OBJC_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString()); Entry->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABITy)); - Entry->setSection("__DATA,__datacoal_nt,coalesced"); Protocols[PD->getIdentifier()] = Entry; } @@ -6640,7 +6774,7 @@ static void appendSelectorForMessageRefTable(std::string &buffer, } } -/// Emit a "v-table" message send. We emit a weak hidden-visibility +/// Emit a "vtable" message send. We emit a weak hidden-visibility /// struct, initially containing the selector pointer and a pointer to /// a "fixup" variant of the appropriate objc_msgSend. To call, we /// load and call the function pointer, passing the address of the @@ -6734,7 +6868,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, bool requiresnullCheck = false; if (CGM.getLangOpts().ObjCAutoRefCount && method) - for (const auto *ParamDecl : method->params()) { + for (const auto *ParamDecl : method->parameters()) { if (ParamDecl->hasAttr<NSConsumedAttr>()) { if (!nullReturn.NullBB) nullReturn.init(CGF, arg0); @@ -6783,7 +6917,7 @@ CGObjCNonFragileABIMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF, } llvm::GlobalVariable * -CGObjCNonFragileABIMac::GetClassGlobal(const std::string &Name, bool Weak) { +CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name, bool Weak) { llvm::GlobalValue::LinkageTypes L = Weak ? 
llvm::GlobalValue::ExternalWeakLinkage : llvm::GlobalValue::ExternalLinkage; @@ -6806,9 +6940,8 @@ llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, llvm::GlobalVariable *&Entry = ClassReferences[II]; if (!Entry) { - std::string ClassName( - getClassSymbolPrefix() + - (ID ? ID->getObjCRuntimeNameAsString() : II->getName()).str()); + StringRef Name = ID ? ID->getObjCRuntimeNameAsString() : II->getName(); + std::string ClassName = (getClassSymbolPrefix() + Name).str(); llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName, Weak); Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false, llvm::GlobalValue::PrivateLinkage, @@ -6822,6 +6955,11 @@ llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, llvm::Value *CGObjCNonFragileABIMac::EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { + // If the class has the objc_runtime_visible attribute, we need to + // use the Objective-C runtime to get the class. + if (ID->hasAttr<ObjCRuntimeVisibleAttr>()) + return EmitClassRefViaRuntime(CGF, ID, ObjCTypes); + return EmitClassRefFromId(CGF, ID->getIdentifier(), ID->isWeakImported(), ID); } @@ -7100,27 +7238,28 @@ CGObjCNonFragileABIMac::EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, llvm::Constant * CGObjCNonFragileABIMac::GetEHType(QualType T) { // There's a particular fixed type info for 'id'. - if (T->isObjCIdType() || - T->isObjCQualifiedIdType()) { - llvm::Constant *IDEHType = - CGM.getModule().getGlobalVariable("OBJC_EHTYPE_id"); - if (!IDEHType) + if (T->isObjCIdType() || T->isObjCQualifiedIdType()) { + auto *IDEHType = CGM.getModule().getGlobalVariable("OBJC_EHTYPE_id"); + if (!IDEHType) { IDEHType = - new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, - false, - llvm::GlobalValue::ExternalLinkage, - nullptr, "OBJC_EHTYPE_id"); + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + "OBJC_EHTYPE_id"); + if (CGM.getTriple().isOSBinFormatCOFF()) + IDEHType->setDLLStorageClass(getStorage(CGM, "OBJC_EHTYPE_id")); + } return IDEHType; } // All other types should be Objective-C interface pointer types. - const ObjCObjectPointerType *PT = - T->getAs<ObjCObjectPointerType>(); + const ObjCObjectPointerType *PT = T->getAs<ObjCObjectPointerType>(); assert(PT && "Invalid @catch type."); + const ObjCInterfaceType *IT = PT->getInterfaceType(); assert(IT && "Invalid @catch type."); + return GetInterfaceEHType(IT->getDecl(), false); -} +} void CGObjCNonFragileABIMac::EmitTryStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtTryStmt &S) { @@ -7153,6 +7292,7 @@ llvm::Constant * CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, bool ForDefinition) { llvm::GlobalVariable * &Entry = EHTypeReferences[ID->getIdentifier()]; + StringRef ClassName = ID->getObjCRuntimeNameAsString(); // If we don't need a definition, return the entry if found or check // if we use an external reference. @@ -7162,38 +7302,43 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, // If this type (or a super class) has the __objc_exception__ // attribute, emit an external reference. 
- if (hasObjCExceptionAttribute(CGM.getContext(), ID)) - return Entry = - new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, - llvm::GlobalValue::ExternalLinkage, - nullptr, - ("OBJC_EHTYPE_$_" + - ID->getObjCRuntimeNameAsString())); + if (hasObjCExceptionAttribute(CGM.getContext(), ID)) { + std::string EHTypeName = ("OBJC_EHTYPE_$_" + ClassName).str(); + Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, + false, llvm::GlobalValue::ExternalLinkage, + nullptr, EHTypeName); + if (CGM.getTriple().isOSBinFormatCOFF()) { + if (ID->hasAttr<DLLExportAttr>()) + Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + else if (ID->hasAttr<DLLImportAttr>()) + Entry->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + return Entry; + } } - // Otherwise we need to either make a new entry or fill in the - // initializer. + // Otherwise we need to either make a new entry or fill in the initializer. assert((!Entry || !Entry->hasInitializer()) && "Duplicate EHType definition"); - llvm::SmallString<64> ClassName(getClassSymbolPrefix()); - ClassName += ID->getObjCRuntimeNameAsString(); + std::string VTableName = "objc_ehtype_vtable"; - llvm::GlobalVariable *VTableGV = - CGM.getModule().getGlobalVariable(VTableName); - if (!VTableGV) - VTableGV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.Int8PtrTy, - false, - llvm::GlobalValue::ExternalLinkage, - nullptr, VTableName); + auto *VTableGV = CGM.getModule().getGlobalVariable(VTableName); + if (!VTableGV) { + VTableGV = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.Int8PtrTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + VTableName); + if (CGM.getTriple().isOSBinFormatCOFF()) + VTableGV->setDLLStorageClass(getStorage(CGM, VTableName)); + } llvm::Value *VTableIdx = llvm::ConstantInt::get(CGM.Int32Ty, 2); - llvm::Constant *Values[] = { llvm::ConstantExpr::getGetElementPtr(VTableGV->getValueType(), VTableGV, VTableIdx), GetClassName(ID->getObjCRuntimeNameAsString()), - GetClassGlobal(ClassName.str())}; - llvm::Constant *Init = - llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values); + GetClassGlobal((getClassSymbolPrefix() + ClassName).str()), + }; + llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values); llvm::GlobalValue::LinkageTypes L = ForDefinition ? 
llvm::GlobalValue::ExternalLinkage @@ -7201,24 +7346,25 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, if (Entry) { Entry->setInitializer(Init); } else { - llvm::SmallString<64> EHTYPEName("OBJC_EHTYPE_$_"); - EHTYPEName += ID->getObjCRuntimeNameAsString(); - Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, - L, - Init, - EHTYPEName.str()); + Entry = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, L, + Init, ("OBJC_EHTYPE_$_" + ClassName).str()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (hasObjCExceptionAttribute(CGM.getContext(), ID)) + if (ID->hasAttr<DLLExportAttr>()) + Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } assert(Entry->getLinkage() == L); - if (ID->getVisibility() == HiddenVisibility) - Entry->setVisibility(llvm::GlobalValue::HiddenVisibility); - Entry->setAlignment(CGM.getDataLayout().getABITypeAlignment( - ObjCTypes.EHTypeTy)); + if (!CGM.getTriple().isOSBinFormatCOFF()) + if (ID->getVisibility() == HiddenVisibility) + Entry->setVisibility(llvm::GlobalValue::HiddenVisibility); + + const auto &DL = CGM.getDataLayout(); + Entry->setAlignment(DL.getABITypeAlignment(ObjCTypes.EHTypeTy)); if (ForDefinition) Entry->setSection("__DATA,__objc_const"); - else - Entry->setSection("__DATA,__datacoal_nt,coalesced"); return Entry; } diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp index 7be9ae996040..0caf6d9f210a 100644 --- a/lib/CodeGen/CGObjCRuntime.cpp +++ b/lib/CodeGen/CGObjCRuntime.cpp @@ -120,9 +120,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth(); uint64_t AlignmentBits = CGF.CGM.getTarget().getCharAlign(); uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext()); - CharUnits StorageSize = - CGF.CGM.getContext().toCharUnitsFromBits( - llvm::RoundUpToAlignment(BitOffset + BitFieldSize, AlignmentBits)); + CharUnits StorageSize = CGF.CGM.getContext().toCharUnitsFromBits( + llvm::alignTo(BitOffset + BitFieldSize, AlignmentBits)); CharUnits Alignment = CGF.CGM.getContext().toCharUnitsFromBits(AlignmentBits); // Allocate a new CGBitFieldInfo object to describe this access. @@ -364,25 +363,15 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, llvm::PointerType *signatureType = CGM.getTypes().GetFunctionType(signature)->getPointerTo(); - // If that's not variadic, there's no need to recompute the ABI - // arrangement. - if (!signature.isVariadic()) - return MessageSendInfo(signature, signatureType); + const CGFunctionInfo &signatureForCall = + CGM.getTypes().arrangeCall(signature, callArgs); - // Otherwise, there is. - FunctionType::ExtInfo einfo = signature.getExtInfo(); - const CGFunctionInfo &argsInfo = - CGM.getTypes().arrangeFreeFunctionCall(resultType, callArgs, einfo, - signature.getRequiredArgs()); - - return MessageSendInfo(argsInfo, signatureType); + return MessageSendInfo(signatureForCall, signatureType); } // There's no method; just use a default CC. const CGFunctionInfo &argsInfo = - CGM.getTypes().arrangeFreeFunctionCall(resultType, callArgs, - FunctionType::ExtInfo(), - RequiredArgs::All); + CGM.getTypes().arrangeUnprototypedObjCMessageSend(resultType, callArgs); // Derive the signature to call from that. 
llvm::PointerType *signatureType = diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h index 28d88dd10be9..6c330590f7cd 100644 --- a/lib/CodeGen/CGObjCRuntime.h +++ b/lib/CodeGen/CGObjCRuntime.h @@ -280,7 +280,7 @@ public: virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM, QualType T) = 0; - virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + virtual llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) = 0; struct MessageSendInfo { diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index 686678962d3e..38aebea18ed3 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -40,46 +40,12 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { default: llvm_unreachable("Unexpected opencl builtin type!"); return nullptr; - case BuiltinType::OCLImage1d: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image1d_t"), ImgAddrSpc); - case BuiltinType::OCLImage1dArray: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image1d_array_t"), ImgAddrSpc); - case BuiltinType::OCLImage1dBuffer: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image1d_buffer_t"), ImgAddrSpc); - case BuiltinType::OCLImage2d: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image2d_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dArray: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image2d_array_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dDepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_depth_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dArrayDepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_array_depth_t"), +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: \ + return llvm::PointerType::get( \ + llvm::StructType::create(Ctx, "opencl." #ImgType "_" #Suffix "_t"), \ ImgAddrSpc); - case BuiltinType::OCLImage2dMSAA: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_msaa_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dArrayMSAA: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_array_msaa_t"), - ImgAddrSpc); - case BuiltinType::OCLImage2dMSAADepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_msaa_depth_t"), - ImgAddrSpc); - case BuiltinType::OCLImage2dArrayMSAADepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_array_msaa_depth_t"), - ImgAddrSpc); - case BuiltinType::OCLImage3d: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image3d_t"), ImgAddrSpc); +#include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: return llvm::IntegerType::get(Ctx, 32); case BuiltinType::OCLEvent: diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 5cfacacbe01a..6a0edbe0e7a9 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -72,6 +72,8 @@ public: /// \return LValue for thread id variable. This LValue always has type int32*. 
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); + virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} + CGOpenMPRegionKind getRegionKind() const { return RegionKind; } OpenMPDirectiveKind getDirectiveKind() const { return Kind; } @@ -82,6 +84,8 @@ public: return Info->getKind() == CR_OpenMP; } + ~CGOpenMPRegionInfo() override = default; + protected: CGOpenMPRegionKind RegionKind; RegionCodeGenTy CodeGen; @@ -90,7 +94,7 @@ protected: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, @@ -100,6 +104,7 @@ public: ThreadIDVar(ThreadIDVar) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } + /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } @@ -120,16 +125,65 @@ private: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: + class UntiedTaskActionTy final : public PrePostActionTy { + bool Untied; + const VarDecl *PartIDVar; + const RegionCodeGenTy UntiedCodeGen; + llvm::SwitchInst *UntiedSwitch = nullptr; + + public: + UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, + const RegionCodeGenTy &UntiedCodeGen) + : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} + void Enter(CodeGenFunction &CGF) override { + if (Untied) { + // Emit task switching point. 
+ auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs<PointerType>()); + auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); + auto *DoneBB = CGF.createBasicBlock(".untied.done."); + UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); + CGF.EmitBlock(DoneBB); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(0), + CGF.Builder.GetInsertBlock()); + emitUntiedSwitch(CGF); + } + } + void emitUntiedSwitch(CodeGenFunction &CGF) const { + if (Untied) { + auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs<PointerType>()); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + PartIdLVal); + UntiedCodeGen(CGF); + CodeGenFunction::JumpDest CurPoint = + CGF.getJumpDestInCurrentScope(".untied.next."); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + CGF.Builder.GetInsertBlock()); + CGF.EmitBranchThroughCleanup(CurPoint); + CGF.EmitBlock(CurPoint.getBlock()); + } + } + unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } + }; CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) + OpenMPDirectiveKind Kind, bool HasCancel, + const UntiedTaskActionTy &Action) : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), - ThreadIDVar(ThreadIDVar) { + ThreadIDVar(ThreadIDVar), Action(Action) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } + /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } @@ -140,6 +194,10 @@ public: /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } + void emitUntiedSwitch(CodeGenFunction &CGF) override { + Action.emitUntiedSwitch(CGF); + } + static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == @@ -150,6 +208,8 @@ private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; + /// Action for emitting code for untied tasks. + const UntiedTaskActionTy &Action; }; /// \brief API for inlined captured statement code generation in OpenMP @@ -162,12 +222,14 @@ public: : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), OldCSI(OldCSI), OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} + // \brief Retrieve the value of the context parameter. llvm::Value *getContextValue() const override { if (OuterRegionInfo) return OuterRegionInfo->getContextValue(); llvm_unreachable("No context value for inlined OpenMP region"); } + void setContextValue(llvm::Value *V) override { if (OuterRegionInfo) { OuterRegionInfo->setContextValue(V); @@ -175,6 +237,7 @@ public: } llvm_unreachable("No context value for inlined OpenMP region"); } + /// \brief Lookup the captured field decl for a variable. const FieldDecl *lookup(const VarDecl *VD) const override { if (OuterRegionInfo) @@ -183,11 +246,13 @@ public: // captured variables, we can use the original one. 
return nullptr; } + FieldDecl *getThisFieldDecl() const override { if (OuterRegionInfo) return OuterRegionInfo->getThisFieldDecl(); return nullptr; } + /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { @@ -203,6 +268,11 @@ public: llvm_unreachable("No helper name for inlined OpenMP construct"); } + void emitUntiedSwitch(CodeGenFunction &CGF) override { + if (OuterRegionInfo) + OuterRegionInfo->emitUntiedSwitch(CGF); + } + CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } static bool classof(const CGCapturedStmtInfo *Info) { @@ -210,6 +280,8 @@ public: cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; } + ~CGOpenMPInlinedRegionInfo() override = default; + private: /// \brief CodeGen info about outer OpenMP region. CodeGenFunction::CGCapturedStmtInfo *OldCSI; @@ -221,7 +293,7 @@ private: /// captured fields. The name of the target region has to be unique in a given /// application so it is provided by the client, because only the client has /// the information to generate that. -class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPTargetRegionInfo(const CapturedStmt &CS, const RegionCodeGenTy &CodeGen, StringRef HelperName) @@ -245,9 +317,75 @@ private: StringRef HelperName; }; +static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { + llvm_unreachable("No codegen for expressions"); +} +/// \brief API for generation of expressions captured in a innermost OpenMP +/// region. +class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { +public: + CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) + : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, + OMPD_unknown, + /*HasCancel=*/false), + PrivScope(CGF) { + // Make sure the globals captured in the provided statement are local by + // using the privatization logic. We assume the same variable is not + // captured more than once. + for (auto &C : CS.captures()) { + if (!C.capturesVariable() && !C.capturesVariableByCopy()) + continue; + + const VarDecl *VD = C.getCapturedVar(); + if (VD->isLocalVarDeclOrParm()) + continue; + + DeclRefExpr DRE(const_cast<VarDecl *>(VD), + /*RefersToEnclosingVariableOrCapture=*/false, + VD->getType().getNonReferenceType(), VK_LValue, + SourceLocation()); + PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } + (void)PrivScope.Privatize(); + } + + /// \brief Lookup the captured field decl for a variable. + const FieldDecl *lookup(const VarDecl *VD) const override { + if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) + return FD; + return nullptr; + } + + /// \brief Emit the captured statement body. + void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { + llvm_unreachable("No body for expressions"); + } + + /// \brief Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getThreadIDVariable() const override { + llvm_unreachable("No thread id for expressions"); + } + + /// \brief Get the name of the capture helper. + StringRef getHelperName() const override { + llvm_unreachable("No helper name for expressions"); + } + + static bool classof(const CGCapturedStmtInfo *Info) { return false; } + +private: + /// Private scope to capture global variables. 
+ CodeGenFunction::OMPPrivateScope PrivScope; +}; + /// \brief RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; + llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; + FieldDecl *LambdaThisCaptureField = nullptr; public: /// \brief Constructs region for combined constructs. @@ -260,30 +398,306 @@ public: // Start emission for the construct. CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + LambdaThisCaptureField = CGF.LambdaThisCaptureField; + CGF.LambdaThisCaptureField = nullptr; } + ~InlinedOpenMPRegionRAII() { // Restore original CapturedStmtInfo only if we're done with code emission. auto *OldCSI = cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); delete CGF.CapturedStmtInfo; CGF.CapturedStmtInfo = OldCSI; + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + CGF.LambdaThisCaptureField = LambdaThisCaptureField; + } +}; + +/// \brief Values for bit flags used in the ident_t to describe the fields. +/// All enumeric elements are named and described in accordance with the code +/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h +enum OpenMPLocationFlags { + /// \brief Use trampoline for internal microtask. + OMP_IDENT_IMD = 0x01, + /// \brief Use c-style ident structure. + OMP_IDENT_KMPC = 0x02, + /// \brief Atomic reduction option for kmpc_reduce. + OMP_ATOMIC_REDUCE = 0x10, + /// \brief Explicit 'barrier' directive. + OMP_IDENT_BARRIER_EXPL = 0x20, + /// \brief Implicit barrier in code. + OMP_IDENT_BARRIER_IMPL = 0x40, + /// \brief Implicit barrier in 'for' directive. + OMP_IDENT_BARRIER_IMPL_FOR = 0x40, + /// \brief Implicit barrier in 'sections' directive. + OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, + /// \brief Implicit barrier in 'single' directive. + OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 +}; + +/// \brief Describes ident structure that describes a source location. +/// All descriptions are taken from +/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h +/// Original structure: +/// typedef struct ident { +/// kmp_int32 reserved_1; /**< might be used in Fortran; +/// see above */ +/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; +/// KMP_IDENT_KMPC identifies this union +/// member */ +/// kmp_int32 reserved_2; /**< not really used in Fortran any more; +/// see above */ +///#if USE_ITT_BUILD +/// /* but currently used for storing +/// region-specific ITT */ +/// /* contextual information. */ +///#endif /* USE_ITT_BUILD */ +/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for +/// C++ */ +/// char const *psource; /**< String describing the source location. +/// The string is composed of semi-colon separated +// fields which describe the source file, +/// the function and a pair of line numbers that +/// delimit the construct. +/// */ +/// } ident_t; +enum IdentFieldIndex { + /// \brief might be used in Fortran + IdentField_Reserved_1, + /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. + IdentField_Flags, + /// \brief Not really used in Fortran any more + IdentField_Reserved_2, + /// \brief Source[4] in Fortran, do not use for C++ + IdentField_Reserved_3, + /// \brief String describing the source location. The string is composed of + /// semi-colon separated fields which describe the source file, the function + /// and a pair of line numbers that delimit the construct. 
+ IdentField_PSource +}; + +/// \brief Schedule types for 'omp for' loops (these enumerators are taken from +/// the enum sched_type in kmp.h). +enum OpenMPSchedType { + /// \brief Lower bound for default (unordered) versions. + OMP_sch_lower = 32, + OMP_sch_static_chunked = 33, + OMP_sch_static = 34, + OMP_sch_dynamic_chunked = 35, + OMP_sch_guided_chunked = 36, + OMP_sch_runtime = 37, + OMP_sch_auto = 38, + /// static with chunk adjustment (e.g., simd) + OMP_sch_static_balanced_chunked = 45, + /// \brief Lower bound for 'ordered' versions. + OMP_ord_lower = 64, + OMP_ord_static_chunked = 65, + OMP_ord_static = 66, + OMP_ord_dynamic_chunked = 67, + OMP_ord_guided_chunked = 68, + OMP_ord_runtime = 69, + OMP_ord_auto = 70, + OMP_sch_default = OMP_sch_static, + /// \brief dist_schedule types + OMP_dist_sch_static_chunked = 91, + OMP_dist_sch_static = 92, + /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. + /// Set if the monotonic schedule modifier was present. + OMP_sch_modifier_monotonic = (1 << 29), + /// Set if the nonmonotonic schedule modifier was present. + OMP_sch_modifier_nonmonotonic = (1 << 30), +}; + +enum OpenMPRTLFunction { + /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, + /// kmpc_micro microtask, ...); + OMPRTL__kmpc_fork_call, + /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, + /// kmp_int32 global_tid, void *data, size_t size, void ***cache); + OMPRTL__kmpc_threadprivate_cached, + /// \brief Call to void __kmpc_threadprivate_register( ident_t *, + /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); + OMPRTL__kmpc_threadprivate_register, + // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); + OMPRTL__kmpc_global_thread_num, + // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *crit); + OMPRTL__kmpc_critical, + // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 + // global_tid, kmp_critical_name *crit, uintptr_t hint); + OMPRTL__kmpc_critical_with_hint, + // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *crit); + OMPRTL__kmpc_end_critical, + // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_cancel_barrier, + // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_barrier, + // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_for_static_fini, + // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_serialized_parallel, + // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_end_serialized_parallel, + // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_threads); + OMPRTL__kmpc_push_num_threads, + // Call to void __kmpc_flush(ident_t *loc); + OMPRTL__kmpc_flush, + // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_master, + // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_end_master, + // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, + // int end_part); + OMPRTL__kmpc_omp_taskyield, + // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_single, + // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_end_single, + // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, + 
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + // kmp_routine_entry_t *task_entry); + OMPRTL__kmpc_omp_task_alloc, + // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * + // new_task); + OMPRTL__kmpc_omp_task, + // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, + // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), + // kmp_int32 didit); + OMPRTL__kmpc_copyprivate, + // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void + // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); + OMPRTL__kmpc_reduce, + // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 + // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, + // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name + // *lck); + OMPRTL__kmpc_reduce_nowait, + // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *lck); + OMPRTL__kmpc_end_reduce, + // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *lck); + OMPRTL__kmpc_end_reduce_nowait, + // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t * new_task); + OMPRTL__kmpc_omp_task_begin_if0, + // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t * new_task); + OMPRTL__kmpc_omp_task_complete_if0, + // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_ordered, + // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_end_ordered, + // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_omp_taskwait, + // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_taskgroup, + // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_end_taskgroup, + // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, + // int proc_bind); + OMPRTL__kmpc_push_proc_bind, + // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 + // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t + // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); + OMPRTL__kmpc_omp_task_with_deps, + // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 + // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 + // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); + OMPRTL__kmpc_omp_wait_deps, + // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 + // global_tid, kmp_int32 cncl_kind); + OMPRTL__kmpc_cancellationpoint, + // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 cncl_kind); + OMPRTL__kmpc_cancel, + // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_teams, kmp_int32 thread_limit); + OMPRTL__kmpc_push_num_teams, + // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); + OMPRTL__kmpc_fork_teams, + // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + OMPRTL__kmpc_taskloop, + // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 + // num_dims, struct kmp_dim *dims); + 
OMPRTL__kmpc_doacross_init, + // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); + OMPRTL__kmpc_doacross_fini, + // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + OMPRTL__kmpc_doacross_post, + // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + OMPRTL__kmpc_doacross_wait, + + // + // Offloading related calls + // + // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t + // *arg_types); + OMPRTL__tgt_target, + // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams, + // Call to void __tgt_register_lib(__tgt_bin_desc *desc); + OMPRTL__tgt_register_lib, + // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); + OMPRTL__tgt_unregister_lib, + // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_begin, + // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_end, + // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_update, +}; + +/// A basic class for pre|post-action for advanced codegen sequence for OpenMP +/// region. +class CleanupTy final : public EHScopeStack::Cleanup { + PrePostActionTy *Action; + +public: + explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + Action->Exit(CGF); } }; } // anonymous namespace -static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr, - QualType Ty) { - AlignmentSource Source; - CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source); - return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align), - Ty->getPointeeType(), Source); +void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { + CodeGenFunction::RunCleanupsScope Scope(CGF); + if (PrePostAction) { + CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); + Callback(CodeGen, CGF, *PrePostAction); + } else { + PrePostActionTy Action; + Callback(CodeGen, CGF, Action); + } } LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { - return emitLoadOfPointerLValue(CGF, - CGF.GetAddrOfLocalVar(getThreadIDVariable()), - getThreadIDVariable()->getType()); + return CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(getThreadIDVariable()), + getThreadIDVariable()->getType()->castAs<PointerType>()); } void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { @@ -295,10 +709,7 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { // The point of exit cannot be a branch out of the structured block. // longjmp() and throw() must not violate the entry/exit criteria. 
CGF.EHStack.pushTerminate(); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CodeGen(CGF); - } + CodeGen(CGF); CGF.EHStack.popTerminate(); } @@ -310,16 +721,11 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr), - OffloadEntriesInfoManager(CGM) { + : CGM(CGM), OffloadEntriesInfoManager(CGM) { IdentTy = llvm::StructType::create( "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, CGM.Int8PtrTy /* psource */, nullptr); - // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) - llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), - llvm::PointerType::getUnqual(CGM.Int32Ty)}; - Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); loadOffloadInfoMetadata(); @@ -329,6 +735,90 @@ void CGOpenMPRuntime::clear() { InternalVars.clear(); } +static llvm::Function * +emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, + const Expr *CombinerInitializer, const VarDecl *In, + const VarDecl *Out, bool IsCombiner) { + // void .omp_combiner.(Ty *in, Ty *out); + auto &C = CGM.getContext(); + QualType PtrTy = C.getPointerType(Ty).withRestrict(); + FunctionArgList Args; + ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), + /*Id=*/nullptr, PtrTy); + ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), + /*Id=*/nullptr, PtrTy); + Args.push_back(&OmpOutParm); + Args.push_back(&OmpInParm); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create( + FnTy, llvm::GlobalValue::InternalLinkage, + IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + Fn->addFnAttr(llvm::Attribute::AlwaysInline); + CodeGenFunction CGF(CGM); + // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. + // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CodeGenFunction::OMPPrivateScope Scope(CGF); + Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); + Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { + return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) + .getAddress(); + }); + Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); + Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { + return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) + .getAddress(); + }); + (void)Scope.Privatize(); + CGF.EmitIgnoredExpr(CombinerInitializer); + Scope.ForceCleanup(); + CGF.FinishFunction(); + return Fn; +} + +void CGOpenMPRuntime::emitUserDefinedReduction( + CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { + if (UDRMap.count(D) > 0) + return; + auto &C = CGM.getContext(); + if (!In || !Out) { + In = &C.Idents.get("omp_in"); + Out = &C.Idents.get("omp_out"); + } + llvm::Function *Combiner = emitCombinerOrInitializer( + CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), + cast<VarDecl>(D->lookup(Out).front()), + /*IsCombiner=*/true); + llvm::Function *Initializer = nullptr; + if (auto *Init = D->getInitializer()) { + if (!Priv || !Orig) { + Priv = &C.Idents.get("omp_priv"); + Orig = &C.Idents.get("omp_orig"); + } + Initializer = emitCombinerOrInitializer( + CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), + cast<VarDecl>(D->lookup(Priv).front()), + /*IsCombiner=*/false); + } + UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer))); + if (CGF) { + auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); + Decls.second.push_back(D); + } +} + +std::pair<llvm::Function *, llvm::Function *> +CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { + auto I = UDRMap.find(D); + if (I != UDRMap.end()) + return I->second; + emitUserDefinedReduction(/*CGF=*/nullptr, D); + return UDRMap.lookup(D); +} + // Layout information for ident_t. static CharUnits getIdentAlign(CodeGenModule &CGM) { return CGM.getPointerAlign(); @@ -337,18 +827,18 @@ static CharUnits getIdentSize(CodeGenModule &CGM) { assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); return CharUnits::fromQuantity(16) + CGM.getPointerSize(); } -static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) { +static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { // All the fields except the last are i32, so this works beautifully. 
return unsigned(Field) * CharUnits::fromQuantity(4); } static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, - CGOpenMPRuntime::IdentFieldIndex Field, + IdentFieldIndex Field, const llvm::Twine &Name = "") { auto Offset = getOffsetOfIdentField(Field); return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); } -llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( +llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { assert(ThreadIDVar->getType()->isPointerType() && @@ -370,19 +860,39 @@ llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts) { + auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, + PrePostActionTy &) { + auto *ThreadID = getThreadID(CGF, D.getLocStart()); + auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); + llvm::Value *TaskArgs[] = { + UpLoc, ThreadID, + CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), + TaskTVar->getType()->castAs<PointerType>()) + .getPointer()}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); + }; + CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, + UntiedCodeGen); + CodeGen.setAction(Action); assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); + auto *TD = dyn_cast<OMPTaskDirective>(&D); CodeGenFunction CGF(CGM, true); CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, - cast<OMPTaskDirective>(D).hasCancel()); + TD ? TD->hasCancel() : false, Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateCapturedStmtFunction(*CS); + auto *Res = CGF.GenerateCapturedStmtFunction(*CS); + if (!Tied) + NumberOfParts = Action.getNumberOfParts(); + return Res; } -Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { +Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { CharUnits Align = getIdentAlign(CGM); llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); if (!Entry) { @@ -399,7 +909,7 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { auto DefaultOpenMPLocation = new llvm::GlobalVariable( CGM.getModule(), IdentTy, /*isConstant*/ true, llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); - DefaultOpenMPLocation->setUnnamedAddr(true); + DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); DefaultOpenMPLocation->setAlignment(Align.getQuantity()); llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); @@ -415,9 +925,10 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPLocationFlags Flags) { + unsigned Flags) { + Flags |= OMP_IDENT_KMPC; // If no debug info is generated - return global default location. 
- if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || + if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || Loc.isInvalid()) return getOrCreateDefaultLocation(Flags).getPointer(); @@ -517,20 +1028,34 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { assert(CGF.CurFn && "No function in current CodeGenFunction."); if (OpenMPLocThreadIDMap.count(CGF.CurFn)) OpenMPLocThreadIDMap.erase(CGF.CurFn); + if (FunctionUDRMap.count(CGF.CurFn) > 0) { + for(auto *D : FunctionUDRMap[CGF.CurFn]) { + UDRMap.erase(D); + } + FunctionUDRMap.erase(CGF.CurFn); + } } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { + if (!IdentTy) { + } return llvm::PointerType::getUnqual(IdentTy); } llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { + if (!Kmpc_MicroTy) { + // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) + llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), + llvm::PointerType::getUnqual(CGM.Int32Ty)}; + Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); + } return llvm::PointerType::getUnqual(Kmpc_MicroTy); } llvm::Constant * -CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { +CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Constant *RTLFn = nullptr; - switch (Function) { + switch (static_cast<OpenMPRTLFunction>(Function)) { case OMPRTL__kmpc_fork_call: { // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro // microtask, ...); @@ -927,6 +1452,86 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); break; } + case OMPRTL__kmpc_push_num_teams: { + // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, + // kmp_int32 num_teams, kmp_int32 num_threads) + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); + break; + } + case OMPRTL__kmpc_fork_teams: { + // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + getKmpc_MicroPointerTy()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); + break; + } + case OMPRTL__kmpc_taskloop: { + // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.IntTy, + CGM.VoidPtrTy, + CGM.IntTy, + CGM.Int64Ty->getPointerTo(), + CGM.Int64Ty->getPointerTo(), + CGM.Int64Ty, + CGM.IntTy, + CGM.IntTy, + CGM.Int64Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); + break; + } + case OMPRTL__kmpc_doacross_init: { + // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 + // num_dims, struct kmp_dim *dims); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = 
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); + break; + } + case OMPRTL__kmpc_doacross_fini: { + // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); + break; + } + case OMPRTL__kmpc_doacross_post: { + // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.Int64Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); + break; + } + case OMPRTL__kmpc_doacross_wait: { + // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.Int64Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -943,6 +1548,24 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_target_teams: { + // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -963,30 +1586,53 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); break; } + case OMPRTL__tgt_target_data_begin: { + // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); + break; + } + case OMPRTL__tgt_target_data_end: { + // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); + 
break; + } + case OMPRTL__tgt_target_data_update: { + // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); + break; + } } + assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; } -static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) { - auto &C = CGF.getContext(); - llvm::Value *Size = nullptr; - auto SizeInChars = C.getTypeSizeInChars(Ty); - if (SizeInChars.isZero()) { - // getTypeSizeInChars() returns 0 for a VLA. - while (auto *VAT = C.getAsVariableArrayType(Ty)) { - llvm::Value *ArraySize; - std::tie(ArraySize, Ty) = CGF.getVLASize(VAT); - Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize; - } - SizeInChars = C.getTypeSizeInChars(Ty); - assert(!SizeInChars.isZero()); - Size = CGF.Builder.CreateNUWMul( - Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity())); - } else - Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()); - return Size; -} - llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && @@ -1144,9 +1790,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( /*Id=*/nullptr, CGM.getContext().VoidPtrTy); Args.push_back(&Dst); - auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), - /*isVariadic=*/false); + auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + CGM.getContext().VoidPtrTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto Fn = CGM.CreateGlobalInitOrDestructFunction( FTy, ".__kmpc_global_ctor_.", FI, Loc); @@ -1176,14 +1821,16 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( /*Id=*/nullptr, CGM.getContext().VoidPtrTy); Args.push_back(&Dst); - auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), - /*isVariadic=*/false); + auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + CGM.getContext().VoidTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto Fn = CGM.CreateGlobalInitOrDestructFunction( FTy, ".__kmpc_global_dtor_.", FI, Loc); + auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, SourceLocation()); + // Create a scope with an artificial location for the body of this function. + auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); auto ArgVal = DtorCGF.EmitLoadOfScalar( DtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); @@ -1251,12 +1898,10 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // the condition and the dead arm of the if/else. bool CondConstant; if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { - CodeGenFunction::RunCleanupsScope Scope(CGF); - if (CondConstant) { + if (CondConstant) ThenGen(CGF); - } else { + else ElseGen(CGF); - } return; } @@ -1269,26 +1914,16 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // Emit the 'then' code. 
CGF.EmitBlock(ThenBlock); - { - CodeGenFunction::RunCleanupsScope ThenScope(CGF); - ThenGen(CGF); - } + ThenGen(CGF); CGF.EmitBranch(ContBlock); // Emit the 'else' code if present. - { - // There is no need to emit line number for unconditional branch. - auto NL = ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(ElseBlock); - } - { - CodeGenFunction::RunCleanupsScope ThenScope(CGF); - ElseGen(CGF); - } - { - // There is no need to emit line number for unconditional branch. - auto NL = ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBranch(ContBlock); - } + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ElseBlock); + ElseGen(CGF); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBranch(ContBlock); // Emit the continuation block for code after the if. CGF.EmitBlock(ContBlock, /*IsFinished=*/true); } @@ -1300,34 +1935,36 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, if (!CGF.HaveInsertPoint()) return; auto *RTLoc = emitUpdateLocation(CGF, Loc); - auto &&ThenGen = [this, OutlinedFn, CapturedVars, - RTLoc](CodeGenFunction &CGF) { + auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, + PrePostActionTy &) { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); + auto &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RTLoc, CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars - CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; + CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; llvm::SmallVector<llvm::Value *, 16> RealArgs; RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); + auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; - auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, - Loc](CodeGenFunction &CGF) { - auto ThreadID = getThreadID(CGF, Loc); + auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); + auto ThreadID = RT.getThreadID(CGF, Loc); // Build calls: // __kmpc_serialized_parallel(&Loc, GTid); llvm::Value *Args[] = {RTLoc, ThreadID}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), - Args); + CGF.EmitRuntimeCall( + RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); // OutlinedFn(>id, &zero, CapturedStruct); - auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); + auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); Address ZeroAddr = - CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), - /*Name*/ ".zero.addr"); + CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), + /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); @@ -1336,15 +1973,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); - llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; + llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 
EndArgs); + RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), + EndArgs); }; - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); - ThenGen(CGF); + else { + RegionCodeGenTy ThenRCG(ThenGen); + ThenRCG(CGF); } } @@ -1397,20 +2035,39 @@ llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { } namespace { -template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup { - llvm::Value *Callee; - llvm::Value *Args[N]; +/// Common pre(post)-action for different OpenMP constructs. +class CommonActionTy final : public PrePostActionTy { + llvm::Value *EnterCallee; + ArrayRef<llvm::Value *> EnterArgs; + llvm::Value *ExitCallee; + ArrayRef<llvm::Value *> ExitArgs; + bool Conditional; + llvm::BasicBlock *ContBlock = nullptr; public: - CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs) - : Callee(Callee) { - assert(CleanupArgs.size() == N); - std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); + CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, + llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, + bool Conditional = false) + : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), + ExitArgs(ExitArgs), Conditional(Conditional) {} + void Enter(CodeGenFunction &CGF) override { + llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); + if (Conditional) { + llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); + auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); + ContBlock = CGF.createBasicBlock("omp_if.end"); + // Generate the branch (If-stmt) + CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); + CGF.EmitBlock(ThenBlock); + } } - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { - if (!CGF.HaveInsertPoint()) - return; - CGF.EmitRuntimeCall(Callee, Args); + void Done(CodeGenFunction &CGF) { + // Emit the rest of blocks/branches + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + void Exit(CodeGenFunction &CGF) override { + CGF.EmitRuntimeCall(ExitCallee, ExitArgs); } }; } // anonymous namespace @@ -1425,45 +2082,22 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, // Prepare arguments and build a call to __kmpc_critical if (!CGF.HaveInsertPoint()) return; - CodeGenFunction::RunCleanupsScope Scope(CGF); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), getCriticalRegionLock(CriticalName)}; + llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), + std::end(Args)); if (Hint) { - llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args), - std::end(Args)); - auto *HintVal = CGF.EmitScalarExpr(Hint); - ArgsWithHint.push_back( - CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false)); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint), - ArgsWithHint); - } else - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); - // Build a call to __kmpc_end_critical - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), - llvm::makeArrayRef(Args)); + EnterArgs.push_back(CGF.Builder.CreateIntCast( + CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); + } + CommonActionTy Action( + createRuntimeFunction(Hint ? 
OMPRTL__kmpc_critical_with_hint + : OMPRTL__kmpc_critical), + EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); + CriticalOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); } -static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, - OpenMPDirectiveKind Kind, SourceLocation Loc, - const RegionCodeGenTy &BodyOpGen) { - llvm::Value *CallBool = CGF.EmitScalarConversion( - IfCond, - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), - CGF.getContext().BoolTy, Loc); - - auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); - auto *ContBlock = CGF.createBasicBlock("omp_if.end"); - // Generate the branch (If-stmt) - CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); - CGF.EmitBlock(ThenBlock); - CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); - // Emit the rest of bblocks/branches - CGF.EmitBranch(ContBlock); - CGF.EmitBlock(ContBlock, true); -} - void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) { @@ -1475,18 +2109,12 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, // } // Prepare arguments and build a call to __kmpc_master llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - auto *IsMaster = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); - typedef CallEndCleanup<std::extent<decltype(Args)>::value> - MasterCallEndCleanup; - emitIfStmt( - CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CGF.EHStack.pushCleanup<MasterCallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), - llvm::makeArrayRef(Args)); - MasterOpGen(CGF); - }); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, + createRuntimeFunction(OMPRTL__kmpc_end_master), Args, + /*Conditional=*/true); + MasterOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_master, MasterOpGen); + Action.Done(CGF); } void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, @@ -1498,6 +2126,8 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); + if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, @@ -1509,16 +2139,12 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, // TaskgroupOpGen(); // __kmpc_end_taskgroup(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_taskgroup - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); - // Build a call to __kmpc_end_taskgroup - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), - llvm::makeArrayRef(Args)); - emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); - } + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, + createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), + Args); + TaskgroupOpGen.setAction(Action); + 
emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); } /// Given an array of pointers to variables, project the address of a @@ -1549,9 +2175,7 @@ static llvm::Value *emitCopyprivateCopyFunction( C.VoidPtrTy); Args.push_back(&LHSArg); Args.push_back(&RHSArg); - FunctionType::ExtInfo EI; - auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, Args, EI, /*isVariadic=*/false); + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, ".omp.copyprivate.copy_func", &CGM.getModule()); @@ -1616,22 +2240,16 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, } // Prepare arguments and build a call to __kmpc_single llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - auto *IsSingle = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); - typedef CallEndCleanup<std::extent<decltype(Args)>::value> - SingleCallEndCleanup; - emitIfStmt( - CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CGF.EHStack.pushCleanup<SingleCallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), - llvm::makeArrayRef(Args)); - SingleOpGen(CGF); - if (DidIt.isValid()) { - // did_it = 1; - CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); - } - }); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, + createRuntimeFunction(OMPRTL__kmpc_end_single), Args, + /*Conditional=*/true); + SingleOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_single, SingleOpGen); + if (DidIt.isValid()) { + // did_it = 1; + CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); + } + Action.Done(CGF); // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, // <copy_func>, did_it); if (DidIt.isValid()) { @@ -1655,7 +2273,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, auto *CpyFn = emitCopyprivateCopyFunction( CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); - auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy); + auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, CGF.VoidPtrTy); @@ -1681,14 +2299,14 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, // OrderedOpGen(); // __kmpc_end_ordered(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_ordered - CodeGenFunction::RunCleanupsScope Scope(CGF); if (IsThreads) { llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); - // Build a call to __kmpc_end_ordered - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), - llvm::makeArrayRef(Args)); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, + createRuntimeFunction(OMPRTL__kmpc_end_ordered), + Args); + OrderedOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); + return; } emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } @@ -1700,21 +2318,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, return; // Build call __kmpc_cancel_barrier(loc, thread_id); // Build call __kmpc_barrier(loc, thread_id); - OpenMPLocationFlags 
Flags = OMP_IDENT_KMPC; - if (Kind == OMPD_for) { - Flags = - static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); - } else if (Kind == OMPD_sections) { - Flags = static_cast<OpenMPLocationFlags>(Flags | - OMP_IDENT_BARRIER_IMPL_SECTIONS); - } else if (Kind == OMPD_single) { - Flags = - static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); - } else if (Kind == OMPD_barrier) { - Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); - } else { - Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); - } + unsigned Flags; + if (Kind == OMPD_for) + Flags = OMP_IDENT_BARRIER_IMPL_FOR; + else if (Kind == OMPD_sections) + Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; + else if (Kind == OMPD_single) + Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; + else if (Kind == OMPD_barrier) + Flags = OMP_IDENT_BARRIER_EXPL; + else + Flags = OMP_IDENT_BARRIER_IMPL; // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), @@ -1745,28 +2359,6 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); } -/// \brief Schedule types for 'omp for' loops (these enumerators are taken from -/// the enum sched_type in kmp.h). -enum OpenMPSchedType { - /// \brief Lower bound for default (unordered) versions. - OMP_sch_lower = 32, - OMP_sch_static_chunked = 33, - OMP_sch_static = 34, - OMP_sch_dynamic_chunked = 35, - OMP_sch_guided_chunked = 36, - OMP_sch_runtime = 37, - OMP_sch_auto = 38, - /// \brief Lower bound for 'ordered' versions. - OMP_ord_lower = 64, - OMP_ord_static_chunked = 65, - OMP_ord_static = 66, - OMP_ord_dynamic_chunked = 67, - OMP_ord_guided_chunked = 68, - OMP_ord_runtime = 69, - OMP_ord_auto = 70, - OMP_sch_default = OMP_sch_static, -}; - /// \brief Map the OpenMP loop schedule to the runtime enumeration. static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered) { @@ -1789,12 +2381,26 @@ static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, llvm_unreachable("Unexpected runtime schedule"); } +/// \brief Map the OpenMP distribute schedule to the runtime enumeration. +static OpenMPSchedType +getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { + // only static is allowed for dist_schedule + return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; +} + bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const { auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); return Schedule == OMP_sch_static; } +bool CGOpenMPRuntime::isStaticNonchunked( + OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { + auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); + return Schedule == OMP_dist_sch_static; +} + + bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { auto Schedule = getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); @@ -1802,19 +2408,57 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { return Schedule != OMP_sch_static; } +static int addMonoNonMonoModifier(OpenMPSchedType Schedule, + OpenMPScheduleClauseModifier M1, + OpenMPScheduleClauseModifier M2) { + int Modifier = 0; + switch (M1) { + case OMPC_SCHEDULE_MODIFIER_monotonic: + Modifier = OMP_sch_modifier_monotonic; + break; + case OMPC_SCHEDULE_MODIFIER_nonmonotonic: + Modifier = OMP_sch_modifier_nonmonotonic; + break; + case OMPC_SCHEDULE_MODIFIER_simd: + if (Schedule == OMP_sch_static_chunked) + Schedule = OMP_sch_static_balanced_chunked; + break; + case OMPC_SCHEDULE_MODIFIER_last: + case OMPC_SCHEDULE_MODIFIER_unknown: + break; + } + switch (M2) { + case OMPC_SCHEDULE_MODIFIER_monotonic: + Modifier = OMP_sch_modifier_monotonic; + break; + case OMPC_SCHEDULE_MODIFIER_nonmonotonic: + Modifier = OMP_sch_modifier_nonmonotonic; + break; + case OMPC_SCHEDULE_MODIFIER_simd: + if (Schedule == OMP_sch_static_chunked) + Schedule = OMP_sch_static_balanced_chunked; + break; + case OMPC_SCHEDULE_MODIFIER_last: + case OMPC_SCHEDULE_MODIFIER_unknown: + break; + } + return Schedule | Modifier; +} + void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind ScheduleKind, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, llvm::Value *UB, llvm::Value *Chunk) { if (!CGF.HaveInsertPoint()) return; OpenMPSchedType Schedule = - getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); + getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); assert(Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && - Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)); + Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && + Schedule != OMP_sch_static_balanced_chunked)); // Call __kmpc_dispatch_init( // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, // kmp_int[32|64] lower, kmp_int[32|64] upper, @@ -1824,59 +2468,94 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, if (Chunk == nullptr) Chunk = CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc), - CGF.Builder.getInt32(Schedule), // Schedule type - CGF.Builder.getIntN(IVSize, 0), // Lower - UB, // Upper - CGF.Builder.getIntN(IVSize, 1), // Stride - Chunk // Chunk + emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + CGF.Builder.getInt32(addMonoNonMonoModifier( + Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type + CGF.Builder.getIntN(IVSize, 0), // Lower + UB, // Upper + CGF.Builder.getIntN(IVSize, 1), // Stride + Chunk // Chunk }; CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); } +static void emitForStaticInitCall( + CodeGenFunction &CGF, llvm::Value 
*UpdateLocation, llvm::Value *ThreadId, + llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, + OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, + unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, + Address ST, llvm::Value *Chunk) { + if (!CGF.HaveInsertPoint()) + return; + + assert(!Ordered); + assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static || + Schedule == OMP_dist_sch_static_chunked); + + // Call __kmpc_for_static_init( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + if (Chunk == nullptr) { + assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || + Schedule == OMP_dist_sch_static) && + "expected static non-chunked schedule"); + // If the Chunk was not specified in the clause - use default value 1. + Chunk = CGF.Builder.getIntN(IVSize, 1); + } else { + assert((Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static_chunked) && + "expected static chunked schedule"); + } + llvm::Value *Args[] = { + UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( + Schedule, M1, M2)), // Schedule type + IL.getPointer(), // &isLastIter + LB.getPointer(), // &LB + UB.getPointer(), // &UB + ST.getPointer(), // &Stride + CGF.Builder.getIntN(IVSize, 1), // Incr + Chunk // Chunk + }; + CGF.EmitRuntimeCall(ForStaticInitFunction, Args); +} + void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind ScheduleKind, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk) { - if (!CGF.HaveInsertPoint()) - return; - OpenMPSchedType Schedule = - getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); - assert(!Ordered); - assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || - Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked); - - // Call __kmpc_for_static_init( - // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, - // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, - // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, - // kmp_int[32|64] incr, kmp_int[32|64] chunk); - if (Chunk == nullptr) { - assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && - "expected static non-chunked schedule"); - // If the Chunk was not specified in the clause - use default value 1. 
- Chunk = CGF.Builder.getIntN(IVSize, 1); - } else { - assert((Schedule == OMP_sch_static_chunked || - Schedule == OMP_ord_static_chunked) && - "expected static chunked schedule"); - } - llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc), - CGF.Builder.getInt32(Schedule), // Schedule type - IL.getPointer(), // &isLastIter - LB.getPointer(), // &LB - UB.getPointer(), // &UB - ST.getPointer(), // &Stride - CGF.Builder.getIntN(IVSize, 1), // Incr - Chunk // Chunk - }; - CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); + OpenMPSchedType ScheduleNum = + getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + auto *ThreadId = getThreadID(CGF, Loc); + auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, + ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, + Ordered, IL, LB, UB, ST, Chunk); +} + +void CGOpenMPRuntime::emitDistributeStaticInit( + CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, + bool Ordered, Address IL, Address LB, Address UB, Address ST, + llvm::Value *Chunk) { + OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + auto *ThreadId = getThreadID(CGF, Loc); + auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, + ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, + OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, + UB, ST, Chunk); } void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, @@ -1884,8 +2563,7 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc)}; + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), Args); } @@ -1897,8 +2575,7 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc)}; + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); } @@ -1912,7 +2589,8 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, // kmp_int[32|64] *p_stride); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), + emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), IL.getPointer(), // &isLastIter LB.getPointer(), // &Lower UB.getPointer(), // &Upper @@ -1991,8 +2669,18 @@ enum KmpTaskTFields { KmpTaskTRoutine, /// \brief Partition id for the untied tasks. KmpTaskTPartId, - /// \brief Function with call of destructors for private variables. - KmpTaskTDestructors, + /// Function with call of destructors for private variables. + Data1, + /// Task priority. + Data2, + /// (Taskloops only) Lower bound. + KmpTaskTLowerBound, + /// (Taskloops only) Upper bound. 
+ KmpTaskTUpperBound, + /// (Taskloops only) Stride. + KmpTaskTStride, + /// (Taskloops only) Is last iteration flag. + KmpTaskTLastIter, }; } // anonymous namespace @@ -2005,11 +2693,11 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum, unsigned Order) { + unsigned Order) { assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " "only required for the device " "code generation."); - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] = + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); ++OffloadingEntriesNum; } @@ -2017,30 +2705,27 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum, llvm::Constant *Addr, - llvm::Constant *ID) { + llvm::Constant *Addr, llvm::Constant *ID) { // If we are emitting code for a target, the entry is already initialized, // only has to be registered. if (CGM.getLangOpts().OpenMPIsDevice) { - assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, - ColNum) && + assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && "Entry must exist."); - auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName] - [LineNum][ColNum]; + auto &Entry = + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; assert(Entry.isValid() && "Entry not initialized!"); Entry.setAddress(Addr); Entry.setID(ID); return; } else { OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] = - Entry; + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; } } bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( - unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum) const { + unsigned DeviceID, unsigned FileID, StringRef ParentName, + unsigned LineNum) const { auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); if (PerDevice == OffloadEntriesTargetRegion.end()) return false; @@ -2053,11 +2738,8 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( auto PerLine = PerParentName->second.find(LineNum); if (PerLine == PerParentName->second.end()) return false; - auto PerColumn = PerLine->second.find(ColNum); - if (PerColumn == PerLine->second.end()) - return false; // Fail if this entry is already registered. 
- if (PerColumn->second.getAddress() || PerColumn->second.getID()) + if (PerLine->second.getAddress() || PerLine->second.getID()) return false; return true; } @@ -2069,8 +2751,7 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( for (auto &F : D.second) for (auto &P : F.second) for (auto &L : P.second) - for (auto &C : L.second) - Action(D.first, F.first, P.first(), L.first, C.first, C.second); + Action(D.first, F.first, P.first(), L.first, L.second); } /// \brief Create a Ctor/Dtor-like function whose body is emitted through @@ -2087,9 +2768,7 @@ createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, CodeGenFunction CGF(CGM); GlobalDecl(); - auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, Args, FunctionType::ExtInfo(), - /*isVariadic=*/false); + auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto *Fn = CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); @@ -2123,11 +2802,11 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0, + llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, ".omp_offloading.entries_begin"); llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0, + llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, ".omp_offloading.entries_end"); // Create all device images @@ -2139,10 +2818,11 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { StringRef T = Devices[i].getTriple(); auto *ImgBegin = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/0, Twine(".omp_offloading.img_start.") + Twine(T)); + /*Initializer=*/nullptr, + Twine(".omp_offloading.img_start.") + Twine(T)); auto *ImgEnd = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/0, Twine(".omp_offloading.img_end.") + Twine(T)); + /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); llvm::Constant *Dev = llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd, @@ -2160,7 +2840,7 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { M, DeviceImagesInitTy, /*isConstant=*/true, llvm::GlobalValue::InternalLinkage, DeviceImagesInit, ".omp_offloading.device_images"); - DeviceImages->setUnnamedAddr(true); + DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // This is a Zero array to be used in the creation of the constant expressions llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), @@ -2190,12 +2870,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { IdentInfo, C.CharTy); auto *UnRegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) { + CGM, ".omp_offloading.descriptor_unreg", + [&](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), Desc); }); auto *RegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) { + CGM, ".omp_offloading.descriptor_reg", + [&](CodeGenFunction &CGF, 
PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); @@ -2203,15 +2885,16 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { return RegFn; } -void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name, - uint64_t Size) { +void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, + llvm::Constant *Addr, uint64_t Size) { + StringRef Name = Addr->getName(); auto *TgtOffloadEntryType = cast<llvm::StructType>( CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); llvm::LLVMContext &C = CGM.getModule().getContext(); llvm::Module &M = CGM.getModule(); |
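Illustrative note, not part of the imported patch: the hunks above add runtime entry points and codegen support for several OpenMP 4.0/4.5 features — user-defined reductions (the omp_in/omp_out/omp_priv/omp_orig identifiers looked up in emitUserDefinedReduction), taskloop, doacross loops, and teams/target-data offloading. A minimal source-level sketch of constructs that would exercise those paths is given below; every identifier (merge, sketch, buf, acc, n) is chosen purely for illustration.

#include <cstddef>

// Hypothetical example only; names are placeholders, not taken from the patch.
#pragma omp declare reduction(merge : int : omp_out += omp_in) \
    initializer(omp_priv = 0)            // combiner + initializer emitted by emitUserDefinedReduction

void sketch(int *buf, std::size_t n) {
  int acc = 0;

  #pragma omp parallel for reduction(merge : acc)   // user-defined reduction in a worksharing loop
  for (std::size_t i = 0; i < n; ++i)
    acc += buf[i];

  #pragma omp taskloop grainsize(16)                // lowered through __kmpc_taskloop
  for (std::size_t i = 0; i < n; ++i)
    buf[i] += 1;

  #pragma omp target data map(tofrom : buf[0:n])    // __tgt_target_data_begin / __tgt_target_data_end
  {
    #pragma omp target teams num_teams(4) thread_limit(64)   // __tgt_target_teams / __kmpc_fork_teams
    { }
  }

  #pragma omp parallel for ordered(1)               // doacross: __kmpc_doacross_init/_post/_wait/_fini
  for (std::size_t i = 1; i < n; ++i) {
    #pragma omp ordered depend(sink : i - 1)
    buf[i] += buf[i - 1];
    #pragma omp ordered depend(source)
  }
}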