author     Dimitry Andric <dim@FreeBSD.org>   2019-08-20 20:50:49 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2019-08-20 20:50:49 +0000
commit     2298981669bf3bd63335a4be179bc0f96823a8f4 (patch)
tree       1cbe2eb27f030d2d70b80ee5ca3c86bee7326a9f /lib/CodeGen
parent     9a83721404652cea39e9f02ae3e3b5c964602a5c (diff)
download   src-2298981669bf3bd63335a4be179bc0f96823a8f4.tar.gz
           src-2298981669bf3bd63335a4be179bc0f96823a8f4.zip
Vendor import of stripped clang trunk r366426 (just before the
release_90 branch point):
https://llvm.org/svn/llvm-project/cfe/trunk@366426
Notes:
svn path=/vendor/clang/dist/; revision=351280
svn path=/vendor/clang/clang-trunk-r366426/; revision=351281; tag=vendor/clang/clang-trunk-r366426
Diffstat (limited to 'lib/CodeGen')
87 files changed, 10377 insertions, 5285 deletions
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h index feed3833f24a..0c3a076da0b5 100644 --- a/lib/CodeGen/ABIInfo.h +++ b/lib/CodeGen/ABIInfo.h @@ -1,9 +1,8 @@ //===----- ABIInfo.h - ABI information access & encapsulation ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/Address.h b/lib/CodeGen/Address.h index 334308081ff3..6a8e57f8db33 100644 --- a/lib/CodeGen/Address.h +++ b/lib/CodeGen/Address.h @@ -1,9 +1,8 @@ //===-- Address.h - An aligned address -------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index b927acabac59..497652e85b47 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -1,9 +1,8 @@ //===--- BackendUtil.cpp - LLVM Backend Utilities -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -37,11 +36,13 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -53,8 +54,11 @@ #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizer.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" +#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" +#include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" @@ -62,6 +66,7 @@ #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" +#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> @@ -243,15 +248,15 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts); PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, UseAfterScope)); - PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover, - UseGlobalsGC, UseOdrIndicator)); + PM.add(createModuleAddressSanitizerLegacyPassPass( + /*CompileKernel*/ false, Recover, UseGlobalsGC, UseOdrIndicator)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { PM.add(createAddressSanitizerFunctionPass( /*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false)); - PM.add(createAddressSanitizerModulePass( + PM.add(createModuleAddressSanitizerLegacyPassPass( /*CompileKernel*/ true, /*Recover*/ true, /*UseGlobalsGC*/ true, /*UseOdrIndicator*/ false)); } @@ -262,12 +267,13 @@ static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder, static_cast<const PassManagerBuilderWrapper &>(Builder); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::HWAddress); - PM.add(createHWAddressSanitizerPass(/*CompileKernel*/ false, Recover)); + PM.add( + createHWAddressSanitizerLegacyPassPass(/*CompileKernel*/ false, Recover)); } static void addKernelHWAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { - PM.add(createHWAddressSanitizerPass( + PM.add(createHWAddressSanitizerLegacyPassPass( /*CompileKernel*/ true, /*Recover*/ true)); } @@ -279,7 +285,8 @@ static void addGeneralOptsForMemorySanitizer(const PassManagerBuilder &Builder, const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); int TrackOrigins = CGOpts.SanitizeMemoryTrackOrigins; bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Memory); - PM.add(createMemorySanitizerLegacyPassPass(TrackOrigins, Recover, 
CompileKernel)); + PM.add(createMemorySanitizerLegacyPassPass( + MemorySanitizerOptions{TrackOrigins, Recover, CompileKernel})); // MemorySanitizer inserts complex instrumentation that mostly follows // the logic of the original code, but operates on "shadow" values. @@ -317,19 +324,6 @@ static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder, PM.add(createDataFlowSanitizerPass(LangOpts.SanitizerBlacklistFiles)); } -static void addEfficiencySanitizerPass(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - const PassManagerBuilderWrapper &BuilderWrapper = - static_cast<const PassManagerBuilderWrapper&>(Builder); - const LangOptions &LangOpts = BuilderWrapper.getLangOpts(); - EfficiencySanitizerOptions Opts; - if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyCacheFrag)) - Opts.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag; - else if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyWorkingSet)) - Opts.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet; - PM.add(createEfficiencySanitizerPass(Opts)); -} - static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, const CodeGenOptions &CodeGenOpts) { TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); @@ -347,6 +341,9 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, case CodeGenOptions::Accelerate: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate); break; + case CodeGenOptions::MASSV: + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV); + break; case CodeGenOptions::SVML: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML); break; @@ -473,9 +470,9 @@ static void initTargetOptions(llvm::TargetOptions &Options, Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection; Options.EmitAddrsig = CodeGenOpts.Addrsig; + Options.EnableDebugEntryValues = CodeGenOpts.EnableDebugEntryValues; - if (CodeGenOpts.getSplitDwarfMode() != CodeGenOptions::NoFission) - Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; + Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm; @@ -515,6 +512,21 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) { return Options; } +static Optional<InstrProfOptions> +getInstrProfOptions(const CodeGenOptions &CodeGenOpts, + const LangOptions &LangOpts) { + if (!CodeGenOpts.hasProfileClangInstr()) + return None; + InstrProfOptions Options; + Options.NoRedZone = CodeGenOpts.DisableRedZone; + Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; + + // TODO: Surface the option to emit atomic profile counter increments at + // the driver level. + Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread); + return Options; +} + void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM) { // Handle disabling of all LLVM passes, where we want to preserve the @@ -554,6 +566,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; + // Loop interleaving in the loop vectorizer has historically been set to be + // enabled when loop unrolling is enabled. 
+ PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; @@ -579,7 +594,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, addObjCARCOptPass); } - if (LangOpts.CoroutinesTS) + if (LangOpts.Coroutines) addCoroutinePassesToExtensionPoints(PMBuilder); if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) { @@ -654,13 +669,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, addDataFlowSanitizerPass); } - if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) { - PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, - addEfficiencySanitizerPass); - PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, - addEfficiencySanitizerPass); - } - // Set up the per-function pass manager. FPM.add(new TargetLibraryInfoWrapperPass(*TLII)); if (CodeGenOpts.VerifyModule) @@ -676,26 +684,35 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, MPM.add(createStripSymbolsPass(true)); } - if (CodeGenOpts.hasProfileClangInstr()) { - InstrProfOptions Options; - Options.NoRedZone = CodeGenOpts.DisableRedZone; - Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; - - // TODO: Surface the option to emit atomic profile counter increments at - // the driver level. - Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread); + if (Optional<InstrProfOptions> Options = + getInstrProfOptions(CodeGenOpts, LangOpts)) + MPM.add(createInstrProfilingLegacyPass(*Options, false)); - MPM.add(createInstrProfilingLegacyPass(Options)); - } + bool hasIRInstr = false; if (CodeGenOpts.hasProfileIRInstr()) { PMBuilder.EnablePGOInstrGen = true; + hasIRInstr = true; + } + if (CodeGenOpts.hasProfileCSIRInstr()) { + assert(!CodeGenOpts.hasProfileCSIRUse() && + "Cannot have both CSProfileUse pass and CSProfileGen pass at the " + "same time"); + assert(!hasIRInstr && + "Cannot have both ProfileGen pass and CSProfileGen pass at the " + "same time"); + PMBuilder.EnablePGOCSInstrGen = true; + hasIRInstr = true; + } + if (hasIRInstr) { if (!CodeGenOpts.InstrProfileOutput.empty()) PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput; else PMBuilder.PGOInstrGen = DefaultProfileGenName; } - if (CodeGenOpts.hasProfileIRUse()) + if (CodeGenOpts.hasProfileIRUse()) { PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath; + PMBuilder.EnablePGOCSInstrUse = CodeGenOpts.hasProfileCSIRUse(); + } if (!CodeGenOpts.SampleProfileFile.empty()) PMBuilder.PGOSampleUse = CodeGenOpts.SampleProfileFile; @@ -845,9 +862,8 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, break; default: - if (!CodeGenOpts.SplitDwarfFile.empty() && - (CodeGenOpts.getSplitDwarfMode() == CodeGenOptions::SplitFileFission)) { - DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile); + if (!CodeGenOpts.SplitDwarfOutput.empty()) { + DwoOS = openOutputFile(CodeGenOpts.SplitDwarfOutput); if (!DwoOS) return; } @@ -916,6 +932,43 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { } } +static void addSanitizersAtO0(ModulePassManager &MPM, + const Triple &TargetTriple, + const LangOptions &LangOpts, + const CodeGenOptions &CodeGenOpts) { + auto ASanPass = [&](SanitizerMask Mask, bool CompileKernel) { + MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); + bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); + MPM.addPass(createModuleToFunctionPassAdaptor(AddressSanitizerPass( + 
CompileKernel, Recover, CodeGenOpts.SanitizeAddressUseAfterScope))); + bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + MPM.addPass( + ModuleAddressSanitizerPass(CompileKernel, Recover, ModuleUseAfterScope, + CodeGenOpts.SanitizeAddressUseOdrIndicator)); + }; + + if (LangOpts.Sanitize.has(SanitizerKind::Address)) { + ASanPass(SanitizerKind::Address, /*CompileKernel=*/false); + } + + if (LangOpts.Sanitize.has(SanitizerKind::KernelAddress)) { + ASanPass(SanitizerKind::KernelAddress, /*CompileKernel=*/true); + } + + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) { + MPM.addPass(createModuleToFunctionPassAdaptor(MemorySanitizerPass({}))); + } + + if (LangOpts.Sanitize.has(SanitizerKind::KernelMemory)) { + MPM.addPass(createModuleToFunctionPassAdaptor( + MemorySanitizerPass({0, false, /*Kernel=*/true}))); + } + + if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { + MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); + } +} + /// A clean version of `EmitAssembly` that uses the new pass manager. /// /// Not all features are currently supported in this system, but where @@ -929,13 +982,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( TimeRegion Region(FrontendTimesIsEnabled ? &CodeGenerationTime : nullptr); setCommandLineOpts(CodeGenOpts); - // The new pass manager always makes a target machine available to passes - // during construction. - CreateTargetMachine(/*MustCreateTM*/ true); - if (!TM) - // This will already be diagnosed, just bail. + bool RequiresCodeGen = (Action != Backend_EmitNothing && + Action != Backend_EmitBC && + Action != Backend_EmitLL); + CreateTargetMachine(RequiresCodeGen); + + if (RequiresCodeGen && !TM) return; - TheModule->setDataLayout(TM->createDataLayout()); + if (TM) + TheModule->setDataLayout(TM->createDataLayout()); Optional<PGOOptions> PGOOpt; @@ -944,23 +999,69 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty() ? DefaultProfileGenName : CodeGenOpts.InstrProfileOutput, - "", "", "", true, + "", "", PGOOptions::IRInstr, PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); - else if (CodeGenOpts.hasProfileIRUse()) + else if (CodeGenOpts.hasProfileIRUse()) { // -fprofile-use. - PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", - CodeGenOpts.ProfileRemappingFile, false, - CodeGenOpts.DebugInfoForProfiling); - else if (!CodeGenOpts.SampleProfileFile.empty()) + auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse + : PGOOptions::NoCSAction; + PGOOpt = PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "", + CodeGenOpts.ProfileRemappingFile, PGOOptions::IRUse, + CSAction, CodeGenOpts.DebugInfoForProfiling); + } else if (!CodeGenOpts.SampleProfileFile.empty()) // -fprofile-sample-use - PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, - CodeGenOpts.ProfileRemappingFile, false, - CodeGenOpts.DebugInfoForProfiling); + PGOOpt = + PGOOptions(CodeGenOpts.SampleProfileFile, "", + CodeGenOpts.ProfileRemappingFile, PGOOptions::SampleUse, + PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.DebugInfoForProfiling) // -fdebug-info-for-profiling - PGOOpt = PGOOptions("", "", "", "", false, true); + PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + PGOOptions::NoCSAction, true); + + // Check to see if we want to generate a CS profile. 
+ if (CodeGenOpts.hasProfileCSIRInstr()) { + assert(!CodeGenOpts.hasProfileCSIRUse() && + "Cannot have both CSProfileUse pass and CSProfileGen pass at " + "the same time"); + if (PGOOpt.hasValue()) { + assert(PGOOpt->Action != PGOOptions::IRInstr && + PGOOpt->Action != PGOOptions::SampleUse && + "Cannot run CSProfileGen pass with ProfileGen or SampleUse " + " pass"); + PGOOpt->CSProfileGenFile = CodeGenOpts.InstrProfileOutput.empty() + ? DefaultProfileGenName + : CodeGenOpts.InstrProfileOutput; + PGOOpt->CSAction = PGOOptions::CSIRInstr; + } else + PGOOpt = PGOOptions("", + CodeGenOpts.InstrProfileOutput.empty() + ? DefaultProfileGenName + : CodeGenOpts.InstrProfileOutput, + "", PGOOptions::NoAction, PGOOptions::CSIRInstr, + CodeGenOpts.DebugInfoForProfiling); + } - PassBuilder PB(TM.get(), PGOOpt); + PipelineTuningOptions PTO; + PTO.LoopUnrolling = CodeGenOpts.UnrollLoops; + // For historical reasons, loop interleaving is set to mirror setting for loop + // unrolling. + PTO.LoopInterleaving = CodeGenOpts.UnrollLoops; + PTO.LoopVectorization = CodeGenOpts.VectorizeLoop; + PTO.SLPVectorization = CodeGenOpts.VectorizeSLP; + + PassBuilder PB(TM.get(), PTO, PGOOpt); + + // Attempt to load pass plugins and register their callbacks with PB. + for (auto &PluginFN : CodeGenOpts.PassPlugins) { + auto PassPlugin = PassPlugin::Load(PluginFN); + if (PassPlugin) { + PassPlugin->registerPassBuilderCallbacks(PB); + } else { + Diags.Report(diag::err_fe_unable_to_load_plugin) + << PluginFN << toString(PassPlugin.takeError()); + } + } LoopAnalysisManager LAM(CodeGenOpts.DebugPassManager); FunctionAnalysisManager FAM(CodeGenOpts.DebugPassManager); @@ -994,10 +1095,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (CodeGenOpts.OptimizationLevel == 0) { if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) MPM.addPass(GCOVProfilerPass(*Options)); + if (Optional<InstrProfOptions> Options = + getInstrProfOptions(CodeGenOpts, LangOpts)) + MPM.addPass(InstrProfiling(*Options, false)); // Build a minimal pipeline based on the semantics required by Clang, - // which is just that always inlining occurs. - MPM.addPass(AlwaysInlinerPass()); + // which is just that always inlining occurs. Further, disable generating + // lifetime intrinsics to avoid enabling further optimizations during + // code generation. + MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false)); // At -O0 we directly run necessary sanitizer passes. if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) @@ -1013,17 +1119,61 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // configure the pipeline. PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts); + PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { + MPM.addPass(createModuleToFunctionPassAdaptor( + EntryExitInstrumenterPass(/*PostInlining=*/false))); + }); + // Register callbacks to schedule sanitizer passes at the appropriate part of // the pipeline. 
+ // FIXME: either handle asan/the remaining sanitizers or error out if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) PB.registerScalarOptimizerLateEPCallback( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(BoundsCheckingPass()); }); + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) + PB.registerOptimizerLastEPCallback( + [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + FPM.addPass(MemorySanitizerPass({})); + }); + if (LangOpts.Sanitize.has(SanitizerKind::Thread)) + PB.registerOptimizerLastEPCallback( + [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + FPM.addPass(ThreadSanitizerPass()); + }); + if (LangOpts.Sanitize.has(SanitizerKind::Address)) { + PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM) { + MPM.addPass( + RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); + }); + bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::Address); + bool UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; + PB.registerOptimizerLastEPCallback( + [Recover, UseAfterScope](FunctionPassManager &FPM, + PassBuilder::OptimizationLevel Level) { + FPM.addPass(AddressSanitizerPass( + /*CompileKernel=*/false, Recover, UseAfterScope)); + }); + bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator; + PB.registerPipelineStartEPCallback( + [Recover, ModuleUseAfterScope, + UseOdrIndicator](ModulePassManager &MPM) { + MPM.addPass(ModuleAddressSanitizerPass( + /*CompileKernel=*/false, Recover, ModuleUseAfterScope, + UseOdrIndicator)); + }); + } if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) { MPM.addPass(GCOVProfilerPass(*Options)); }); + if (Optional<InstrProfOptions> Options = + getInstrProfOptions(CodeGenOpts, LangOpts)) + PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) { + MPM.addPass(InstrProfiling(*Options, false)); + }); if (IsThinLTO) { MPM = PB.buildThinLTOPreLinkDefaultPipeline( @@ -1040,6 +1190,19 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( CodeGenOpts.DebugPassManager); } } + + if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) { + bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress); + MPM.addPass(HWAddressSanitizerPass( + /*CompileKernel=*/false, Recover)); + } + if (LangOpts.Sanitize.has(SanitizerKind::KernelHWAddress)) { + MPM.addPass(HWAddressSanitizerPass( + /*CompileKernel=*/true, /*Recover=*/true)); + } + + if (CodeGenOpts.OptimizationLevel == 0) + addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts); } // FIXME: We still use the legacy pass manager to do code generation. 
We @@ -1093,8 +1256,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( NeedCodeGen = true; CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - if (!CodeGenOpts.SplitDwarfFile.empty()) { - DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile); + if (!CodeGenOpts.SplitDwarfOutput.empty()) { + DwoOS = openOutputFile(CodeGenOpts.SplitDwarfOutput); if (!DwoOS) return; } @@ -1226,14 +1389,28 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, Conf.MAttrs = TOpts.Features; Conf.RelocModel = CGOpts.RelocationModel; Conf.CGOptLevel = getCGOptLevel(CGOpts); + Conf.OptLevel = CGOpts.OptimizationLevel; initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); + + // Context sensitive profile. + if (CGOpts.hasProfileCSIRInstr()) { + Conf.RunCSIRInstr = true; + Conf.CSIRProfile = std::move(CGOpts.InstrProfileOutput); + } else if (CGOpts.hasProfileCSIRUse()) { + Conf.RunCSIRInstr = false; + Conf.CSIRProfile = std::move(CGOpts.ProfileInstrumentUsePath); + } + Conf.ProfileRemapping = std::move(ProfileRemapping); Conf.UseNewPM = CGOpts.ExperimentalNewPassManager; Conf.DebugPassManager = CGOpts.DebugPassManager; Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness; Conf.RemarksFilename = CGOpts.OptRecordFile; - Conf.DwoPath = CGOpts.SplitDwarfFile; + Conf.RemarksPasses = CGOpts.OptRecordPasses; + Conf.RemarksFormat = CGOpts.OptRecordFormat; + Conf.SplitDwarfFile = CGOpts.SplitDwarfFile; + Conf.SplitDwarfOutput = CGOpts.SplitDwarfOutput; switch (Action) { case Backend_EmitNothing: Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) { @@ -1273,6 +1450,9 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const llvm::DataLayout &TDesc, Module *M, BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { + + llvm::TimeTraceScope TimeScope("Backend", StringRef("")); + std::unique_ptr<llvm::Module> EmptyModule; if (!CGOpts.ThinLTOIndexFile.empty()) { // If we are performing a ThinLTO importing compile, load the function index @@ -1339,6 +1519,9 @@ static const char* getSectionNameForBitcode(const Triple &T) { case Triple::Wasm: case Triple::UnknownObjectFormat: return ".llvmbc"; + case Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); + break; } llvm_unreachable("Unimplemented ObjectFormatType"); } @@ -1352,6 +1535,9 @@ static const char* getSectionNameForCommandline(const Triple &T) { case Triple::Wasm: case Triple::UnknownObjectFormat: return ".llvmcmd"; + case Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); + break; } llvm_unreachable("Unimplemented ObjectFormatType"); } diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 24056a449def..a95cd12c2d64 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -1,9 +1,8 @@ //===--- CGAtomic.cpp - Emit LLVM IR for atomic operations ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -36,7 +35,6 @@ namespace { uint64_t ValueSizeInBits; CharUnits AtomicAlign; CharUnits ValueAlign; - CharUnits LValueAlign; TypeEvaluationKind EvaluationKind; bool UseLibcall; LValue LVal; @@ -133,7 +131,6 @@ namespace { QualType getAtomicType() const { return AtomicTy; } QualType getValueType() const { return ValueTy; } CharUnits getAtomicAlignment() const { return AtomicAlign; } - CharUnits getValueAlignment() const { return ValueAlign; } uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; } uint64_t getValueSizeInBits() const { return ValueSizeInBits; } TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; } @@ -202,7 +199,7 @@ namespace { assert(LVal.isSimple()); Address addr = getAtomicAddress(); if (hasPadding()) - addr = CGF.Builder.CreateStructGEP(addr, 0, CharUnits()); + addr = CGF.Builder.CreateStructGEP(addr, 0); return LValue::MakeAddr(addr, getValueType(), CGF.getContext(), LVal.getBaseInfo(), LVal.getTBAAInfo()); @@ -308,7 +305,7 @@ static RValue emitAtomicLibcall(CodeGenFunction &CGF, const CGFunctionInfo &fnInfo = CGF.CGM.getTypes().arrangeBuiltinFunctionCall(resultType, args); llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo); - llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); + llvm::FunctionCallee fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); auto callee = CGCallee::forDirect(fn); return CGF.EmitCall(fnInfo, callee, ReturnValueSlot(), args); } @@ -680,7 +677,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, // Handle constant scope. if (auto SC = dyn_cast<llvm::ConstantInt>(Scope)) { auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID( - ScopeModel->map(SC->getZExtValue()), CGF.CGM.getLLVMContext()); + CGF.CGM.getLangOpts(), ScopeModel->map(SC->getZExtValue()), + Order, CGF.CGM.getLLVMContext()); EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order, SCID); return; @@ -709,7 +707,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, Builder.SetInsertPoint(B); EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order, - CGF.getTargetHooks().getLLVMSyncScopeID(ScopeModel->map(S), + CGF.getTargetHooks().getLLVMSyncScopeID(CGF.CGM.getLangOpts(), + ScopeModel->map(S), + Order, CGF.getLLVMContext())); Builder.CreateBr(ContBB); } @@ -1357,7 +1357,7 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr, // Drill into the padding structure if we have one. if (hasPadding()) - addr = CGF.Builder.CreateStructGEP(addr, 0, CharUnits()); + addr = CGF.Builder.CreateStructGEP(addr, 0); // Otherwise, just convert the temporary to an r-value using the // normal conversion routine. @@ -1688,7 +1688,7 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal, UpRVal = OldRVal; DesiredLVal = CGF.MakeAddrLValue(DesiredAddr, AtomicLVal.getType()); } else { - // Build new lvalue for temp address + // Build new lvalue for temp address. Address Ptr = Atomics.materializeRValue(OldRVal); LValue UpdateLVal; if (AtomicLVal.isBitField()) { @@ -1721,7 +1721,7 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal, } UpRVal = CGF.EmitLoadOfLValue(UpdateLVal, SourceLocation()); } - // Store new value in the corresponding memory area + // Store new value in the corresponding memory area. 
RValue NewRVal = UpdateOp(UpRVal); if (NewRVal.isScalar()) { CGF.EmitStoreThroughLValue(NewRVal, DesiredLVal); @@ -1786,7 +1786,7 @@ void AtomicInfo::EmitAtomicUpdateOp( SourceLocation(), /*AsValue=*/false); EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr); auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr); - // Try to write new value using cmpxchg operation + // Try to write new value using cmpxchg operation. auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure); PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock()); CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB); @@ -1797,7 +1797,7 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue UpdateRVal, Address DesiredAddr) { LValue AtomicLVal = Atomics.getAtomicLValue(); LValue DesiredLVal; - // Build new lvalue for temp address + // Build new lvalue for temp address. if (AtomicLVal.isBitField()) { DesiredLVal = LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(), @@ -1814,7 +1814,7 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo()); } - // Store new value in the corresponding memory area + // Store new value in the corresponding memory area. assert(UpdateRVal.isScalar()); CGF.EmitStoreThroughLValue(UpdateRVal, DesiredLVal); } @@ -1866,7 +1866,7 @@ void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal, } EmitAtomicUpdateValue(CGF, *this, UpdateRVal, NewAtomicAddr); auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr); - // Try to write new value using cmpxchg operation + // Try to write new value using cmpxchg operation. auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure); PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock()); CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB); diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index fa3c3ee8610c..c3ee7129d9d7 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -1,9 +1,8 @@ //===--- CGBlocks.cpp - Emit LLVM Code for declarations ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -23,7 +22,6 @@ #include "clang/AST/DeclObjC.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" #include "llvm/Support/ScopedPrinter.h" @@ -276,6 +274,8 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, /*constant*/ true, linkage, AddrSpace); if (linkage == llvm::GlobalValue::LinkOnceODRLinkage) { + if (CGM.supportsCOMDAT()) + global->setComdat(CGM.getModule().getOrInsertComdat(descName)); global->setVisibility(llvm::GlobalValue::HiddenVisibility); global->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); } @@ -671,7 +671,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, // Sort the layout by alignment. We have to use a stable sort here // to get reproducible results. 
There should probably be an // llvm::array_pod_stable_sort. - std::stable_sort(layout.begin(), layout.end()); + llvm::stable_sort(layout); // Needed for blocks layout info. info.BlockHeaderForcedGapOffset = info.BlockSize; @@ -838,9 +838,8 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { } // GEP down to the address. - Address addr = CGF.Builder.CreateStructGEP(blockInfo.LocalAddress, - capture.getIndex(), - capture.getOffset()); + Address addr = + CGF.Builder.CreateStructGEP(blockInfo.LocalAddress, capture.getIndex()); // We can use that GEP as the dominating IP. if (!blockInfo.DominatingIP) @@ -977,27 +976,24 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { flags |= BLOCK_IS_NOESCAPE | BLOCK_IS_GLOBAL; } - auto projectField = - [&](unsigned index, CharUnits offset, const Twine &name) -> Address { - return Builder.CreateStructGEP(blockAddr, index, offset, name); - }; - auto storeField = - [&](llvm::Value *value, unsigned index, CharUnits offset, - const Twine &name) { - Builder.CreateStore(value, projectField(index, offset, name)); - }; + auto projectField = [&](unsigned index, const Twine &name) -> Address { + return Builder.CreateStructGEP(blockAddr, index, name); + }; + auto storeField = [&](llvm::Value *value, unsigned index, const Twine &name) { + Builder.CreateStore(value, projectField(index, name)); + }; // Initialize the block header. { // We assume all the header fields are densely packed. unsigned index = 0; CharUnits offset; - auto addHeaderField = - [&](llvm::Value *value, CharUnits size, const Twine &name) { - storeField(value, index, offset, name); - offset += size; - index++; - }; + auto addHeaderField = [&](llvm::Value *value, CharUnits size, + const Twine &name) { + storeField(value, index, name); + offset += size; + index++; + }; if (!IsOpenCL) { addHeaderField(isa, getPointerSize(), "block.isa"); @@ -1033,8 +1029,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // First, 'this'. if (blockDecl->capturesCXXThis()) { - Address addr = projectField(blockInfo.CXXThisIndex, blockInfo.CXXThisOffset, - "block.captured-this.addr"); + Address addr = + projectField(blockInfo.CXXThisIndex, "block.captured-this.addr"); Builder.CreateStore(LoadCXXThis(), addr); } @@ -1050,8 +1046,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // This will be a [[type]]*, except that a byref entry will just be // an i8**. - Address blockField = - projectField(capture.getIndex(), capture.getOffset(), "block.captured"); + Address blockField = projectField(capture.getIndex(), "block.captured"); // Compute the address of the thing we're going to move into the // block literal. @@ -1070,7 +1065,6 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // This is a [[type]]*, except that a byref entry will just be an i8**. src = Builder.CreateStructGEP(LoadBlockStruct(), enclosingCapture.getIndex(), - enclosingCapture.getOffset(), "block.capture.addr"); } else { auto I = LocalDeclMap.find(variable); @@ -1261,52 +1255,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { const BlockPointerType *BPT = E->getCallee()->getType()->getAs<BlockPointerType>(); - llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); - - // Get a pointer to the generic block literal. 
- // For OpenCL we generate generic AS void ptr to be able to reuse the same - // block definition for blocks with captures generated as private AS local - // variables and without captures generated as global AS program scope - // variables. - unsigned AddrSpace = 0; - if (getLangOpts().OpenCL) - AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); - - llvm::Type *BlockLiteralTy = - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); - - // Bitcast the callee to a block literal. - BlockPtr = - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); - - // Get the function pointer from the literal. - llvm::Value *FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, - CGM.getLangOpts().OpenCL ? 2 : 3); - - // Add the block literal. + llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType(); + llvm::Value *Func = nullptr; + QualType FnType = BPT->getPointeeType(); + ASTContext &Ctx = getContext(); CallArgList Args; - QualType VoidPtrQualTy = getContext().VoidPtrTy; - llvm::Type *GenericVoidPtrTy = VoidPtrTy; if (getLangOpts().OpenCL) { - GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType(); - VoidPtrQualTy = - getContext().getPointerType(getContext().getAddrSpaceQualType( - getContext().VoidTy, LangAS::opencl_generic)); - } - - BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); - Args.add(RValue::get(BlockPtr), VoidPtrQualTy); - - QualType FnType = BPT->getPointeeType(); + // For OpenCL, BlockPtr is already casted to generic block literal. + + // First argument of a block call is a generic block literal casted to + // generic void pointer, i.e. i8 addrspace(4)* + llvm::Value *BlockDescriptor = Builder.CreatePointerCast( + BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType()); + QualType VoidPtrQualTy = Ctx.getPointerType( + Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic)); + Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy); + // And the rest of the arguments. + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); + + // We *can* call the block directly unless it is a function argument. + if (!isa<ParmVarDecl>(E->getCalleeDecl())) + Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); + else { + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2); + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + } + } else { + // Bitcast the block literal to a generic block literal. + BlockPtr = Builder.CreatePointerCast( + BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal"); + // Get pointer to the block invoke function + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3); - // And the rest of the arguments. - EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); + // First argument is a block literal casted to a void pointer + BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy); + Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy); + // And the rest of the arguments. + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); - // Load the function. - llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + // Load the function. + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + } const FunctionType *FuncTy = FnType->castAs<FunctionType>(); const CGFunctionInfo &FnInfo = @@ -1332,9 +1323,8 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) { // Handle constant captures. 
if (capture.isConstant()) return LocalDeclMap.find(variable)->second; - Address addr = - Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), - capture.getOffset(), "block.capture.addr"); + Address addr = Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), + "block.capture.addr"); if (variable->isEscapingByref()) { // addr should be a void** right now. Load, then cast the result @@ -1444,10 +1434,12 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, if (CGM.getContext().getLangOpts().OpenCL) AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); - llvm::Constant *literal = fields.finishAndCreateGlobal( + llvm::GlobalVariable *literal = fields.finishAndCreateGlobal( "__block_literal_global", blockInfo.BlockAlign, /*constant*/ !IsWindows, llvm::GlobalVariable::InternalLinkage, AddrSpace); + literal->addAttribute("objc_arc_inert"); + // Windows does not allow globals to be initialised to point to globals in // different DLLs. Any such variables must run code to initialise them. if (IsWindows) { @@ -1617,9 +1609,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // If we have a C++ 'this' reference, go ahead and force it into // existence now. if (blockDecl->capturesCXXThis()) { - Address addr = - Builder.CreateStructGEP(LoadBlockStruct(), blockInfo.CXXThisIndex, - blockInfo.CXXThisOffset, "block.captured-this"); + Address addr = Builder.CreateStructGEP( + LoadBlockStruct(), blockInfo.CXXThisIndex, "block.captured-this"); CXXThisValue = Builder.CreateLoad(addr, "this"); } @@ -2029,6 +2020,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage, FuncName, &CGM.getModule()); + if (CGM.supportsCOMDAT()) + Fn->setComdat(CGM.getModule().getOrInsertComdat(FuncName)); IdentifierInfo *II = &C.Idents.get(FuncName); @@ -2062,8 +2055,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { BlockFieldFlags flags = CopiedCapture.CopyFlags; unsigned index = capture.getIndex(); - Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset()); - Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset()); + Address srcField = Builder.CreateStructGEP(src, index); + Address dstField = Builder.CreateStructGEP(dst, index); switch (CopiedCapture.CopyKind) { case BlockCaptureEntityKind::CXXRecord: @@ -2220,6 +2213,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage, FuncName, &CGM.getModule()); + if (CGM.supportsCOMDAT()) + Fn->setComdat(CGM.getModule().getOrInsertComdat(FuncName)); IdentifierInfo *II = &C.Idents.get(FuncName); @@ -2251,8 +2246,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { const CGBlockInfo::Capture &capture = *DestroyedCapture.Capture; BlockFieldFlags flags = DestroyedCapture.DisposeFlags; - Address srcField = - Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset()); + Address srcField = Builder.CreateStructGEP(src, capture.getIndex()); pushCaptureCleanup(DestroyedCapture.DisposeKind, srcField, CI.getVariable()->getType(), flags, @@ -2286,7 +2280,7 @@ public: unsigned flags = (Flags | BLOCK_BYREF_CALLER).getBitMask(); llvm::Value *flagsVal = llvm::ConstantInt::get(CGF.Int32Ty, flags); - llvm::Value *fn = CGF.CGM.getBlockObjectAssign(); + llvm::FunctionCallee fn = CGF.CGM.getBlockObjectAssign(); llvm::Value 
*args[] = { destField.getPointer(), srcValue, flagsVal }; CGF.EmitNounwindRuntimeCall(fn, args); @@ -2712,13 +2706,11 @@ Address CodeGenFunction::emitBlockByrefAddress(Address baseAddr, const llvm::Twine &name) { // Chase the forwarding address if requested. if (followForward) { - Address forwardingAddr = - Builder.CreateStructGEP(baseAddr, 1, getPointerSize(), "forwarding"); + Address forwardingAddr = Builder.CreateStructGEP(baseAddr, 1, "forwarding"); baseAddr = Address(Builder.CreateLoad(forwardingAddr), info.ByrefAlignment); } - return Builder.CreateStructGEP(baseAddr, info.FieldIndex, - info.FieldOffset, name); + return Builder.CreateStructGEP(baseAddr, info.FieldIndex, name); } /// BuildByrefInfo - This routine changes a __block variable declared as T x @@ -2836,8 +2828,7 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { CharUnits nextHeaderOffset; auto storeHeaderField = [&](llvm::Value *value, CharUnits fieldSize, const Twine &name) { - auto fieldAddr = Builder.CreateStructGEP(addr, nextHeaderIndex, - nextHeaderOffset, name); + auto fieldAddr = Builder.CreateStructGEP(addr, nextHeaderIndex, name); Builder.CreateStore(value, fieldAddr); nextHeaderIndex++; @@ -2933,7 +2924,7 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags, bool CanThrow) { - llvm::Value *F = CGM.getBlockObjectDispose(); + llvm::FunctionCallee F = CGM.getBlockObjectDispose(); llvm::Value *args[] = { Builder.CreateBitCast(V, Int8PtrTy), llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) @@ -2989,7 +2980,7 @@ static void configureBlocksRuntimeObject(CodeGenModule &CGM, CGM.setDSOLocal(GV); } -llvm::Constant *CodeGenModule::getBlockObjectDispose() { +llvm::FunctionCallee CodeGenModule::getBlockObjectDispose() { if (BlockObjectDispose) return BlockObjectDispose; @@ -2997,11 +2988,12 @@ llvm::Constant *CodeGenModule::getBlockObjectDispose() { llvm::FunctionType *fty = llvm::FunctionType::get(VoidTy, args, false); BlockObjectDispose = CreateRuntimeFunction(fty, "_Block_object_dispose"); - configureBlocksRuntimeObject(*this, BlockObjectDispose); + configureBlocksRuntimeObject( + *this, cast<llvm::Constant>(BlockObjectDispose.getCallee())); return BlockObjectDispose; } -llvm::Constant *CodeGenModule::getBlockObjectAssign() { +llvm::FunctionCallee CodeGenModule::getBlockObjectAssign() { if (BlockObjectAssign) return BlockObjectAssign; @@ -3009,7 +3001,8 @@ llvm::Constant *CodeGenModule::getBlockObjectAssign() { llvm::FunctionType *fty = llvm::FunctionType::get(VoidTy, args, false); BlockObjectAssign = CreateRuntimeFunction(fty, "_Block_object_assign"); - configureBlocksRuntimeObject(*this, BlockObjectAssign); + configureBlocksRuntimeObject( + *this, cast<llvm::Constant>(BlockObjectAssign.getCallee())); return BlockObjectAssign; } diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h index 3f9fc16d9b10..c4bfde666154 100644 --- a/lib/CodeGen/CGBlocks.h +++ b/lib/CodeGen/CGBlocks.h @@ -1,9 +1,8 @@ //===-- CGBlocks.h - state for LLVM CodeGen for blocks ----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h index 654ef72060b7..68c8c641139f 100644 --- a/lib/CodeGen/CGBuilder.h +++ b/lib/CodeGen/CGBuilder.h @@ -1,9 +1,8 @@ //===-- CGBuilder.h - Choose IRBuilder implementation ----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -168,19 +167,25 @@ public: return Address(Ptr, Addr.getAlignment()); } + /// Given + /// %addr = {T1, T2...}* ... + /// produce + /// %name = getelementptr inbounds %addr, i32 0, i32 index + /// + /// This API assumes that drilling into a struct like this is always an + /// inbounds operation. using CGBuilderBaseTy::CreateStructGEP; - Address CreateStructGEP(Address Addr, unsigned Index, CharUnits Offset, + Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name = "") { + llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType()); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); + auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); + return Address(CreateStructGEP(Addr.getElementType(), Addr.getPointer(), Index, Name), Addr.getAlignment().alignmentAtOffset(Offset)); } - Address CreateStructGEP(Address Addr, unsigned Index, - const llvm::StructLayout *Layout, - const llvm::Twine &Name = "") { - auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); - return CreateStructGEP(Addr, Index, Offset, Name); - } /// Given /// %addr = [n x T]* ... @@ -190,15 +195,17 @@ public: /// /// This API assumes that drilling into an array like this is always /// an inbounds operation. - /// - /// \param EltSize - the size of the type T in bytes - Address CreateConstArrayGEP(Address Addr, uint64_t Index, CharUnits EltSize, + Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name = "") { - return Address(CreateInBoundsGEP(Addr.getPointer(), - {getSize(CharUnits::Zero()), - getSize(Index)}, - Name), - Addr.getAlignment().alignmentAtOffset(Index * EltSize)); + llvm::ArrayType *ElTy = cast<llvm::ArrayType>(Addr.getElementType()); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = + CharUnits::fromQuantity(DL.getTypeAllocSize(ElTy->getElementType())); + + return Address( + CreateInBoundsGEP(Addr.getPointer(), + {getSize(CharUnits::Zero()), getSize(Index)}, Name), + Addr.getAlignment().alignmentAtOffset(Index * EltSize)); } /// Given @@ -206,11 +213,12 @@ public: /// produce /// %name = getelementptr inbounds %addr, i64 index /// where i64 is actually the target word size. 
- /// - /// \param EltSize - the size of the type T in bytes Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, - CharUnits EltSize, const llvm::Twine &Name = "") { + llvm::Type *ElTy = Addr.getElementType(); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = CharUnits::fromQuantity(DL.getTypeAllocSize(ElTy)); + return Address(CreateInBoundsGEP(Addr.getElementType(), Addr.getPointer(), getSize(Index), Name), Addr.getAlignment().alignmentAtOffset(Index * EltSize)); @@ -221,10 +229,12 @@ public: /// produce /// %name = getelementptr inbounds %addr, i64 index /// where i64 is actually the target word size. - /// - /// \param EltSize - the size of the type T in bytes - Address CreateConstGEP(Address Addr, uint64_t Index, CharUnits EltSize, + Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name = "") { + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = + CharUnits::fromQuantity(DL.getTypeAllocSize(Addr.getElementType())); + return Address(CreateGEP(Addr.getElementType(), Addr.getPointer(), getSize(Index), Name), Addr.getAlignment().alignmentAtOffset(Index * EltSize)); @@ -245,31 +255,21 @@ public: } using CGBuilderBaseTy::CreateConstInBoundsGEP2_32; - Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0, - unsigned Idx1, const llvm::DataLayout &DL, - const llvm::Twine &Name = "") { + Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0, unsigned Idx1, + const llvm::Twine &Name = "") { + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + auto *GEP = cast<llvm::GetElementPtrInst>(CreateConstInBoundsGEP2_32( Addr.getElementType(), Addr.getPointer(), Idx0, Idx1, Name)); llvm::APInt Offset( DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0, - /*IsSigned=*/true); + /*isSigned=*/true); if (!GEP->accumulateConstantOffset(DL, Offset)) llvm_unreachable("offset of GEP with constants is always computable"); return Address(GEP, Addr.getAlignment().alignmentAtOffset( CharUnits::fromQuantity(Offset.getSExtValue()))); } - llvm::Value *CreateConstInBoundsByteGEP(llvm::Value *Ptr, CharUnits Offset, - const llvm::Twine &Name = "") { - assert(Ptr->getType()->getPointerElementType() == TypeCache.Int8Ty); - return CreateInBoundsGEP(Ptr, getSize(Offset), Name); - } - llvm::Value *CreateConstByteGEP(llvm::Value *Ptr, CharUnits Offset, - const llvm::Twine &Name = "") { - assert(Ptr->getType()->getPointerElementType() == TypeCache.Int8Ty); - return CreateGEP(Ptr, getSize(Offset), Name); - } - using CGBuilderBaseTy::CreateMemCpy; llvm::CallInst *CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { @@ -298,6 +298,21 @@ public: return CreateMemSet(Dest.getPointer(), Value, Size, Dest.getAlignment().getQuantity(), IsVolatile); } + + using CGBuilderBaseTy::CreatePreserveStructAccessIndex; + Address CreatePreserveStructAccessIndex(Address Addr, + unsigned Index, + unsigned FieldIndex, + llvm::MDNode *DbgInfo) { + llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType()); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); + auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); + + return Address(CreatePreserveStructAccessIndex(Addr.getPointer(), + Index, FieldIndex, DbgInfo), + Addr.getAlignment().alignmentAtOffset(Offset)); + } }; } // end namespace CodeGen diff --git 
a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index a718f2f19aa6..a300bab49f9c 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1,9 +1,8 @@ //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,6 +17,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -27,7 +27,6 @@ #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" @@ -46,6 +45,25 @@ int64_t clamp(int64_t Value, int64_t Low, int64_t High) { return std::min(High, std::max(Low, Value)); } +static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, unsigned AlignmentInBytes) { + ConstantInt *Byte; + switch (CGF.getLangOpts().getTrivialAutoVarInit()) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + // Nothing to initialize. + return; + case LangOptions::TrivialAutoVarInitKind::Zero: + Byte = CGF.Builder.getInt8(0x00); + break; + case LangOptions::TrivialAutoVarInitKind::Pattern: { + llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext()); + Byte = llvm::dyn_cast<llvm::ConstantInt>( + initializationPatternFor(CGF.CGM, Int8)); + break; + } + } + CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -300,6 +318,34 @@ static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1)); } +// Build a plain volatile load. +static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = + llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8); + Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(Ptr, LoadSize); + Load->setVolatile(true); + return Load; +} + +// Build a plain volatile store. 
+static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + Value *Value = CGF.EmitScalarExpr(E->getArg(1)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = + llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8); + Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::StoreInst *Store = + CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize); + Store->setVolatile(true); + return Store; +} + // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. static Value *emitUnaryBuiltin(CodeGenFunction &CGF, @@ -307,7 +353,7 @@ static Value *emitUnaryBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, Src0); } @@ -318,7 +364,7 @@ static Value *emitBinaryBuiltin(CodeGenFunction &CGF, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, { Src0, Src1 }); } @@ -330,7 +376,7 @@ static Value *emitTernaryBuiltin(CodeGenFunction &CGF, llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); } @@ -341,13 +387,25 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, {Src0, Src1}); } +// Emit an intrinsic that has overloaded integer result and fp operand. +static Value *emitFPToIntRoundBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Type *ResultType = CGF.ConvertType(E->getType()); + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, + {ResultType, Src0->getType()}); + return CGF.Builder.CreateCall(F, Src0); +} + /// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { - Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); + Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); Call->setDoesNotAccessMemory(); return Call; @@ -408,7 +466,7 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, "Arguments must be the same type. 
(Did you forget to make sure both " "arguments have the same integer width?)"); - llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); + Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); Carry = CGF.Builder.CreateExtractValue(Tmp, 1); return CGF.Builder.CreateExtractValue(Tmp, 0); @@ -419,7 +477,7 @@ static Value *emitRangedBuiltin(CodeGenFunction &CGF, int low, int high) { llvm::MDBuilder MDHelper(CGF.getLLVMContext()); llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); llvm::Instruction *Call = CGF.Builder.CreateCall(F); Call->setMetadata(llvm::LLVMContext::MD_range, RNode); return Call; @@ -496,10 +554,11 @@ getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { llvm::Value * CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE) { + llvm::Value *EmittedE, + bool IsDynamic) { uint64_t ObjectSize; if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) - return emitBuiltinObjectSize(E, Type, ResType, EmittedE); + return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic); return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); } @@ -515,7 +574,7 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::Value * CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE) { + llvm::Value *EmittedE, bool IsDynamic) { // We need to reference an argument if the pointer is a parameter with the // pass_object_size attribute. if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { @@ -530,7 +589,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, auto DIter = LocalDeclMap.find(D); assert(DIter != LocalDeclMap.end()); - return EmitLoadOfScalar(DIter->second, /*volatile=*/false, + return EmitLoadOfScalar(DIter->second, /*Volatile=*/false, getContext().getSizeType(), E->getBeginLoc()); } } @@ -545,13 +604,15 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, assert(Ptr->getType()->isPointerTy() && "Non-pointer passed to __builtin_object_size?"); - Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); + Function *F = + CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. Value *Min = Builder.getInt1((Type & 2) != 0); // For GCC compatibility, __builtin_object_size treat NULL as unknown size. 
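Concretely, keeping null as "unknown" means the maximum-size flavour of the builtin reports "don't know" rather than 0 for a null pointer; an illustrative expectation, not part of the patch:

#include <cstddef>

// Hedged sketch of the GCC-compatible behaviour described above: with null
// treated as unknown, type 0 yields (size_t)-1 for a null pointer.
static std::size_t null_objsize() {
  return __builtin_object_size(static_cast<char *>(nullptr), 0);
}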
Value *NullIsUnknown = Builder.getTrue(); - return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); + Value *Dynamic = Builder.getInt1(IsDynamic); + return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic}); } namespace { @@ -658,7 +719,7 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false); llvm::InlineAsm *IA = - llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); + llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); return CGF.Builder.CreateCall(IA, {BitBase, BitPos}); } @@ -793,16 +854,16 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::ReturnsTwice); - llvm::Constant *SetJmpFn = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name, ReturnsTwiceAttr, /*Local=*/true); llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast( CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy); llvm::Value *Args[] = {Buf, Arg1}; - llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); - CS.setAttributes(ReturnsTwiceAttr); - return RValue::get(CS.getInstruction()); + llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); + CB->setAttributes(ReturnsTwiceAttr); + return RValue::get(CB); } // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code, @@ -876,7 +937,7 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); if (BuiltinID == MSVCIntrin::_BitScanForward) { - Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); Builder.CreateStore(ZeroCount, IndexAddress, false); @@ -884,7 +945,7 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); @@ -996,16 +1057,19 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, Asm = "udf #251"; Constraints = "{r0}"; break; + case llvm::Triple::aarch64: + Asm = "brk #0xF003"; + Constraints = "{w0}"; } llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); llvm::InlineAsm *IA = - llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); + llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); - CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); - CS.setAttributes(NoReturnAttr); - return CS.getInstruction(); + llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); + CI->setAttributes(NoReturnAttr); + return CI; } } llvm_unreachable("Incorrect MSVC intrinsic!"); @@ -1070,9 
+1134,10 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( return F; llvm::SmallVector<QualType, 4> ArgTys; - llvm::SmallVector<ImplicitParamDecl, 4> Params; - Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), - Ctx.VoidPtrTy, ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(ImplicitParamDecl::Create( + Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy, + ImplicitParamDecl::Other)); ArgTys.emplace_back(Ctx.VoidPtrTy); for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { @@ -1081,17 +1146,13 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( continue; QualType ArgTy = getOSLogArgType(Ctx, Size); - Params.emplace_back( + Args.push_back(ImplicitParamDecl::Create( Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy, - ImplicitParamDecl::Other); + ImplicitParamDecl::Other)); ArgTys.emplace_back(ArgTy); } - FunctionArgList Args; - for (auto &P : Params) - Args.push_back(&P); - QualType ReturnTy = Ctx.VoidTy; QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {}); @@ -1106,6 +1167,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); + Fn->setDoesNotThrow(); // Attach 'noinline' at -Oz. if (CGM.getCodeGenOpts().OptimizeSize == 2) @@ -1123,7 +1185,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( auto AL = ApplyDebugLocation::CreateArtificial(*this); CharUnits Offset; - Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"), + Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), BufferAlignment); Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); @@ -1143,7 +1205,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( if (!Size.getQuantity()) continue; - Address Arg = GetAddrOfLocalVar(&Params[I]); + Address Arg = GetAddrOfLocalVar(Args[I]); Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(), "argDataCast"); @@ -1330,13 +1392,11 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, } static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, - Value *&RecordPtr, CharUnits Align, Value *Func, - int Lvl) { + Value *&RecordPtr, CharUnits Align, + llvm::FunctionCallee Func, int Lvl) { const auto *RT = RType->getAs<RecordType>(); ASTContext &Context = CGF.getContext(); RecordDecl *RD = RT->getDecl()->getDefinition(); - ASTContext &Ctx = RD->getASTContext(); - const ASTRecordLayout &RL = Ctx.getASTRecordLayout(RD); std::string Pad = std::string(Lvl * 4, ' '); Value *GString = @@ -1366,9 +1426,6 @@ static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, } for (const auto *FD : RD->fields()) { - uint64_t Off = RL.getFieldOffset(FD->getFieldIndex()); - Off = Ctx.toCharUnitsFromBits(Off).getQuantity(); - Value *FieldPtr = RecordPtr; if (RD->isUnion()) FieldPtr = CGF.Builder.CreatePointerCast( @@ -1466,7 +1523,7 @@ RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same. unsigned IID = IsRotateRight ? 
Intrinsic::fshr : Intrinsic::fshl; - Value *F = CGM.getIntrinsic(IID, Ty); + Function *F = CGM.getIntrinsic(IID, Ty); return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); } @@ -1668,6 +1725,38 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_truncl: return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); + case Builtin::BIlround: + case Builtin::BIlroundf: + case Builtin::BIlroundl: + case Builtin::BI__builtin_lround: + case Builtin::BI__builtin_lroundf: + case Builtin::BI__builtin_lroundl: + return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lround)); + + case Builtin::BIllround: + case Builtin::BIllroundf: + case Builtin::BIllroundl: + case Builtin::BI__builtin_llround: + case Builtin::BI__builtin_llroundf: + case Builtin::BI__builtin_llroundl: + return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llround)); + + case Builtin::BIlrint: + case Builtin::BIlrintf: + case Builtin::BIlrintl: + case Builtin::BI__builtin_lrint: + case Builtin::BI__builtin_lrintf: + case Builtin::BI__builtin_lrintl: + return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lrint)); + + case Builtin::BIllrint: + case Builtin::BIllrintf: + case Builtin::BIllrintl: + case Builtin::BI__builtin_llrint: + case Builtin::BI__builtin_llrintf: + case Builtin::BI__builtin_llrintl: + return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llrint)); + default: break; } @@ -1735,6 +1824,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_dump_struct: { + llvm::Type *LLVMIntTy = getTypes().ConvertType(getContext().IntTy); + llvm::FunctionType *LLVMFuncType = llvm::FunctionType::get( + LLVMIntTy, {llvm::Type::getInt8PtrTy(getLLVMContext())}, true); + Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts()); CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment(); @@ -1742,7 +1835,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, QualType Arg0Type = Arg0->getType()->getPointeeType(); Value *RecordPtr = EmitScalarExpr(Arg0); - Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0); + Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, + {LLVMFuncType, Func}, 0); + return RValue::get(Res); + } + + case Builtin::BI__builtin_preserve_access_index: { + // Only enabled preserved access index region when debuginfo + // is available as debuginfo is needed to preserve user-level + // access pattern. 
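The -g check and the nested-region check follow below. A hypothetical source-level use of the builtin, following the canonical wrapping pattern (meaningful mainly for BPF CO-RE style consumers of the recorded relocations):

struct S { int a; int b; };

// Hypothetical sketch: the address computation &p->b is recorded with
// relocatable access-index information instead of a baked-in offset.
// Requires -g, per the diagnostic emitted below.
static int read_b(struct S *p) {
  return *__builtin_preserve_access_index(&p->b);
}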
+ if (!getDebugInfo()) { + CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g"); + return RValue::get(EmitScalarExpr(E->getArg(0))); + } + + // Nested builtin_preserve_access_index() not supported + if (IsInPreservedAIRegion) { + CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported"); + return RValue::get(EmitScalarExpr(E->getArg(0))); + } + + IsInPreservedAIRegion = true; + Value *Res = EmitScalarExpr(E->getArg(0)); + IsInPreservedAIRegion = false; return RValue::get(Res); } @@ -1763,7 +1878,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Zero = llvm::Constant::getNullValue(ArgType); @@ -1783,7 +1898,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); @@ -1800,7 +1915,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); @@ -1817,7 +1932,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Tmp = @@ -1838,7 +1953,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Tmp = Builder.CreateCall(F, ArgValue); @@ -1854,7 +1969,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()}); @@ -1872,7 +1987,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F, ArgValue); @@ -1898,7 +2013,7 @@ RValue 
CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (CGM.getCodeGenOpts().OptimizationLevel == 0) return RValue::get(ArgValue); - Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); + Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); Value *Result = Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); return RValue::get(Result); @@ -1913,7 +2028,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); unsigned Alignment = (unsigned)AlignmentCI->getZExtValue(); - EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(), + EmitAlignmentAssumption(PtrValue, Ptr, + /*The expr loc is sufficient.*/ SourceLocation(), Alignment, OffsetValue); return RValue::get(PtrValue); } @@ -1923,7 +2039,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); Value *ArgValue = EmitScalarExpr(E->getArg(0)); - Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); + Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); } case Builtin::BI__builtin_bswap16: @@ -1968,17 +2084,34 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const Expr *Arg = E->getArg(0); QualType ArgType = Arg->getType(); - if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType()) - // We can only reason about scalar types. + // FIXME: The allowance for Obj-C pointers and block pointers is historical + // and likely a mistake. + if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() && + !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType()) + // Per the GCC documentation, only numeric constants are recognized after + // inlining. + return RValue::get(ConstantInt::get(ResultType, 0)); + + if (Arg->HasSideEffects(getContext())) + // The argument is unevaluated, so be conservative if it might have + // side-effects. return RValue::get(ConstantInt::get(ResultType, 0)); Value *ArgValue = EmitScalarExpr(Arg); - Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); + if (ArgType->isObjCObjectPointerType()) { + // Convert Objective-C objects to id because we cannot distinguish between + // LLVM types for Obj-C classes as they are opaque. + ArgType = CGM.getContext().getObjCIdType(); + ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType)); + } + Function *F = + CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); Value *Result = Builder.CreateCall(F, ArgValue); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false); return RValue::get(Result); } + case Builtin::BI__builtin_dynamic_object_size: case Builtin::BI__builtin_object_size: { unsigned Type = E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); @@ -1986,8 +2119,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // We pass this builtin onto the optimizer so that it can figure out the // object size in more complex cases. 
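The dynamic flavour wired up just below differs in that the optimizer may materialize a run-time size computation rather than folding to a constant; a hypothetical illustration:

#include <cstddef>
#include <cstdlib>

// Hedged sketch: the static flavour usually folds to (size_t)-1 for a heap
// buffer whose size is not a compile-time constant, while the dynamic
// flavour may evaluate to the allocation size at run time.
static std::size_t objsize_demo(std::size_t n) {
  char *p = static_cast<char *>(std::malloc(n));
  std::size_t fixed = __builtin_object_size(p, 0);
  std::size_t dyn = __builtin_dynamic_object_size(p, 0);
  std::free(p);
  return fixed + dyn;
}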
+ bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size; return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, - /*EmittedE=*/nullptr)); + /*EmittedE=*/nullptr, IsDynamic)); } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); @@ -1997,17 +2131,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : llvm::ConstantInt::get(Int32Ty, 3); Value *Data = llvm::ConstantInt::get(Int32Ty, 1); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); } case Builtin::BI__builtin_readcyclecounter: { - Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); + Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin___clear_cache: { Value *Begin = EmitScalarExpr(E->getArg(0)); Value *End = EmitScalarExpr(E->getArg(1)); - Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); + Function *F = CGM.getIntrinsic(Intrinsic::clear_cache); return RValue::get(Builder.CreateCall(F, {Begin, End})); } case Builtin::BI__builtin_trap: @@ -2029,7 +2163,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *Base = EmitScalarExpr(E->getArg(0)); Value *Exponent = EmitScalarExpr(E->getArg(1)); llvm::Type *ArgType = Base->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); return RValue::get(Builder.CreateCall(F, {Base, Exponent})); } @@ -2130,6 +2264,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } + case Builtin::BI__builtin_flt_rounds: { + Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds); + + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Result = Builder.CreateCall(F); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return RValue::get(Result); + } + case Builtin::BI__builtin_fpclassify: { Value *V = EmitScalarExpr(E->getArg(5)); llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); @@ -2200,6 +2345,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, .getQuantity(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(SuitableAlignmentInBytes); + initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); return RValue::get(AI); } @@ -2212,6 +2358,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(AlignmentInBytes); + initializeAlloca(*this, AI, Size, AlignmentInBytes); return RValue::get(AI); } @@ -2392,24 +2539,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // this instead of hard-coding 0, which is correct for most targets. 
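Returning briefly to the __builtin_alloca and __builtin_alloca_with_align cases above, which now route through initializeAlloca(): a rough picture of what -ftrivial-auto-var-init arranges for the freshly allocated stack bytes (the 0xAA byte below is only an illustrative stand-in for the pattern chosen by initializationPatternFor()):

#include <cstddef>
#include <cstring>

// Sketch only: zero mode memsets the buffer to 0x00, pattern mode to a
// repeating pattern byte.
static void init_sketch(void *buf, std::size_t size, bool zero_mode) {
  std::memset(buf, zero_mode ? 0x00 : 0xAA, size);
}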
int32_t Offset = 0; - Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); + Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); return RValue::get(Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, Offset))); } case Builtin::BI__builtin_return_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); - Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI_ReturnAddress: { - Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); } case Builtin::BI__builtin_frame_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); - Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); + Function *F = CGM.getIntrinsic(Intrinsic::frameaddress); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI__builtin_extract_return_addr: { @@ -2445,9 +2592,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); - Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 - ? Intrinsic::eh_return_i32 - : Intrinsic::eh_return_i64); + Function *F = + CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32 + : Intrinsic::eh_return_i64); Builder.CreateCall(F, {Int, Ptr}); Builder.CreateUnreachable(); @@ -2457,7 +2604,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); } case Builtin::BI__builtin_unwind_init: { - Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); + Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin_extend_pointer: { @@ -2498,12 +2645,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Store the stack pointer to the setjmp buffer. Value *StackAddr = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); - Address StackSaveSlot = - Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); + Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2); Builder.CreateStore(StackAddr, StackSaveSlot); // Call LLVM's EH setjmp, which is lightweight. - Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); + Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); Buf = Builder.CreateBitCast(Buf, Int8PtrTy); return RValue::get(Builder.CreateCall(F, Buf.getPointer())); } @@ -2719,7 +2865,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CGFunctionInfo &FuncInfo = CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); + llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName); return EmitCall(FuncInfo, CGCallee::forDirect(Func), ReturnValueSlot(), Args); } @@ -2959,14 +3105,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } // Build and MDTuple of MDStrings and emit the intrinsic call. 
- llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); return RValue::getIgnored(); } case Builtin::BI__builtin_annotation: { llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, AnnVal->getType()); // Get the annotation string, go through casts. Sema requires this to be a @@ -3311,6 +3458,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI_interlockedbittestandreset_nf: return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E)); + // These builtins exist to emit regular volatile loads and stores not + // affected by the -fms-volatile setting. + case Builtin::BI__iso_volatile_load8: + case Builtin::BI__iso_volatile_load16: + case Builtin::BI__iso_volatile_load32: + case Builtin::BI__iso_volatile_load64: + return RValue::get(EmitISOVolatileLoad(*this, E)); + case Builtin::BI__iso_volatile_store8: + case Builtin::BI__iso_volatile_store16: + case Builtin::BI__iso_volatile_store32: + case Builtin::BI__iso_volatile_store64: + return RValue::get(EmitISOVolatileStore(*this, E)); + case Builtin::BI__exception_code: case Builtin::BI_exception_code: return RValue::get(EmitSEHExceptionCode()); @@ -3348,7 +3508,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto & Context = getContext(); auto SizeTy = Context.getSizeType(); auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); - Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); + Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T); return RValue::get(Builder.CreateCall(F)); } @@ -3591,7 +3751,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); AttrBuilder B; - B.addAttribute(Attribute::ByVal); + B.addByValAttr(NDRangeL.getAddress().getElementType()); llvm::AttributeList ByValAttrSet = llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); @@ -3666,21 +3826,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Any calls now have event arguments passed. if (NumArgs >= 7) { llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); - llvm::Type *EventPtrTy = EventTy->getPointerTo( + llvm::PointerType *EventPtrTy = EventTy->getPointerTo( CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *NumEvents = Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); - llvm::Value *EventList = - E->getArg(4)->getType()->isArrayType() - ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() - : EmitScalarExpr(E->getArg(4)); - llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); - // Convert to generic address space. - EventList = Builder.CreatePointerCast(EventList, EventPtrTy); - ClkEvent = ClkEvent->getType()->isIntegerTy() - ? Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy) - : Builder.CreatePointerCast(ClkEvent, EventPtrTy); + + // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments + // to be a null pointer constant (including `0` literal), we can take it + // into account and emit null pointer directly. 
+ llvm::Value *EventWaitList = nullptr; + if (E->getArg(4)->isNullPointerConstant( + getContext(), Expr::NPC_ValueDependentIsNotNull)) { + EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy); + } else { + EventWaitList = E->getArg(4)->getType()->isArrayType() + ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() + : EmitScalarExpr(E->getArg(4)); + // Convert to generic address space. + EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy); + } + llvm::Value *EventRet = nullptr; + if (E->getArg(5)->isNullPointerConstant( + getContext(), Expr::NPC_ValueDependentIsNotNull)) { + EventRet = llvm::ConstantPointerNull::get(EventPtrTy); + } else { + EventRet = + Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy); + } + auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); llvm::Value *Kernel = @@ -3692,8 +3866,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, QueueTy, Int32Ty, RangeTy, Int32Ty, EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; - std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, - EventList, ClkEvent, Kernel, Block}; + std::vector<llvm::Value *> Args = {Queue, Flags, Range, + NumEvents, EventWaitList, EventRet, + Kernel, Block}; if (NumArgs == 7) { // Has events but no variadics. @@ -4922,8 +5097,7 @@ findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, } #endif - const NeonIntrinsicInfo *Builtin = - std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); + const NeonIntrinsicInfo *Builtin = llvm::lower_bound(IntrinsicMap, BuiltinID); if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) return Builtin; @@ -5065,6 +5239,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( switch (BuiltinID) { default: break; + case NEON::BI__builtin_neon_vpadd_v: + case NEON::BI__builtin_neon_vpaddq_v: + // We don't allow fp/int overloading of intrinsics. + if (VTy->getElementType()->isFloatingPointTy() && + Int == Intrinsic::aarch64_neon_addp) + Int = Intrinsic::aarch64_neon_faddp; + break; case NEON::BI__builtin_neon_vabs_v: case NEON::BI__builtin_neon_vabsq_v: if (VTy->getElementType()->isFloatingPointTy()) @@ -5262,7 +5443,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vfma_v: case NEON::BI__builtin_neon_vfmaq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); @@ -5731,7 +5912,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, && "Can't fit 64-bit value in 32-bit register"); if (IsRead) { - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); llvm::Value *Call = Builder.CreateCall(F, Metadata); if (MixedTypes) @@ -5745,7 +5926,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, return Call; } - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); if (MixedTypes) { // Extend 32 bit write value to 64 bit to pass to write. 
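This helper routes the __builtin_arm_rsr*/__builtin_arm_wsr* family through llvm.read_register and llvm.write_register, widening or truncating when the register width and the value width differ. A hedged usage sketch, assuming an AArch64 target with the ACLE builtins available:

// Reads the virtual counter via llvm.read_register on an i64-wide register.
static unsigned long long read_cntvct() {
  return __builtin_arm_rsr64("cntvct_el0");
}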
@@ -5798,34 +5979,6 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) { return true; } -Value *CodeGenFunction::EmitISOVolatileLoad(const CallExpr *E) { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - LoadSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); - llvm::LoadInst *Load = - Builder.CreateAlignedLoad(Ptr, LoadSize); - Load->setVolatile(true); - return Load; -} - -Value *CodeGenFunction::EmitISOVolatileStore(const CallExpr *E) { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - Value *Value = EmitScalarExpr(E->getArg(1)); - QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - StoreSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); - llvm::StoreInst *Store = - Builder.CreateAlignedStore(Value, Ptr, - StoreSize); - Store->setVolatile(true); - return Store; -} - Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch) { @@ -5846,9 +5999,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, llvm::InlineAsm *Emit = IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", - /*SideEffects=*/true) + /*hasSideEffects=*/true) : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", - /*SideEffects=*/true); + /*hasSideEffects=*/true); return Builder.CreateCall(Emit); } @@ -5866,7 +6019,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Locality is not supported on ARM target Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } @@ -6065,19 +6218,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); } - switch (BuiltinID) { - case ARM::BI__iso_volatile_load8: - case ARM::BI__iso_volatile_load16: - case ARM::BI__iso_volatile_load32: - case ARM::BI__iso_volatile_load64: - return EmitISOVolatileLoad(E); - case ARM::BI__iso_volatile_store8: - case ARM::BI__iso_volatile_store16: - case ARM::BI__iso_volatile_store32: - case ARM::BI__iso_volatile_store64: - return EmitISOVolatileStore(E); - } - if (BuiltinID == ARM::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); return Builder.CreateCall(F); @@ -6818,7 +6958,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify // PLDL3STRM or PLDL2STRM. 
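For now these prefetch hints still lower to the generic llvm.prefetch intrinsic, which is also what the portable GCC-style builtin produces:

// rw is 0 (read) or 1 (write), locality is 0..3; both must be constants.
static void warm_line(const void *p) {
  __builtin_prefetch(p, /*rw=*/0, /*locality=*/3);
}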
- Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } @@ -6837,6 +6977,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } + if (BuiltinID == AArch64::BI__builtin_arm_jcvt) { + assert((getContext().getTypeSize(E->getType()) == 32) && + "__jcvt of unusual size!"); + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg); + } + if (BuiltinID == AArch64::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); @@ -6956,7 +7104,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); - llvm::Value *F = + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); return Builder.CreateCall(F, Metadata); } @@ -7002,6 +7150,84 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Arg0, Arg1}); } + // Memory Tagging Extensions (MTE) Intrinsics + Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; + switch (BuiltinID) { + case AArch64::BI__builtin_arm_irg: + MTEIntrinsicID = Intrinsic::aarch64_irg; break; + case AArch64::BI__builtin_arm_addg: + MTEIntrinsicID = Intrinsic::aarch64_addg; break; + case AArch64::BI__builtin_arm_gmi: + MTEIntrinsicID = Intrinsic::aarch64_gmi; break; + case AArch64::BI__builtin_arm_ldg: + MTEIntrinsicID = Intrinsic::aarch64_ldg; break; + case AArch64::BI__builtin_arm_stg: + MTEIntrinsicID = Intrinsic::aarch64_stg; break; + case AArch64::BI__builtin_arm_subp: + MTEIntrinsicID = Intrinsic::aarch64_subp; break; + } + + if (MTEIntrinsicID != Intrinsic::not_intrinsic) { + llvm::Type *T = ConvertType(E->getType()); + + if (MTEIntrinsicID == Intrinsic::aarch64_irg) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *Mask = EmitScalarExpr(E->getArg(1)); + + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + Mask = Builder.CreateZExt(Mask, Int64Ty); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask}); + return Builder.CreatePointerCast(RV, T); + } + if (MTEIntrinsicID == Intrinsic::aarch64_addg) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *TagOffset = EmitScalarExpr(E->getArg(1)); + + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + TagOffset = Builder.CreateZExt(TagOffset, Int64Ty); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset}); + return Builder.CreatePointerCast(RV, T); + } + if (MTEIntrinsicID == Intrinsic::aarch64_gmi) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *ExcludedMask = EmitScalarExpr(E->getArg(1)); + + ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty); + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask}); + } + // Although it is possible to supply a different return + // address (first arg) to this intrinsic, for now we set + // return address same as input address. 
+ if (MTEIntrinsicID == Intrinsic::aarch64_ldg) { + Value *TagAddress = EmitScalarExpr(E->getArg(0)); + TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); + return Builder.CreatePointerCast(RV, T); + } + // Although it is possible to supply a different tag (to set) + // to this intrinsic (as first arg), for now we supply + // the tag that is in input address arg (common use case). + if (MTEIntrinsicID == Intrinsic::aarch64_stg) { + Value *TagAddress = EmitScalarExpr(E->getArg(0)); + TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); + } + if (MTEIntrinsicID == Intrinsic::aarch64_subp) { + Value *PointerA = EmitScalarExpr(E->getArg(0)); + Value *PointerB = EmitScalarExpr(E->getArg(1)); + PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy); + PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB}); + } + } + if (BuiltinID == AArch64::BI__builtin_arm_rsr || BuiltinID == AArch64::BI__builtin_arm_rsr64 || BuiltinID == AArch64::BI__builtin_arm_rsrp || @@ -7052,25 +7278,27 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Type *RegisterType = Int64Ty; - llvm::Type *ValueType = Int32Ty; llvm::Type *Types[] = { RegisterType }; if (BuiltinID == AArch64::BI_ReadStatusReg) { - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); - llvm::Value *Call = Builder.CreateCall(F, Metadata); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); - return Builder.CreateTrunc(Call, ValueType); + return Builder.CreateCall(F, Metadata); } - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); - ArgValue = Builder.CreateZExt(ArgValue, RegisterType); return Builder.CreateCall(F, { Metadata, ArgValue }); } if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { - llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + return Builder.CreateCall(F); + } + + if (BuiltinID == AArch64::BI__builtin_sponentry) { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry); return Builder.CreateCall(F); } @@ -7608,13 +7836,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops.push_back(EmitScalarExpr(E->getArg(1))); return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); case NEON::BI__builtin_neon_vfmah_f16: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); // NEON intrinsic puts accumulator first, unlike the LLVM fma. return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); } case NEON::BI__builtin_neon_vfmsh_f16: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy); Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh"); // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
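That operand order is the reason for the shuffling here: llvm.fma computes a*b+c with the accumulator last, while the NEON vfma/vfms forms name the accumulator first. A plain C++ restatement of the same identity:

#include <cmath>

// vfma(acc, x, y) corresponds to fma(x, y, acc); vfms negates the first
// multiplicand before the fused multiply-add, matching the FSub above.
static float vfma_sketch(float acc, float x, float y) { return std::fma(x, y, acc); }
static float vfms_sketch(float acc, float x, float y) { return std::fma(-x, y, acc); }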
@@ -7775,6 +8003,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, : Intrinsic::aarch64_neon_sqsub; return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); } + case NEON::BI__builtin_neon_vduph_lane_f16: { + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + } + case NEON::BI__builtin_neon_vduph_laneq_f16: { + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vgetq_lane"); + } } llvm::VectorType *VTy = GetNeonType(this, Type); @@ -7845,11 +8081,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); Ops[2] = Builder.CreateBitCast(Ops[2], VTy); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); return Builder.CreateBitCast(Result, Ty); } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); @@ -7863,7 +8099,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); } case NEON::BI__builtin_neon_vfmaq_laneq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); @@ -7879,7 +8115,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vfmad_laneq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(3))); llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); } @@ -8892,16 +9128,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_suqadd; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); } - case AArch64::BI__iso_volatile_load8: - case AArch64::BI__iso_volatile_load16: - case AArch64::BI__iso_volatile_load32: - case AArch64::BI__iso_volatile_load64: - return EmitISOVolatileLoad(E); - case AArch64::BI__iso_volatile_store8: - case AArch64::BI__iso_volatile_store16: - case AArch64::BI__iso_volatile_store32: - case AArch64::BI__iso_volatile_store64: - return EmitISOVolatileStore(E); case AArch64::BI_BitScanForward: case AArch64::BI_BitScanForward64: return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); @@ -9139,6 +9365,20 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] }); } +static Value *EmitX86CompressExpand(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, + bool IsCompress) { + llvm::Type *ResultTy = Ops[1]->getType(); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + ResultTy->getVectorNumElements()); + + Intrinsic::ID IID = IsCompress ? 
Intrinsic::x86_avx512_mask_compress + : Intrinsic::x86_avx512_mask_expand; + llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy); + return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec }); +} + static Value *EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { llvm::Type *ResultTy = Ops[1]->getType(); @@ -9184,10 +9424,50 @@ static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, } unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl; - Value *F = CGF.CGM.getIntrinsic(IID, Ty); + Function *F = CGF.CGM.getIntrinsic(IID, Ty); return CGF.Builder.CreateCall(F, {Op0, Op1, Amt}); } +static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops, + bool IsSigned) { + Value *Op0 = Ops[0]; + Value *Op1 = Ops[1]; + llvm::Type *Ty = Op0->getType(); + uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; + + CmpInst::Predicate Pred; + switch (Imm) { + case 0x0: + Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + break; + case 0x1: + Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + break; + case 0x2: + Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + break; + case 0x3: + Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + break; + case 0x4: + Pred = ICmpInst::ICMP_EQ; + break; + case 0x5: + Pred = ICmpInst::ICMP_NE; + break; + case 0x6: + return llvm::Constant::getNullValue(Ty); // FALSE + case 0x7: + return llvm::Constant::getAllOnesValue(Ty); // TRUE + default: + llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); + } + + Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1); + Value *Res = CGF.Builder.CreateSExt(Cmp, Ty); + return Res; +} + static Value *EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1) { @@ -9278,6 +9558,25 @@ static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); } +static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, bool IsSigned) { + unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue(); + llvm::Type *Ty = Ops[1]->getType(); + + Value *Res; + if (Rnd != 4) { + Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round + : Intrinsic::x86_avx512_uitofp_round; + Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() }); + Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] }); + } else { + Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty) + : CGF.Builder.CreateUIToFP(Ops[0], Ty); + } + + return EmitX86Select(CGF, Ops[2], Res, Ops[1]); +} + static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { llvm::Type *Ty = Ops[0]->getType(); @@ -9524,6 +9823,18 @@ Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { return EmitX86CpuIs(CPUStr); } +// Convert a BF16 to a float. 
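The helper that follows does this with a zero-extend, a 16-bit left shift and a bitcast, since bfloat16 occupies the high half of an IEEE-754 binary32; the same computation in plain C++, for reference:

#include <cstdint>
#include <cstring>

static float bf16_to_float_sketch(uint16_t bits) {
  // Put the 16 BF16 bits into the top half of a 32-bit word and reinterpret
  // it as a float; the low mantissa bits become zero.
  uint32_t wide = static_cast<uint32_t>(bits) << 16;
  float f;
  std::memcpy(&f, &wide, sizeof f);
  return f;
}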
+static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF, + const CallExpr *E, + ArrayRef<Value *> Ops) { + llvm::Type *Int32Ty = CGF.Builder.getInt32Ty(); + Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty); + Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16); + llvm::Type *ResultType = CGF.ConvertType(E->getType()); + Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType); + return BitCast; +} + Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { llvm::Type *Int32Ty = Builder.getInt32Ty(); @@ -9650,10 +9961,11 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { Value *CodeGenFunction::EmitX86CpuInit() { llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic*/ false); - llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); - cast<llvm::GlobalValue>(Func)->setDSOLocal(true); - cast<llvm::GlobalValue>(Func)->setDLLStorageClass( - llvm::GlobalValue::DefaultStorageClass); + llvm::FunctionCallee Func = + CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); + cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true); + cast<llvm::GlobalValue>(Func.getCallee()) + ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); return Builder.CreateCall(Func); } @@ -9722,7 +10034,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); Value *Data = ConstantInt::get(Int32Ty, 1); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, Data}); } case X86::BI_mm_clflush: { @@ -9753,13 +10065,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_lzcnt_u16: case X86::BI__builtin_ia32_lzcnt_u32: case X86::BI__builtin_ia32_lzcnt_u64: { - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } case X86::BI__builtin_ia32_tzcnt_u16: case X86::BI__builtin_ia32_tzcnt_u32: case X86::BI__builtin_ia32_tzcnt_u64: { - Value *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } case X86::BI__builtin_ia32_undef128: @@ -9833,7 +10145,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_xsavec: case X86::BI__builtin_ia32_xsavec64: case X86::BI__builtin_ia32_xsaves: - case X86::BI__builtin_ia32_xsaves64: { + case X86::BI__builtin_ia32_xsaves64: + case X86::BI__builtin_ia32_xsetbv: + case X86::BI_xsetbv: { Intrinsic::ID ID; #define INTRINSIC_X86_XSAVE_ID(NAME) \ case X86::BI__builtin_ia32_##NAME: \ @@ -9853,6 +10167,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, INTRINSIC_X86_XSAVE_ID(xsavec64); INTRINSIC_X86_XSAVE_ID(xsaves); INTRINSIC_X86_XSAVE_ID(xsaves64); + INTRINSIC_X86_XSAVE_ID(xsetbv); + case X86::BI_xsetbv: + ID = Intrinsic::x86_xsetbv; + break; } #undef INTRINSIC_X86_XSAVE_ID Value *Mhi = Builder.CreateTrunc( @@ -9862,6 +10180,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops.push_back(Mlo); return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); } + case X86::BI__builtin_ia32_xgetbv: + case X86::BI_xgetbv: + return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops); case X86::BI__builtin_ia32_storedqudi128_mask: case X86::BI__builtin_ia32_storedqusi128_mask: case X86::BI__builtin_ia32_storedquhi128_mask: @@ -9930,6 +10251,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cvtq2mask512: return EmitX86ConvertToMask(*this, Ops[0]); + case X86::BI__builtin_ia32_cvtdq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2pd512_mask: + return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true); + case X86::BI__builtin_ia32_cvtudq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2pd512_mask: + return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false); + case X86::BI__builtin_ia32_vfmaddss3: case X86::BI__builtin_ia32_vfmaddsd3: case X86::BI__builtin_ia32_vfmaddss3_mask: @@ -10073,22 +10403,262 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_compressstoreqi512_mask: return EmitX86CompressStore(*this, Ops); - case X86::BI__builtin_ia32_storehps: - case X86::BI__builtin_ia32_storelps: { - llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); - llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); + case X86::BI__builtin_ia32_expanddf128_mask: + case X86::BI__builtin_ia32_expanddf256_mask: + case X86::BI__builtin_ia32_expanddf512_mask: + case X86::BI__builtin_ia32_expandsf128_mask: + case X86::BI__builtin_ia32_expandsf256_mask: + case X86::BI__builtin_ia32_expandsf512_mask: + case X86::BI__builtin_ia32_expanddi128_mask: + case X86::BI__builtin_ia32_expanddi256_mask: + case X86::BI__builtin_ia32_expanddi512_mask: + case X86::BI__builtin_ia32_expandsi128_mask: + case X86::BI__builtin_ia32_expandsi256_mask: + case X86::BI__builtin_ia32_expandsi512_mask: + case X86::BI__builtin_ia32_expandhi128_mask: + case X86::BI__builtin_ia32_expandhi256_mask: + case X86::BI__builtin_ia32_expandhi512_mask: + case X86::BI__builtin_ia32_expandqi128_mask: + case X86::BI__builtin_ia32_expandqi256_mask: + case X86::BI__builtin_ia32_expandqi512_mask: + return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false); + + case X86::BI__builtin_ia32_compressdf128_mask: + case X86::BI__builtin_ia32_compressdf256_mask: + case X86::BI__builtin_ia32_compressdf512_mask: + case X86::BI__builtin_ia32_compresssf128_mask: + case X86::BI__builtin_ia32_compresssf256_mask: + case X86::BI__builtin_ia32_compresssf512_mask: + case X86::BI__builtin_ia32_compressdi128_mask: + case X86::BI__builtin_ia32_compressdi256_mask: + case X86::BI__builtin_ia32_compressdi512_mask: + case X86::BI__builtin_ia32_compresssi128_mask: + case X86::BI__builtin_ia32_compresssi256_mask: + case X86::BI__builtin_ia32_compresssi512_mask: + case X86::BI__builtin_ia32_compresshi128_mask: + case X86::BI__builtin_ia32_compresshi256_mask: + case X86::BI__builtin_ia32_compresshi512_mask: + case X86::BI__builtin_ia32_compressqi128_mask: + case X86::BI__builtin_ia32_compressqi256_mask: + case X86::BI__builtin_ia32_compressqi512_mask: + return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true); + + case X86::BI__builtin_ia32_gather3div2df: + case X86::BI__builtin_ia32_gather3div2di: + case X86::BI__builtin_ia32_gather3div4df: + case X86::BI__builtin_ia32_gather3div4di: + case X86::BI__builtin_ia32_gather3div4sf: + case X86::BI__builtin_ia32_gather3div4si: + case X86::BI__builtin_ia32_gather3div8sf: + case X86::BI__builtin_ia32_gather3div8si: + case X86::BI__builtin_ia32_gather3siv2df: + case 
X86::BI__builtin_ia32_gather3siv2di: + case X86::BI__builtin_ia32_gather3siv4df: + case X86::BI__builtin_ia32_gather3siv4di: + case X86::BI__builtin_ia32_gather3siv4sf: + case X86::BI__builtin_ia32_gather3siv4si: + case X86::BI__builtin_ia32_gather3siv8sf: + case X86::BI__builtin_ia32_gather3siv8si: + case X86::BI__builtin_ia32_gathersiv8df: + case X86::BI__builtin_ia32_gathersiv16sf: + case X86::BI__builtin_ia32_gatherdiv8df: + case X86::BI__builtin_ia32_gatherdiv16sf: + case X86::BI__builtin_ia32_gathersiv8di: + case X86::BI__builtin_ia32_gathersiv16si: + case X86::BI__builtin_ia32_gatherdiv8di: + case X86::BI__builtin_ia32_gatherdiv16si: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_gather3div2df: + IID = Intrinsic::x86_avx512_mask_gather3div2_df; + break; + case X86::BI__builtin_ia32_gather3div2di: + IID = Intrinsic::x86_avx512_mask_gather3div2_di; + break; + case X86::BI__builtin_ia32_gather3div4df: + IID = Intrinsic::x86_avx512_mask_gather3div4_df; + break; + case X86::BI__builtin_ia32_gather3div4di: + IID = Intrinsic::x86_avx512_mask_gather3div4_di; + break; + case X86::BI__builtin_ia32_gather3div4sf: + IID = Intrinsic::x86_avx512_mask_gather3div4_sf; + break; + case X86::BI__builtin_ia32_gather3div4si: + IID = Intrinsic::x86_avx512_mask_gather3div4_si; + break; + case X86::BI__builtin_ia32_gather3div8sf: + IID = Intrinsic::x86_avx512_mask_gather3div8_sf; + break; + case X86::BI__builtin_ia32_gather3div8si: + IID = Intrinsic::x86_avx512_mask_gather3div8_si; + break; + case X86::BI__builtin_ia32_gather3siv2df: + IID = Intrinsic::x86_avx512_mask_gather3siv2_df; + break; + case X86::BI__builtin_ia32_gather3siv2di: + IID = Intrinsic::x86_avx512_mask_gather3siv2_di; + break; + case X86::BI__builtin_ia32_gather3siv4df: + IID = Intrinsic::x86_avx512_mask_gather3siv4_df; + break; + case X86::BI__builtin_ia32_gather3siv4di: + IID = Intrinsic::x86_avx512_mask_gather3siv4_di; + break; + case X86::BI__builtin_ia32_gather3siv4sf: + IID = Intrinsic::x86_avx512_mask_gather3siv4_sf; + break; + case X86::BI__builtin_ia32_gather3siv4si: + IID = Intrinsic::x86_avx512_mask_gather3siv4_si; + break; + case X86::BI__builtin_ia32_gather3siv8sf: + IID = Intrinsic::x86_avx512_mask_gather3siv8_sf; + break; + case X86::BI__builtin_ia32_gather3siv8si: + IID = Intrinsic::x86_avx512_mask_gather3siv8_si; + break; + case X86::BI__builtin_ia32_gathersiv8df: + IID = Intrinsic::x86_avx512_mask_gather_dpd_512; + break; + case X86::BI__builtin_ia32_gathersiv16sf: + IID = Intrinsic::x86_avx512_mask_gather_dps_512; + break; + case X86::BI__builtin_ia32_gatherdiv8df: + IID = Intrinsic::x86_avx512_mask_gather_qpd_512; + break; + case X86::BI__builtin_ia32_gatherdiv16sf: + IID = Intrinsic::x86_avx512_mask_gather_qps_512; + break; + case X86::BI__builtin_ia32_gathersiv8di: + IID = Intrinsic::x86_avx512_mask_gather_dpq_512; + break; + case X86::BI__builtin_ia32_gathersiv16si: + IID = Intrinsic::x86_avx512_mask_gather_dpi_512; + break; + case X86::BI__builtin_ia32_gatherdiv8di: + IID = Intrinsic::x86_avx512_mask_gather_qpq_512; + break; + case X86::BI__builtin_ia32_gatherdiv16si: + IID = Intrinsic::x86_avx512_mask_gather_qpi_512; + break; + } - // cast val v2i64 - Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); + unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(), + Ops[2]->getType()->getVectorNumElements()); + Ops[3] = getMaskVecValue(*this, Ops[3], MinElts); + Function *Intr = CGM.getIntrinsic(IID); + return 
Builder.CreateCall(Intr, Ops); + } - // extract (0, 1) - unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; - Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract"); + case X86::BI__builtin_ia32_scattersiv8df: + case X86::BI__builtin_ia32_scattersiv16sf: + case X86::BI__builtin_ia32_scatterdiv8df: + case X86::BI__builtin_ia32_scatterdiv16sf: + case X86::BI__builtin_ia32_scattersiv8di: + case X86::BI__builtin_ia32_scattersiv16si: + case X86::BI__builtin_ia32_scatterdiv8di: + case X86::BI__builtin_ia32_scatterdiv16si: + case X86::BI__builtin_ia32_scatterdiv2df: + case X86::BI__builtin_ia32_scatterdiv2di: + case X86::BI__builtin_ia32_scatterdiv4df: + case X86::BI__builtin_ia32_scatterdiv4di: + case X86::BI__builtin_ia32_scatterdiv4sf: + case X86::BI__builtin_ia32_scatterdiv4si: + case X86::BI__builtin_ia32_scatterdiv8sf: + case X86::BI__builtin_ia32_scatterdiv8si: + case X86::BI__builtin_ia32_scattersiv2df: + case X86::BI__builtin_ia32_scattersiv2di: + case X86::BI__builtin_ia32_scattersiv4df: + case X86::BI__builtin_ia32_scattersiv4di: + case X86::BI__builtin_ia32_scattersiv4sf: + case X86::BI__builtin_ia32_scattersiv4si: + case X86::BI__builtin_ia32_scattersiv8sf: + case X86::BI__builtin_ia32_scattersiv8si: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_scattersiv8df: + IID = Intrinsic::x86_avx512_mask_scatter_dpd_512; + break; + case X86::BI__builtin_ia32_scattersiv16sf: + IID = Intrinsic::x86_avx512_mask_scatter_dps_512; + break; + case X86::BI__builtin_ia32_scatterdiv8df: + IID = Intrinsic::x86_avx512_mask_scatter_qpd_512; + break; + case X86::BI__builtin_ia32_scatterdiv16sf: + IID = Intrinsic::x86_avx512_mask_scatter_qps_512; + break; + case X86::BI__builtin_ia32_scattersiv8di: + IID = Intrinsic::x86_avx512_mask_scatter_dpq_512; + break; + case X86::BI__builtin_ia32_scattersiv16si: + IID = Intrinsic::x86_avx512_mask_scatter_dpi_512; + break; + case X86::BI__builtin_ia32_scatterdiv8di: + IID = Intrinsic::x86_avx512_mask_scatter_qpq_512; + break; + case X86::BI__builtin_ia32_scatterdiv16si: + IID = Intrinsic::x86_avx512_mask_scatter_qpi_512; + break; + case X86::BI__builtin_ia32_scatterdiv2df: + IID = Intrinsic::x86_avx512_mask_scatterdiv2_df; + break; + case X86::BI__builtin_ia32_scatterdiv2di: + IID = Intrinsic::x86_avx512_mask_scatterdiv2_di; + break; + case X86::BI__builtin_ia32_scatterdiv4df: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_df; + break; + case X86::BI__builtin_ia32_scatterdiv4di: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_di; + break; + case X86::BI__builtin_ia32_scatterdiv4sf: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf; + break; + case X86::BI__builtin_ia32_scatterdiv4si: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_si; + break; + case X86::BI__builtin_ia32_scatterdiv8sf: + IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf; + break; + case X86::BI__builtin_ia32_scatterdiv8si: + IID = Intrinsic::x86_avx512_mask_scatterdiv8_si; + break; + case X86::BI__builtin_ia32_scattersiv2df: + IID = Intrinsic::x86_avx512_mask_scattersiv2_df; + break; + case X86::BI__builtin_ia32_scattersiv2di: + IID = Intrinsic::x86_avx512_mask_scattersiv2_di; + break; + case X86::BI__builtin_ia32_scattersiv4df: + IID = Intrinsic::x86_avx512_mask_scattersiv4_df; + break; + case X86::BI__builtin_ia32_scattersiv4di: + IID = Intrinsic::x86_avx512_mask_scattersiv4_di; + break; + case X86::BI__builtin_ia32_scattersiv4sf: + IID = Intrinsic::x86_avx512_mask_scattersiv4_sf; + break; + case 
X86::BI__builtin_ia32_scattersiv4si: + IID = Intrinsic::x86_avx512_mask_scattersiv4_si; + break; + case X86::BI__builtin_ia32_scattersiv8sf: + IID = Intrinsic::x86_avx512_mask_scattersiv8_sf; + break; + case X86::BI__builtin_ia32_scattersiv8si: + IID = Intrinsic::x86_avx512_mask_scattersiv8_si; + break; + } - // cast pointer to i64 & store - Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); - return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); + unsigned MinElts = std::min(Ops[2]->getType()->getVectorNumElements(), + Ops[3]->getType()->getVectorNumElements()); + Ops[1] = getMaskVecValue(*this, Ops[1], MinElts); + Function *Intr = CGM.getIntrinsic(IID); + return Builder.CreateCall(Intr, Ops); } + case X86::BI__builtin_ia32_vextractf128_pd256: case X86::BI__builtin_ia32_vextractf128_ps256: case X86::BI__builtin_ia32_vextractf128_si256: @@ -10693,6 +11263,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_vpcomb: + case X86::BI__builtin_ia32_vpcomw: + case X86::BI__builtin_ia32_vpcomd: + case X86::BI__builtin_ia32_vpcomq: + return EmitX86vpcom(*this, Ops, true); + case X86::BI__builtin_ia32_vpcomub: + case X86::BI__builtin_ia32_vpcomuw: + case X86::BI__builtin_ia32_vpcomud: + case X86::BI__builtin_ia32_vpcomuq: + return EmitX86vpcom(*this, Ops, false); case X86::BI__builtin_ia32_kortestcqi: case X86::BI__builtin_ia32_kortestchi: @@ -11154,6 +11734,47 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); } + case X86::BI__builtin_ia32_vp2intersect_q_512: + case X86::BI__builtin_ia32_vp2intersect_q_256: + case X86::BI__builtin_ia32_vp2intersect_q_128: + case X86::BI__builtin_ia32_vp2intersect_d_512: + case X86::BI__builtin_ia32_vp2intersect_d_256: + case X86::BI__builtin_ia32_vp2intersect_d_128: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + Intrinsic::ID ID; + + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_vp2intersect_q_512: + ID = Intrinsic::x86_avx512_vp2intersect_q_512; + break; + case X86::BI__builtin_ia32_vp2intersect_q_256: + ID = Intrinsic::x86_avx512_vp2intersect_q_256; + break; + case X86::BI__builtin_ia32_vp2intersect_q_128: + ID = Intrinsic::x86_avx512_vp2intersect_q_128; + break; + case X86::BI__builtin_ia32_vp2intersect_d_512: + ID = Intrinsic::x86_avx512_vp2intersect_d_512; + break; + case X86::BI__builtin_ia32_vp2intersect_d_256: + ID = Intrinsic::x86_avx512_vp2intersect_d_256; + break; + case X86::BI__builtin_ia32_vp2intersect_d_128: + ID = Intrinsic::x86_avx512_vp2intersect_d_128; + break; + } + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]}); + Value *Result = Builder.CreateExtractValue(Call, 0); + Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); + Builder.CreateDefaultAlignedStore(Result, Ops[2]); + + Result = Builder.CreateExtractValue(Call, 1); + Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); + return Builder.CreateDefaultAlignedStore(Result, Ops[3]); + } + case X86::BI__builtin_ia32_vpmultishiftqb128: case X86::BI__builtin_ia32_vpmultishiftqb256: case X86::BI__builtin_ia32_vpmultishiftqb512: { @@ -11336,6 +11957,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpordsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 
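For orientation: the VP2INTERSECT lowering added above returns both result masks through pointer operands rather than as return values. A minimal sketch of user code that reaches it, assuming the usual <immintrin.h> spelling and an AVX512VP2INTERSECT-enabled target (illustrative only, not part of the imported diff):

    #include <immintrin.h>

    // Clang lowers this builtin to llvm.x86.avx512.vp2intersect.d.512 and
    // stores the two extracted mask results through k1 and k2, matching the
    // two EmitX86MaskedCompareResult + store sequences above.
    void intersect_epi32(__m512i a, __m512i b, __mmask16 *k1, __mmask16 *k2) {
      _mm512_2intersect_epi32(a, b, k1, k2);
    }
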
+// AVX512 bf16 intrinsics + case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { + Ops[2] = getMaskVecValue(*this, Ops[2], + Ops[0]->getType()->getVectorNumElements()); + Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; + return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + } + case X86::BI__builtin_ia32_cvtsbf162ss_32: + return EmitX86CvtBF16ToFloatExpr(*this, E, Ops); + + case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: + case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: + IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256; + break; + case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: + IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512; + break; + } + Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]); + return EmitX86Select(*this, Ops[2], Res, Ops[1]); + } + case X86::BI__emul: case X86::BI__emulu: { llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); @@ -11386,9 +12033,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); // return Builder.CreateCall(F, Ops); llvm::Type *Int128Ty = Builder.getInt128Ty(); - Value *Val = Builder.CreateOr( - Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64), - Builder.CreateZExt(Ops[0], Int128Ty)); + Value *HighPart128 = + Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64); + Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty); + Value *Val = Builder.CreateOr(HighPart128, LowPart128); Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty), llvm::ConstantInt::get(Int128Ty, 0x3f)); Value *Res; @@ -11465,7 +12113,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI_AddressOfReturnAddress: { - Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); return Builder.CreateCall(F); } case X86::BI__stosb: { @@ -11480,13 +12128,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // This syscall signals a driver assertion failure in x86 NT kernels. 
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); llvm::InlineAsm *IA = - llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); + llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true); llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); - CallSite CS = Builder.CreateCall(IA); - CS.setAttributes(NoReturnAttr); - return CS.getInstruction(); + llvm::CallInst *CI = Builder.CreateCall(IA); + CI->setAttributes(NoReturnAttr); + return CI; } case X86::BI__readfsbyte: case X86::BI__readfsword: @@ -12001,7 +12649,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Z = EmitScalarExpr(E->getArg(2)); - llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, + llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType()); llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); @@ -12023,7 +12671,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); - llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, + llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType()); llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); @@ -12031,6 +12679,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); + case AMDGPU::BI__builtin_amdgcn_mov_dpp8: + return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8); case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector<llvm::Value *, 6> Args; @@ -12039,7 +12689,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, assert(Args.size() == 5 || Args.size() == 6); if (Args.size() == 5) Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType())); - Value *F = + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } @@ -12080,13 +12730,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_frexp_exp: case AMDGPU::BI__builtin_amdgcn_frexp_expf: { Value *Src0 = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, { Builder.getInt32Ty(), Src0->getType() }); return Builder.CreateCall(F, Src0); } case AMDGPU::BI__builtin_amdgcn_frexp_exph: { Value *Src0 = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, { Builder.getInt16Ty(), Src0->getType() }); return Builder.CreateCall(F, Src0); } @@ -12096,14 +12746,34 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); case AMDGPU::BI__builtin_amdgcn_lerp: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); + case AMDGPU::BI__builtin_amdgcn_ubfe: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe); + case AMDGPU::BI__builtin_amdgcn_sbfe: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe); case AMDGPU::BI__builtin_amdgcn_uicmp: case AMDGPU::BI__builtin_amdgcn_uicmpl: case 
AMDGPU::BI__builtin_amdgcn_sicmp: - case AMDGPU::BI__builtin_amdgcn_sicmpl: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); + case AMDGPU::BI__builtin_amdgcn_sicmpl: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + + // FIXME-GFX10: How should 32 bit mask be handled? + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp, + { Builder.getInt64Ty(), Src0->getType() }); + return Builder.CreateCall(F, { Src0, Src1, Src2 }); + } case AMDGPU::BI__builtin_amdgcn_fcmp: - case AMDGPU::BI__builtin_amdgcn_fcmpf: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); + case AMDGPU::BI__builtin_amdgcn_fcmpf: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + + // FIXME-GFX10: How should 32 bit mask be handled? + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp, + { Builder.getInt64Ty(), Src0->getType() }); + return Builder.CreateCall(F, { Src0, Src1, Src2 }); + } case AMDGPU::BI__builtin_amdgcn_class: case AMDGPU::BI__builtin_amdgcn_classf: case AMDGPU::BI__builtin_amdgcn_classh: @@ -12111,6 +12781,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); + case AMDGPU::BI__builtin_amdgcn_ds_append: + case AMDGPU::BI__builtin_amdgcn_ds_consume: { + Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? + Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); + return Builder.CreateCall(F, { Src0, Builder.getFalse() }); + } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); @@ -12160,7 +12838,7 @@ static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, for (unsigned I = 0; I < NumArgs; ++I) Args[I] = CGF.EmitScalarExpr(E->getArg(I)); Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID); Value *Call = CGF.Builder.CreateCall(F, Args); Value *CC = CGF.Builder.CreateExtractValue(Call, 1); CGF.Builder.CreateStore(CC, CCPtr); @@ -12173,30 +12851,30 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, case SystemZ::BI__builtin_tbegin: { Value *TDB = EmitScalarExpr(E->getArg(0)); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tbegin_nofloat: { Value *TDB = EmitScalarExpr(E->getArg(0)); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tbeginc: { Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tabort: { Value 
*Data = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort); return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); } case SystemZ::BI__builtin_non_tx_store: { Value *Address = EmitScalarExpr(E->getArg(0)); Value *Data = EmitScalarExpr(E->getArg(1)); - Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); + Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); return Builder.CreateCall(F, {Data, Address}); } @@ -12406,6 +13084,15 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {X, Y, M4Value}); } + case SystemZ::BI__builtin_s390_vlbrh: + case SystemZ::BI__builtin_s390_vlbrf: + case SystemZ::BI__builtin_s390_vlbrg: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType); + return Builder.CreateCall(F, X); + } + // Vector intrinsics that output the post-instruction CC value. #define INTRINSIC_WITH_CC(NAME) \ @@ -12481,6 +13168,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, INTRINSIC_WITH_CC(s390_vftcisb); INTRINSIC_WITH_CC(s390_vftcidb); + INTRINSIC_WITH_CC(s390_vstrsb); + INTRINSIC_WITH_CC(s390_vstrsh); + INTRINSIC_WITH_CC(s390_vstrsf); + + INTRINSIC_WITH_CC(s390_vstrszb); + INTRINSIC_WITH_CC(s390_vstrszh); + INTRINSIC_WITH_CC(s390_vstrszf); + #undef INTRINSIC_WITH_CC default: @@ -12488,8 +13183,252 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, } } -Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { +namespace { +// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant. +struct NVPTXMmaLdstInfo { + unsigned NumResults; // Number of elements to load/store + // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported. 
+ unsigned IID_col; + unsigned IID_row; +}; + +#define MMA_INTR(geom_op_type, layout) \ + Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride +#define MMA_LDST(n, geom_op_type) \ + { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) } + +static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) { + switch (BuiltinID) { + // FP MMA loads + case NVPTX::BI__hmma_m16n16k16_ld_a: + return MMA_LDST(8, m16n16k16_load_a_f16); + case NVPTX::BI__hmma_m16n16k16_ld_b: + return MMA_LDST(8, m16n16k16_load_b_f16); + case NVPTX::BI__hmma_m16n16k16_ld_c_f16: + return MMA_LDST(4, m16n16k16_load_c_f16); + case NVPTX::BI__hmma_m16n16k16_ld_c_f32: + return MMA_LDST(8, m16n16k16_load_c_f32); + case NVPTX::BI__hmma_m32n8k16_ld_a: + return MMA_LDST(8, m32n8k16_load_a_f16); + case NVPTX::BI__hmma_m32n8k16_ld_b: + return MMA_LDST(8, m32n8k16_load_b_f16); + case NVPTX::BI__hmma_m32n8k16_ld_c_f16: + return MMA_LDST(4, m32n8k16_load_c_f16); + case NVPTX::BI__hmma_m32n8k16_ld_c_f32: + return MMA_LDST(8, m32n8k16_load_c_f32); + case NVPTX::BI__hmma_m8n32k16_ld_a: + return MMA_LDST(8, m8n32k16_load_a_f16); + case NVPTX::BI__hmma_m8n32k16_ld_b: + return MMA_LDST(8, m8n32k16_load_b_f16); + case NVPTX::BI__hmma_m8n32k16_ld_c_f16: + return MMA_LDST(4, m8n32k16_load_c_f16); + case NVPTX::BI__hmma_m8n32k16_ld_c_f32: + return MMA_LDST(8, m8n32k16_load_c_f32); + + // Integer MMA loads + case NVPTX::BI__imma_m16n16k16_ld_a_s8: + return MMA_LDST(2, m16n16k16_load_a_s8); + case NVPTX::BI__imma_m16n16k16_ld_a_u8: + return MMA_LDST(2, m16n16k16_load_a_u8); + case NVPTX::BI__imma_m16n16k16_ld_b_s8: + return MMA_LDST(2, m16n16k16_load_b_s8); + case NVPTX::BI__imma_m16n16k16_ld_b_u8: + return MMA_LDST(2, m16n16k16_load_b_u8); + case NVPTX::BI__imma_m16n16k16_ld_c: + return MMA_LDST(8, m16n16k16_load_c_s32); + case NVPTX::BI__imma_m32n8k16_ld_a_s8: + return MMA_LDST(4, m32n8k16_load_a_s8); + case NVPTX::BI__imma_m32n8k16_ld_a_u8: + return MMA_LDST(4, m32n8k16_load_a_u8); + case NVPTX::BI__imma_m32n8k16_ld_b_s8: + return MMA_LDST(1, m32n8k16_load_b_s8); + case NVPTX::BI__imma_m32n8k16_ld_b_u8: + return MMA_LDST(1, m32n8k16_load_b_u8); + case NVPTX::BI__imma_m32n8k16_ld_c: + return MMA_LDST(8, m32n8k16_load_c_s32); + case NVPTX::BI__imma_m8n32k16_ld_a_s8: + return MMA_LDST(1, m8n32k16_load_a_s8); + case NVPTX::BI__imma_m8n32k16_ld_a_u8: + return MMA_LDST(1, m8n32k16_load_a_u8); + case NVPTX::BI__imma_m8n32k16_ld_b_s8: + return MMA_LDST(4, m8n32k16_load_b_s8); + case NVPTX::BI__imma_m8n32k16_ld_b_u8: + return MMA_LDST(4, m8n32k16_load_b_u8); + case NVPTX::BI__imma_m8n32k16_ld_c: + return MMA_LDST(8, m8n32k16_load_c_s32); + + // Sub-integer MMA loads. + // Only row/col layout is supported by A/B fragments. + case NVPTX::BI__imma_m8n8k32_ld_a_s4: + return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)}; + case NVPTX::BI__imma_m8n8k32_ld_a_u4: + return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)}; + case NVPTX::BI__imma_m8n8k32_ld_b_s4: + return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0}; + case NVPTX::BI__imma_m8n8k32_ld_b_u4: + return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0}; + case NVPTX::BI__imma_m8n8k32_ld_c: + return MMA_LDST(2, m8n8k32_load_c_s32); + case NVPTX::BI__bmma_m8n8k128_ld_a_b1: + return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)}; + case NVPTX::BI__bmma_m8n8k128_ld_b_b1: + return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0}; + case NVPTX::BI__bmma_m8n8k128_ld_c: + return MMA_LDST(2, m8n8k128_load_c_s32); + + // NOTE: We need to follow inconsitent naming scheme used by NVCC. 
Unlike + // PTX and LLVM IR where stores always use fragment D, NVCC builtins always + // use fragment C for both loads and stores. + // FP MMA stores. + case NVPTX::BI__hmma_m16n16k16_st_c_f16: + return MMA_LDST(4, m16n16k16_store_d_f16); + case NVPTX::BI__hmma_m16n16k16_st_c_f32: + return MMA_LDST(8, m16n16k16_store_d_f32); + case NVPTX::BI__hmma_m32n8k16_st_c_f16: + return MMA_LDST(4, m32n8k16_store_d_f16); + case NVPTX::BI__hmma_m32n8k16_st_c_f32: + return MMA_LDST(8, m32n8k16_store_d_f32); + case NVPTX::BI__hmma_m8n32k16_st_c_f16: + return MMA_LDST(4, m8n32k16_store_d_f16); + case NVPTX::BI__hmma_m8n32k16_st_c_f32: + return MMA_LDST(8, m8n32k16_store_d_f32); + + // Integer and sub-integer MMA stores. + // Another naming quirk. Unlike other MMA builtins that use PTX types in the + // name, integer loads/stores use LLVM's i32. + case NVPTX::BI__imma_m16n16k16_st_c_i32: + return MMA_LDST(8, m16n16k16_store_d_s32); + case NVPTX::BI__imma_m32n8k16_st_c_i32: + return MMA_LDST(8, m32n8k16_store_d_s32); + case NVPTX::BI__imma_m8n32k16_st_c_i32: + return MMA_LDST(8, m8n32k16_store_d_s32); + case NVPTX::BI__imma_m8n8k32_st_c_i32: + return MMA_LDST(2, m8n8k32_store_d_s32); + case NVPTX::BI__bmma_m8n8k128_st_c_i32: + return MMA_LDST(2, m8n8k128_store_d_s32); + + default: + llvm_unreachable("Unknown MMA builtin"); + } +} +#undef MMA_LDST +#undef MMA_INTR + + +struct NVPTXMmaInfo { + unsigned NumEltsA; + unsigned NumEltsB; + unsigned NumEltsC; + unsigned NumEltsD; + std::array<unsigned, 8> Variants; + + unsigned getMMAIntrinsic(int Layout, bool Satf) { + unsigned Index = Layout * 2 + Satf; + if (Index >= Variants.size()) + return 0; + return Variants[Index]; + } +}; + + // Returns an intrinsic that matches Layout and Satf for valid combinations of + // Layout and Satf, 0 otherwise. +static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) { + // clang-format off +#define MMA_VARIANTS(geom, type) {{ \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ + }} +// Sub-integer MMA only supports row.col layout. +#define MMA_VARIANTS_I4(geom, type) {{ \ + 0, \ + 0, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ + 0, \ + 0, \ + 0, \ + 0 \ + }} +// b1 MMA does not support .satfinite. +#define MMA_VARIANTS_B1(geom, type) {{ \ + 0, \ + 0, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + 0, \ + 0, \ + 0, \ + 0, \ + 0 \ + }} + // clang-format on + switch (BuiltinID) { + // FP MMA + // Note that 'type' argument of MMA_VARIANT uses D_C notation, while + // NumEltsN of return value are ordered as A,B,C,D. 
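    // Worked example, for illustration only (not additional upstream code):
    // for __hmma_m16n16k16_mma_f32f16 the table below yields
    // {8, 8, 4, 8, MMA_VARIANTS(m16n16k16, f32_f16)}, i.e. 8 elements each
    // for A and B, 4 for C and 8 for D, and getMMAIntrinsic(/*Layout=*/1,
    // /*Satf=*/true) indexes Variants[3], selecting
    // Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f32_f16_satfinite. A zero
    // entry means the Layout/Satf combination is unsupported and the caller
    // returns nullptr.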
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m16n16k16, f16_f16)}; + case NVPTX::BI__hmma_m16n16k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m16n16k16, f32_f16)}; + case NVPTX::BI__hmma_m16n16k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m16n16k16, f16_f32)}; + case NVPTX::BI__hmma_m16n16k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m16n16k16, f32_f32)}; + case NVPTX::BI__hmma_m32n8k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m32n8k16, f16_f16)}; + case NVPTX::BI__hmma_m32n8k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m32n8k16, f32_f16)}; + case NVPTX::BI__hmma_m32n8k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m32n8k16, f16_f32)}; + case NVPTX::BI__hmma_m32n8k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m32n8k16, f32_f32)}; + case NVPTX::BI__hmma_m8n32k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m8n32k16, f16_f16)}; + case NVPTX::BI__hmma_m8n32k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m8n32k16, f32_f16)}; + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m8n32k16, f16_f32)}; + case NVPTX::BI__hmma_m8n32k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m8n32k16, f32_f32)}; + + // Integer MMA + case NVPTX::BI__imma_m16n16k16_mma_s8: + return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, s8)}; + case NVPTX::BI__imma_m16n16k16_mma_u8: + return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, u8)}; + case NVPTX::BI__imma_m32n8k16_mma_s8: + return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, s8)}; + case NVPTX::BI__imma_m32n8k16_mma_u8: + return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, u8)}; + case NVPTX::BI__imma_m8n32k16_mma_s8: + return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, s8)}; + case NVPTX::BI__imma_m8n32k16_mma_u8: + return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, u8)}; + + // Sub-integer MMA + case NVPTX::BI__imma_m8n8k32_mma_s4: + return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, s4)}; + case NVPTX::BI__imma_m8n8k32_mma_u4: + return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, u4)}; + case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: + return {1, 1, 2, 2, MMA_VARIANTS_B1(m8n8k128, b1)}; + default: + llvm_unreachable("Unexpected builtin ID."); + } +#undef MMA_VARIANTS +#undef MMA_VARIANTS_I4 +#undef MMA_VARIANTS_B1 +} + +} // namespace + +Value * +CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { auto MakeLdg = [&](unsigned IntrinsicID) { Value *Ptr = EmitScalarExpr(E->getArg(0)); clang::CharUnits Align = @@ -12564,30 +13503,18 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, // success flag. return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); - case NVPTX::BI__nvvm_atom_add_gen_f: { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - Value *Val = EmitScalarExpr(E->getArg(1)); - // atomicrmw only deals with integer arguments so we need to use - // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. - Value *FnALAF32 = - CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); - return Builder.CreateCall(FnALAF32, {Ptr, Val}); - } - + case NVPTX::BI__nvvm_atom_add_gen_f: case NVPTX::BI__nvvm_atom_add_gen_d: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - // atomicrmw only deals with integer arguments, so we need to use - // LLVM's nvvm_atomic_load_add_f64 intrinsic. 
- Value *FnALAF64 = - CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType()); - return Builder.CreateCall(FnALAF64, {Ptr, Val}); + return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val, + AtomicOrdering::SequentiallyConsistent); } case NVPTX::BI__nvvm_atom_inc_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - Value *FnALI32 = + Function *FnALI32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); return Builder.CreateCall(FnALI32, {Ptr, Val}); } @@ -12595,7 +13522,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_atom_dec_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - Value *FnALD32 = + Function *FnALD32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); return Builder.CreateCall(FnALD32, {Ptr, Val}); } @@ -12752,6 +13679,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Builder.CreateStore(Pred, PredOutPtr); return Builder.CreateExtractValue(ResultPair, 0); } + + // FP MMA loads case NVPTX::BI__hmma_m16n16k16_ld_a: case NVPTX::BI__hmma_m16n16k16_ld_b: case NVPTX::BI__hmma_m16n16k16_ld_c_f16: @@ -12763,7 +13692,33 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m8n32k16_ld_a: case NVPTX::BI__hmma_m8n32k16_ld_b: case NVPTX::BI__hmma_m8n32k16_ld_c_f16: - case NVPTX::BI__hmma_m8n32k16_ld_c_f32: { + case NVPTX::BI__hmma_m8n32k16_ld_c_f32: + // Integer MMA loads. + case NVPTX::BI__imma_m16n16k16_ld_a_s8: + case NVPTX::BI__imma_m16n16k16_ld_a_u8: + case NVPTX::BI__imma_m16n16k16_ld_b_s8: + case NVPTX::BI__imma_m16n16k16_ld_b_u8: + case NVPTX::BI__imma_m16n16k16_ld_c: + case NVPTX::BI__imma_m32n8k16_ld_a_s8: + case NVPTX::BI__imma_m32n8k16_ld_a_u8: + case NVPTX::BI__imma_m32n8k16_ld_b_s8: + case NVPTX::BI__imma_m32n8k16_ld_b_u8: + case NVPTX::BI__imma_m32n8k16_ld_c: + case NVPTX::BI__imma_m8n32k16_ld_a_s8: + case NVPTX::BI__imma_m8n32k16_ld_a_u8: + case NVPTX::BI__imma_m8n32k16_ld_b_s8: + case NVPTX::BI__imma_m8n32k16_ld_b_u8: + case NVPTX::BI__imma_m8n32k16_ld_c: + // Sub-integer MMA loads. + case NVPTX::BI__imma_m8n8k32_ld_a_s4: + case NVPTX::BI__imma_m8n8k32_ld_a_u4: + case NVPTX::BI__imma_m8n8k32_ld_b_s4: + case NVPTX::BI__imma_m8n8k32_ld_b_u4: + case NVPTX::BI__imma_m8n8k32_ld_c: + case NVPTX::BI__bmma_m8n8k128_ld_a_b1: + case NVPTX::BI__bmma_m8n8k128_ld_b_b1: + case NVPTX::BI__bmma_m8n8k128_ld_c: + { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -12771,82 +13726,28 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) return nullptr; bool isColMajor = isColMajorArg.getSExtValue(); - unsigned IID; - unsigned NumResults; - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m16n16k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m16n16k16_ld_c_f16: - IID = isColMajor ? 
Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m16n16k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m32n8k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m8n32k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride; - NumResults = 8; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } + NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); + unsigned IID = isColMajor ? II.IID_col : II.IID_row; + if (IID == 0) + return nullptr; + Value *Result = Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm}); // Save returned values. 
- for (unsigned i = 0; i < NumResults; ++i) { - Builder.CreateAlignedStore( - Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), - Dst.getElementType()), - Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), - CharUnits::fromQuantity(4)); + assert(II.NumResults); + if (II.NumResults == 1) { + Builder.CreateAlignedStore(Result, Dst.getPointer(), + CharUnits::fromQuantity(4)); + } else { + for (unsigned i = 0; i < II.NumResults; ++i) { + Builder.CreateAlignedStore( + Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), + Dst.getElementType()), + Builder.CreateGEP(Dst.getPointer(), + llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + } } return Result; } @@ -12856,7 +13757,12 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m32n8k16_st_c_f16: case NVPTX::BI__hmma_m32n8k16_st_c_f32: case NVPTX::BI__hmma_m8n32k16_st_c_f16: - case NVPTX::BI__hmma_m8n32k16_st_c_f32: { + case NVPTX::BI__hmma_m8n32k16_st_c_f32: + case NVPTX::BI__imma_m16n16k16_st_c_i32: + case NVPTX::BI__imma_m32n8k16_st_c_i32: + case NVPTX::BI__imma_m8n32k16_st_c_i32: + case NVPTX::BI__imma_m8n8k32_st_c_i32: + case NVPTX::BI__bmma_m8n8k128_st_c_i32: { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -12864,45 +13770,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) return nullptr; bool isColMajor = isColMajorArg.getSExtValue(); - unsigned IID; - unsigned NumResults = 8; - // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet - // for some reason nvcc builtins use _c_. - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m16n16k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride; - break; - case NVPTX::BI__hmma_m32n8k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m32n8k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride; - break; - case NVPTX::BI__hmma_m8n32k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m8n32k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } - Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType()); + NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); + unsigned IID = isColMajor ? 
II.IID_col : II.IID_row; + if (IID == 0) + return nullptr; + Function *Intrinsic = + CGM.getIntrinsic(IID, Dst->getType()); llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); SmallVector<Value *, 10> Values = {Dst}; - for (unsigned i = 0; i < NumResults; ++i) { + for (unsigned i = 0; i < II.NumResults; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); @@ -12926,7 +13802,16 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m8n32k16_mma_f16f16: case NVPTX::BI__hmma_m8n32k16_mma_f32f16: case NVPTX::BI__hmma_m8n32k16_mma_f32f32: - case NVPTX::BI__hmma_m8n32k16_mma_f16f32: { + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: + case NVPTX::BI__imma_m16n16k16_mma_s8: + case NVPTX::BI__imma_m16n16k16_mma_u8: + case NVPTX::BI__imma_m32n8k16_mma_s8: + case NVPTX::BI__imma_m32n8k16_mma_u8: + case NVPTX::BI__imma_m8n32k16_mma_s8: + case NVPTX::BI__imma_m8n32k16_mma_u8: + case NVPTX::BI__imma_m8n8k32_mma_s4: + case NVPTX::BI__imma_m8n8k32_mma_u4: + case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); @@ -12938,119 +13823,40 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (Layout < 0 || Layout > 3) return nullptr; llvm::APSInt SatfArg; - if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) + if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1) + SatfArg = 0; // .b1 does not have satf argument. + else if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) return nullptr; bool Satf = SatfArg.getSExtValue(); - - // clang-format off -#define MMA_VARIANTS(geom, type) {{ \ - Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ - }} - // clang-format on - - auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) { - unsigned Index = Layout * 2 + Satf; - assert(Index < 8); - return Variants[Index]; - }; - unsigned IID; - unsigned NumEltsC; - unsigned NumEltsD; - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, 
f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } -#undef MMA_VARIANTS + NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID); + unsigned IID = MI.getMMAIntrinsic(Layout, Satf); + if (IID == 0) // Unsupported combination of Layout/Satf. + return nullptr; SmallVector<Value *, 24> Values; Function *Intrinsic = CGM.getIntrinsic(IID); - llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0); + llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0); // Load A - for (unsigned i = 0; i < 8; ++i) { + for (unsigned i = 0; i < MI.NumEltsA; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcA.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); - Values.push_back(Builder.CreateBitCast(V, ABType)); + Values.push_back(Builder.CreateBitCast(V, AType)); } // Load B - for (unsigned i = 0; i < 8; ++i) { + llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA); + for (unsigned i = 0; i < MI.NumEltsB; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcB.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); - Values.push_back(Builder.CreateBitCast(V, ABType)); + Values.push_back(Builder.CreateBitCast(V, BType)); } // Load C - llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16); - for (unsigned i = 0; i < NumEltsC; ++i) { + llvm::Type *CType = + Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB); + for (unsigned i = 0; i < MI.NumEltsC; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcC.getPointer(), llvm::ConstantInt::get(IntTy, i)), @@ -13059,7 +13865,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, } Value *Result = Builder.CreateCall(Intrinsic, Values); llvm::Type *DType = Dst.getElementType(); - for (unsigned i = 0; i < NumEltsD; ++i) + for (unsigned i = 0; i < MI.NumEltsD; ++i) Builder.CreateAlignedStore( Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType), Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), @@ -13077,7 +13883,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_memory_size: { llvm::Type *ResultType = ConvertType(E->getType()); Value *I = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); return Builder.CreateCall(Callee, I); } case WebAssembly::BI__builtin_wasm_memory_grow: { @@ -13086,37 +13892,66 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)) }; - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); + 
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); + return Builder.CreateCall(Callee, Args); + } + case WebAssembly::BI__builtin_wasm_memory_init: { + llvm::APSInt SegConst; + if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + llvm::APSInt MemConst; + if (!E->getArg(1)->isIntegerConstantExpr(MemConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + if (!MemConst.isNullValue()) + ErrorUnsupported(E, "non-zero memory index"); + Value *Args[] = {llvm::ConstantInt::get(getLLVMContext(), SegConst), + llvm::ConstantInt::get(getLLVMContext(), MemConst), + EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), + EmitScalarExpr(E->getArg(4))}; + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_init); return Builder.CreateCall(Callee, Args); } + case WebAssembly::BI__builtin_wasm_data_drop: { + llvm::APSInt SegConst; + if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + Value *Arg = llvm::ConstantInt::get(getLLVMContext(), SegConst); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop); + return Builder.CreateCall(Callee, {Arg}); + } + case WebAssembly::BI__builtin_wasm_tls_size: { + llvm::Type *ResultType = ConvertType(E->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType); + return Builder.CreateCall(Callee); + } case WebAssembly::BI__builtin_wasm_throw: { Value *Tag = EmitScalarExpr(E->getArg(0)); Value *Obj = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); return Builder.CreateCall(Callee, {Tag, Obj}); } - case WebAssembly::BI__builtin_wasm_rethrow: { - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); + case WebAssembly::BI__builtin_wasm_rethrow_in_catch: { + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow_in_catch); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_atomic_wait_i32: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } case WebAssembly::BI__builtin_wasm_atomic_wait_i64: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } case WebAssembly::BI__builtin_wasm_atomic_notify: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Count = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); return Builder.CreateCall(Callee, {Addr, Count}); } case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32: @@ -13127,7 +13962,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); - Value *Callee = 
CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } @@ -13139,7 +13974,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } @@ -13149,7 +13984,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_min_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::minimum, + Function *Callee = CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } @@ -13159,7 +13994,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_max_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::maximum, + Function *Callee = CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } @@ -13252,14 +14087,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, } Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); + Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_bitselect: { Value *V1 = EmitScalarExpr(E->getArg(0)); Value *V2 = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType())); return Builder.CreateCall(Callee, {V1, V2, C}); } @@ -13289,19 +14124,19 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, llvm_unreachable("unexpected builtin ID"); } Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); + Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_abs_f32x4: case WebAssembly::BI__builtin_wasm_abs_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_sqrt_f32x4: case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 1c578bd151bd..4d4038dae9cf 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -1,9 +1,8 @@ //===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===// // -// The LLVM Compiler 
Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,9 +15,10 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/Decl.h" +#include "clang/Basic/Cuda.h" +#include "clang/CodeGen/CodeGenABITypes.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/Format.h" @@ -42,17 +42,28 @@ private: /// Convenience reference to the current module llvm::Module &TheModule; /// Keeps track of kernel launch stubs emitted in this module - llvm::SmallVector<llvm::Function *, 16> EmittedKernels; - llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars; + struct KernelInfo { + llvm::Function *Kernel; + const Decl *D; + }; + llvm::SmallVector<KernelInfo, 16> EmittedKernels; + struct VarInfo { + llvm::GlobalVariable *Var; + const VarDecl *D; + unsigned Flag; + }; + llvm::SmallVector<VarInfo, 16> DeviceVars; /// Keeps track of variable containing handle of GPU binary. Populated by /// ModuleCtorFunction() and used to create corresponding cleanup calls in /// ModuleDtorFunction() llvm::GlobalVariable *GpuBinaryHandle = nullptr; /// Whether we generate relocatable device code. bool RelocatableDeviceCode; + /// Mangle context for device. + std::unique_ptr<MangleContext> DeviceMC; - llvm::Constant *getSetupArgumentFn() const; - llvm::Constant *getLaunchFn() const; + llvm::FunctionCallee getSetupArgumentFn() const; + llvm::FunctionCallee getLaunchFn() const; llvm::FunctionType *getRegisterGlobalsFnTy() const; llvm::FunctionType *getCallbackFnTy() const; @@ -104,20 +115,25 @@ private: return DummyFunc; } - void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args); + void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args); + void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args); + std::string getDeviceSideName(const Decl *ND); public: CGNVCUDARuntime(CodeGenModule &CGM); void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; - void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override { - DeviceVars.push_back(std::make_pair(&Var, Flags)); + void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, + unsigned Flags) override { + DeviceVars.push_back({&Var, VD, Flags}); } /// Creates module constructor function llvm::Function *makeModuleCtorFunction() override; /// Creates module destructor function llvm::Function *makeModuleDtorFunction() override; + /// Construct and return the stub name of a kernel. 
+ std::string getDeviceStubName(llvm::StringRef Name) const override; }; } @@ -137,7 +153,9 @@ CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const { CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), TheModule(CGM.getModule()), - RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) { + RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode), + DeviceMC(CGM.getContext().createMangleContext( + CGM.getContext().getAuxTargetInfo())) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); @@ -150,7 +168,7 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) VoidPtrPtrTy = VoidPtrTy->getPointerTo(); } -llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { +llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const { // cudaError_t cudaSetupArgument(void *, size_t, size_t) llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy}; return CGM.CreateRuntimeFunction( @@ -158,7 +176,7 @@ llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { addPrefixToName("SetupArgument")); } -llvm::Constant *CGNVCUDARuntime::getLaunchFn() const { +llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const { if (CGM.getLangOpts().HIP) { // hipError_t hipLaunchByPtr(char *); return CGM.CreateRuntimeFunction( @@ -186,16 +204,143 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { return llvm::FunctionType::get(VoidTy, Params, false); } +std::string CGNVCUDARuntime::getDeviceSideName(const Decl *D) { + auto *ND = cast<const NamedDecl>(D); + std::string DeviceSideName; + if (DeviceMC->shouldMangleDeclName(ND)) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + DeviceMC->mangleName(ND, Out); + DeviceSideName = Out.str(); + } else + DeviceSideName = ND->getIdentifier()->getName(); + return DeviceSideName; +} + void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { - EmittedKernels.push_back(CGF.CurFn); - emitDeviceStubBody(CGF, Args); + // Ensure either we have different ABIs between host and device compilations, + // says host compilation following MSVC ABI but device compilation follows + // Itanium C++ ABI or, if they follow the same ABI, kernel names after + // mangling should be the same after name stubbing. The later checking is + // very important as the device kernel name being mangled in host-compilation + // is used to resolve the device binaries to be executed. Inconsistent naming + // result in undefined behavior. Even though we cannot check that naming + // directly between host- and device-compilations, the host- and + // device-mangling in host compilation could help catching certain ones. + assert((CGF.CGM.getContext().getAuxTargetInfo() && + (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() != + CGF.CGM.getContext().getTargetInfo().getCXXABI())) || + getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) == + CGF.CurFn->getName()); + + EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); + if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), + CudaFeature::CUDA_USES_NEW_LAUNCH)) + emitDeviceStubBodyNew(CGF, Args); + else + emitDeviceStubBodyLegacy(CGF, Args); } -void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, - FunctionArgList &Args) { +// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local +// array and kernels are launched using cudaLaunchKernel(). 
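The comment above summarizes the scheme; roughly, the stub emitted by the function that follows, for a kernel such as __global__ void kernel(int n, float *p), behaves like the host code below. This is a hedged sketch rather than the generated IR; the exact __cudaPopCallConfiguration prototype lives in the CUDA runtime headers and is restated here as an assumption.

    #include <cuda_runtime.h>
    #include <cstddef>

    // Assumed prototype of the internal runtime entry point that pops the
    // <<<...>>> configuration pushed by the caller; the real declaration
    // comes from the CUDA headers.
    extern "C" cudaError_t __cudaPopCallConfiguration(dim3 *gridDim,
                                                      dim3 *blockDim,
                                                      size_t *sharedMem,
                                                      void *stream);

    void __device_stub_kernel(int n, float *p) {
      // Pack a pointer to every argument into a local array (kernel_args).
      void *args[2] = {&n, &p};
      dim3 gridDim, blockDim;
      size_t sharedMem;
      cudaStream_t stream;
      __cudaPopCallConfiguration(&gridDim, &blockDim, &sharedMem, &stream);
      // The stub passes its own address as the kernel handle, mirroring the
      // CGF.CurFn pointer cast in the code that follows.
      cudaLaunchKernel((const void *)__device_stub_kernel, gridDim, blockDim,
                       args, sharedMem, stream);
    }
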
+void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, + FunctionArgList &Args) { + // Build the shadow stack entry at the very start of the function. + + // Calculate amount of space we will need for all arguments. If we have no + // args, allocate a single pointer so we still have a valid pointer to the + // argument array that we can pass to runtime, even if it will be unused. + Address KernelArgs = CGF.CreateTempAlloca( + VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args", + llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size()))); + // Store pointers to the arguments in a locally allocated launch_args. + for (unsigned i = 0; i < Args.size(); ++i) { + llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer(); + llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy); + CGF.Builder.CreateDefaultAlignedStore( + VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i)); + } + + llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); + + // Lookup cudaLaunchKernel function. + // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + // void **args, size_t sharedMem, + // cudaStream_t stream); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + IdentifierInfo &cudaLaunchKernelII = + CGM.getContext().Idents.get("cudaLaunchKernel"); + FunctionDecl *cudaLaunchKernelFD = nullptr; + for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { + if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) + cudaLaunchKernelFD = FD; + } + + if (cudaLaunchKernelFD == nullptr) { + CGM.Error(CGF.CurFuncDecl->getLocation(), + "Can't find declaration for cudaLaunchKernel()"); + return; + } + // Create temporary dim3 grid_dim, block_dim. 
+ ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1); + QualType Dim3Ty = GridDimParam->getType(); + Address GridDim = + CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim"); + Address BlockDim = + CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim"); + Address ShmemSize = + CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size"); + Address Stream = + CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream"); + llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, + {/*gridDim=*/GridDim.getType(), + /*blockDim=*/BlockDim.getType(), + /*ShmemSize=*/ShmemSize.getType(), + /*Stream=*/Stream.getType()}, + /*isVarArg=*/false), + "__cudaPopCallConfiguration"); + + CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn, + {GridDim.getPointer(), BlockDim.getPointer(), + ShmemSize.getPointer(), Stream.getPointer()}); + + // Emit the call to cudaLaunch + llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy); + CallArgList LaunchKernelArgs; + LaunchKernelArgs.add(RValue::get(Kernel), + cudaLaunchKernelFD->getParamDecl(0)->getType()); + LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty); + LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty); + LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()), + cudaLaunchKernelFD->getParamDecl(3)->getType()); + LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)), + cudaLaunchKernelFD->getParamDecl(4)->getType()); + LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)), + cudaLaunchKernelFD->getParamDecl(5)->getType()); + + QualType QT = cudaLaunchKernelFD->getType(); + QualType CQT = QT.getCanonicalType(); + llvm::Type *Ty = CGM.getTypes().ConvertType(CQT); + llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty); + + const CGFunctionInfo &FI = + CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); + llvm::FunctionCallee cudaLaunchKernelFn = + CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel"); + CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), + LaunchKernelArgs); + CGF.EmitBranch(EndBlock); + + CGF.EmitBlock(EndBlock); +} + +void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF, + FunctionArgList &Args) { // Emit a call to cudaSetupArgument for each arg in Args. 
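// For comparison, a rough host-side equivalent of the legacy stub built here
// (the pre-CUDA-9.0 launch API).  cudaSetupArgument/cudaLaunch follow the
// helper function types declared above; the kernel signature, stub name, and
// argument offsets are illustrative assumptions only.
#include <cstddef>
extern "C" int cudaSetupArgument(void *arg, size_t size, size_t offset);
extern "C" int cudaLaunch(char *entry);

static void __device_stub_kernel(float *data, int n) {
  // Push each parameter into the pending launch configuration, stopping early
  // if the runtime reports failure (non-zero return), as the emitted branches
  // below do.
  if (cudaSetupArgument(&data, sizeof(data), /*offset=*/0) != 0)
    return;
  if (cudaSetupArgument(&n, sizeof(n), /*offset=*/sizeof(data)) != 0)
    return;
  // Launch the kernel identified by the address of its host-side stub.
  cudaLaunch(reinterpret_cast<char *>(&__device_stub_kernel));
}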
- llvm::Constant *cudaSetupArgFn = getSetupArgumentFn(); + llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn(); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); CharUnits Offset = CharUnits::Zero(); for (const VarDecl *A : Args) { @@ -209,17 +354,17 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, llvm::ConstantInt::get(SizeTy, TyWidth.getQuantity()), llvm::ConstantInt::get(SizeTy, Offset.getQuantity()), }; - llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); + llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0); - llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero); + llvm::Value *CBZero = CGF.Builder.CreateICmpEQ(CB, Zero); llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next"); - CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock); + CGF.Builder.CreateCondBr(CBZero, NextBlock, EndBlock); CGF.EmitBlock(NextBlock); Offset += TyWidth; } // Emit the call to cudaLaunch - llvm::Constant *cudaLaunchFn = getLaunchFn(); + llvm::FunctionCallee cudaLaunchFn = getLaunchFn(); llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy); CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); CGF.EmitBranch(EndBlock); @@ -259,7 +404,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Type *RegisterFuncParams[] = { VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()}; - llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterFuncParams, false), addUnderscoredPrefixToName("RegisterFunction")); @@ -267,13 +412,19 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { // __cuda_register_globals() and generate __cudaRegisterFunction() call for // each emitted kernel. 
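// Shape of the __cuda_register_globals() body assembled here, written out as
// plain host code for a single kernel.  The argument list mirrors the
// __cudaRegisterFunction call built in the loop below; the parameter names,
// the stub symbol, and the mangled kernel name are illustrative assumptions.
extern "C" int __cudaRegisterFunction(void **fatbinHandle, const char *hostFun,
                                      char *deviceFun, const char *deviceName,
                                      int threadLimit, void *tid, void *bid,
                                      void *blockDim, void *gridDim,
                                      int *wordSize);

static void __device_stub_kernel(float *, int) {}

static void __cuda_register_globals(void **fatbinHandle) {
  // Associate the host-side stub address with the device-side (mangled)
  // kernel name; the trailing parameters are -1 and null, as in the code
  // generated below.
  __cudaRegisterFunction(fatbinHandle,
                         reinterpret_cast<const char *>(&__device_stub_kernel),
                         const_cast<char *>("_Z6kernelPfi"), "_Z6kernelPfi",
                         /*threadLimit=*/-1, nullptr, nullptr, nullptr, nullptr,
                         nullptr);
}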
llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin(); - for (llvm::Function *Kernel : EmittedKernels) { - llvm::Constant *KernelName = makeConstantString(Kernel->getName()); + for (auto &&I : EmittedKernels) { + llvm::Constant *KernelName = makeConstantString(getDeviceSideName(I.D)); llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); llvm::Value *Args[] = { - &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy), - KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr, - NullPtr, NullPtr, NullPtr, + &GpuBinaryHandlePtr, + Builder.CreateBitCast(I.Kernel, VoidPtrTy), + KernelName, + KernelName, + llvm::ConstantInt::get(IntTy, -1), + NullPtr, + NullPtr, + NullPtr, + NullPtr, llvm::ConstantPointerNull::get(IntTy->getPointerTo())}; Builder.CreateCall(RegisterFunc, Args); } @@ -283,13 +434,13 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy, IntTy}; - llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterVarParams, false), addUnderscoredPrefixToName("RegisterVar")); - for (auto &Pair : DeviceVars) { - llvm::GlobalVariable *Var = Pair.first; - unsigned Flags = Pair.second; - llvm::Constant *VarName = makeConstantString(Var->getName()); + for (auto &&Info : DeviceVars) { + llvm::GlobalVariable *Var = Info.Var; + unsigned Flags = Info.Flag; + llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D)); uint64_t VarSize = CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); llvm::Value *Args[] = { @@ -329,10 +480,14 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { bool IsHIP = CGM.getLangOpts().HIP; + bool IsCUDA = CGM.getLangOpts().CUDA; // No need to generate ctors/dtors if there is no GPU binary. StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName; if (CudaGpuBinaryFileName.empty() && !IsHIP) return nullptr; + if ((IsHIP || (IsCUDA && !RelocatableDeviceCode)) && EmittedKernels.empty() && + DeviceVars.empty()) + return nullptr; // void __{cuda|hip}_register_globals(void* handle); llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); @@ -342,7 +497,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy()); // void ** __{cuda|hip}RegisterFatBinary(void *); - llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false), addUnderscoredPrefixToName("RegisterFatBinary")); // struct { int magic, int version, void * gpu_binary, void * dont_care }; @@ -516,6 +671,16 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // Call __cuda_register_globals(GpuBinaryHandle); if (RegisterGlobalsFunc) CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); + + // Call __cudaRegisterFatBinaryEnd(Handle) if this CUDA version needs it. 
+ if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), + CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) { + // void __cudaRegisterFatBinaryEnd(void **); + llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), + "__cudaRegisterFatBinaryEnd"); + CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall); + } } else { // Generate a unique module ID. SmallString<64> ModuleID; @@ -532,7 +697,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // void *, void (*)(void **)) SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary"); RegisterLinkedBinaryName += ModuleID; - llvm::Constant *RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction( getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName); assert(RegisterGlobalsFunc && "Expecting at least dummy function!"); @@ -550,7 +715,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // extern "C" int atexit(void (*f)(void)); llvm::FunctionType *AtExitTy = llvm::FunctionType::get(IntTy, CleanupFn->getType(), false); - llvm::Constant *AtExitFunc = + llvm::FunctionCallee AtExitFunc = CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(), /*Local=*/true); CtorBuilder.CreateCall(AtExitFunc, CleanupFn); @@ -585,7 +750,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { return nullptr; // void __cudaUnregisterFatBinary(void ** handle); - llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), addUnderscoredPrefixToName("UnregisterFatBinary")); @@ -627,6 +792,12 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { return ModuleDtorFunc; } +std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const { + if (!CGM.getLangOpts().HIP) + return Name; + return (Name + ".stub").str(); +} + CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { return new CGNVCUDARuntime(CGM); } diff --git a/lib/CodeGen/CGCUDARuntime.cpp b/lib/CodeGen/CGCUDARuntime.cpp index 1936f9f13692..c14a9d3f2bbb 100644 --- a/lib/CodeGen/CGCUDARuntime.cpp +++ b/lib/CodeGen/CGCUDARuntime.cpp @@ -1,9 +1,8 @@ //===----- CGCUDARuntime.cpp - Interface to CUDA Runtimes -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGCUDARuntime.h b/lib/CodeGen/CGCUDARuntime.h index 0168f4f9e942..e548a3a546d4 100644 --- a/lib/CodeGen/CGCUDARuntime.h +++ b/lib/CodeGen/CGCUDARuntime.h @@ -1,9 +1,8 @@ //===----- CGCUDARuntime.h - Interface to CUDA Runtimes ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,6 +15,8 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H +#include "llvm/ADT/StringRef.h" + namespace llvm { class Function; class GlobalVariable; @@ -24,6 +25,7 @@ class GlobalVariable; namespace clang { class CUDAKernelCallExpr; +class VarDecl; namespace CodeGen { @@ -53,7 +55,8 @@ public: /// Emits a kernel launch stub. virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; - virtual void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) = 0; + virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, + unsigned Flags) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. @@ -62,6 +65,9 @@ public: /// Returns a module cleanup function or nullptr if it's not needed. /// Must be called after ModuleCtorFunction virtual llvm::Function *makeModuleDtorFunction() = 0; + + /// Construct and return the stub name of a kernel. + virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0; }; /// Creates an instance of a CUDA runtime class. diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 8b0733fbec3e..6d903a0d09e2 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -1,9 +1,8 @@ //===--- CGCXX.cpp - Emit LLVM Code for declarations ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -204,55 +203,44 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { return false; } -llvm::Function *CodeGenModule::codegenCXXStructor(const CXXMethodDecl *MD, - StructorType Type) { - const CGFunctionInfo &FnInfo = - getTypes().arrangeCXXStructorDeclaration(MD, Type); +llvm::Function *CodeGenModule::codegenCXXStructor(GlobalDecl GD) { + const CGFunctionInfo &FnInfo = getTypes().arrangeCXXStructorDeclaration(GD); auto *Fn = cast<llvm::Function>( - getAddrOfCXXStructor(MD, Type, &FnInfo, /*FnType=*/nullptr, + getAddrOfCXXStructor(GD, &FnInfo, /*FnType=*/nullptr, /*DontDefer=*/true, ForDefinition)); - GlobalDecl GD; - if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD)) { - GD = GlobalDecl(DD, toCXXDtorType(Type)); - } else { - const auto *CD = cast<CXXConstructorDecl>(MD); - GD = GlobalDecl(CD, toCXXCtorType(Type)); - } - setFunctionLinkage(GD, Fn); CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo); setNonAliasAttributes(GD, Fn); - SetLLVMFunctionAttributesForDefinition(MD, Fn); + SetLLVMFunctionAttributesForDefinition(cast<CXXMethodDecl>(GD.getDecl()), Fn); return Fn; } -llvm::Constant *CodeGenModule::getAddrOfCXXStructor( - const CXXMethodDecl *MD, StructorType Type, const CGFunctionInfo *FnInfo, - llvm::FunctionType *FnType, bool DontDefer, - ForDefinition_t IsForDefinition) { - GlobalDecl GD; - if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { - GD = GlobalDecl(CD, toCXXCtorType(Type)); - } else { +llvm::FunctionCallee CodeGenModule::getAddrAndTypeOfCXXStructor( + GlobalDecl GD, const CGFunctionInfo *FnInfo, llvm::FunctionType *FnType, + bool DontDefer, ForDefinition_t IsForDefinition) { + auto *MD = cast<CXXMethodDecl>(GD.getDecl()); + + if (isa<CXXDestructorDecl>(MD)) { // Always alias equivalent complete destructors to base destructors in the // MS ABI. if (getTarget().getCXXABI().isMicrosoft() && - Type == StructorType::Complete && MD->getParent()->getNumVBases() == 0) - Type = StructorType::Base; - GD = GlobalDecl(cast<CXXDestructorDecl>(MD), toCXXDtorType(Type)); + GD.getDtorType() == Dtor_Complete && + MD->getParent()->getNumVBases() == 0) + GD = GD.getWithDtorType(Dtor_Base); } if (!FnType) { if (!FnInfo) - FnInfo = &getTypes().arrangeCXXStructorDeclaration(MD, Type); + FnInfo = &getTypes().arrangeCXXStructorDeclaration(GD); FnType = getTypes().GetFunctionType(*FnInfo); } - return GetOrCreateLLVMFunction( + llvm::Constant *Ptr = GetOrCreateLLVMFunction( getMangledName(GD), FnType, GD, /*ForVTable=*/false, DontDefer, - /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition); + /*IsThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition); + return {FnType, Ptr}; } static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, @@ -312,7 +300,7 @@ CodeGenFunction::BuildAppleKextVirtualDestructorCall( assert(DD->isVirtual() && Type != Dtor_Base); // Compute the function type we're calling. 
const CGFunctionInfo &FInfo = CGM.getTypes().arrangeCXXStructorDeclaration( - DD, StructorType::Complete); + GlobalDecl(DD, Dtor_Complete)); llvm::Type *Ty = CGM.getTypes().GetFunctionType(FInfo); return ::BuildAppleKextVirtualCall(*this, GlobalDecl(DD, Type), Ty, RD); } diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index ed168b1ce72d..041c0f8959fd 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -1,9 +1,8 @@ //===----- CGCXXABI.cpp - Interface to C++ ABIs ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -29,12 +28,6 @@ void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) { << S; } -bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const { - // We can only copy the argument if there exists at least one trivial, - // non-deleted copy or move constructor. - return RD->canPassInRegisters(); -} - llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) { return llvm::Constant::getNullValue(CGM.getTypes().ConvertType(T)); } @@ -298,7 +291,7 @@ llvm::GlobalValue::LinkageTypes CGCXXABI::getCXXDestructorLinkage( GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const { // Delegate back to CGM by default. return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage, - /*isConstantVariable=*/false); + /*IsConstantVariable=*/false); } bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) { diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index 65b50e14f436..3a9c3b347439 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -1,9 +1,8 @@ //===----- CGCXXABI.h - Interface to C++ ABIs -------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -137,10 +136,6 @@ public: RAA_Indirect }; - /// Returns true if C++ allows us to copy the memory of an object of type RD - /// when it is passed as an argument. - bool canCopyArgument(const CXXRecordDecl *RD) const; - /// Returns how an argument of the given record type should be passed. virtual RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const = 0; @@ -310,7 +305,7 @@ public: /// adding any required parameters. For convenience, ArgTys has been /// initialized with the type of 'this'. 
virtual AddedStructorArgs - buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) = 0; /// Returns true if the given destructor type should be emitted as a linkonce @@ -383,7 +378,7 @@ public: virtual void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, bool Delegating, - Address This) = 0; + Address This, QualType ThisTy) = 0; /// Emits the VTable definitions required for the given record type. virtual void emitVTableDefinitions(CodeGenVTables &CGVT, @@ -426,11 +421,15 @@ public: llvm::Type *Ty, SourceLocation Loc) = 0; + using DeleteOrMemberCallExpr = + llvm::PointerUnion<const CXXDeleteExpr *, const CXXMemberCallExpr *>; + /// Emit the ABI-specific virtual destructor call. - virtual llvm::Value * - EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, - CXXDtorType DtorType, Address This, - const CXXMemberCallExpr *CE) = 0; + virtual llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF, + const CXXDestructorDecl *Dtor, + CXXDtorType DtorType, + Address This, + DeleteOrMemberCallExpr E) = 0; virtual void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD, @@ -557,7 +556,7 @@ public: /// \param Dtor - a function taking a single pointer argument /// \param Addr - a pointer to pass to the destructor function. virtual void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, - llvm::Constant *Dtor, + llvm::FunctionCallee Dtor, llvm::Constant *Addr) = 0; /*************************** thread_local initialization ********************/ @@ -589,7 +588,7 @@ public: /// Emit a single constructor/destructor with the given type from a C++ /// constructor Decl. - virtual void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) = 0; + virtual void emitCXXStructor(GlobalDecl GD) = 0; /// Load a vtable from This, an object of polymorphic type RD, or from one of /// its virtual bases if it does not have its own vtable. Returns the vtable diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 7d494bb1f1c7..cf8024550eee 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -1,9 +1,8 @@ //===--- CGCall.cpp - Encapsulate calling convention details --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -32,7 +31,6 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -69,12 +67,19 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { } /// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR -/// qualification. -static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD, - const CXXMethodDecl *MD) { - QualType RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal(); +/// qualification. Either or both of RD and MD may be null. 
A null RD indicates +/// that there is no meaningful 'this' type, and a null MD can occur when +/// calling a method pointer. +CanQualType CodeGenTypes::DeriveThisType(const CXXRecordDecl *RD, + const CXXMethodDecl *MD) { + QualType RecTy; + if (RD) + RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal(); + else + RecTy = Context.VoidTy; + if (MD) - RecTy = Context.getAddrSpaceQualType(RecTy, MD->getTypeQualifiers().getAddressSpace()); + RecTy = Context.getAddrSpaceQualType(RecTy, MD->getMethodQualifiers().getAddressSpace()); return Context.getPointerType(CanQualType::CreateUnsafe(RecTy)); } @@ -169,11 +174,9 @@ static void appendParameterTypes(const CodeGenTypes &CGT, static const CGFunctionInfo & arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, SmallVectorImpl<CanQualType> &prefix, - CanQual<FunctionProtoType> FTP, - const FunctionDecl *FD) { + CanQual<FunctionProtoType> FTP) { SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; - RequiredArgs Required = - RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD); + RequiredArgs Required = RequiredArgs::forPrototypePlus(FTP, prefix.size()); // FIXME: Kill copy. appendParameterTypes(CGT, prefix, paramInfos, FTP); CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); @@ -187,11 +190,10 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, /// Arrange the argument and result information for a value of the /// given freestanding function type. const CGFunctionInfo & -CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP, - const FunctionDecl *FD) { +CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) { SmallVector<CanQualType, 16> argTypes; return ::arrangeLLVMFunctionInfo(*this, /*instanceMethod=*/false, argTypes, - FTP, FD); + FTP); } static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { @@ -240,7 +242,7 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { /// Arrange the argument and result information for a call to an /// unknown C++ non-static member function of the given abstract type. -/// (Zero value of RD means we don't have any meaningful "this" argument type, +/// (A null RD means we don't have any meaningful "this" argument type, /// so fall back to a generic pointer type). /// The member function must be an ordinary function, i.e. not a /// constructor or destructor. @@ -251,14 +253,11 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD, SmallVector<CanQualType, 16> argTypes; // Add the 'this' pointer. - if (RD) - argTypes.push_back(GetThisType(Context, RD, MD)); - else - argTypes.push_back(Context.VoidPtrTy); + argTypes.push_back(DeriveThisType(RD, MD)); return ::arrangeLLVMFunctionInfo( *this, true, argTypes, - FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD); + FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>()); } /// Set calling convention for CUDA/HIP kernel. 
@@ -290,7 +289,7 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) { return arrangeCXXMethodType(ThisType, prototype.getTypePtr(), MD); } - return arrangeFreeFunctionType(prototype, MD); + return arrangeFreeFunctionType(prototype); } bool CodeGenTypes::inheritingCtorHasParams( @@ -300,29 +299,23 @@ bool CodeGenTypes::inheritingCtorHasParams( return Type == Ctor_Complete || !Inherited.getShadowDecl()->constructsVirtualBase() || !Target.getCXXABI().hasConstructorVariants(); - } +} const CGFunctionInfo & -CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, - StructorType Type) { +CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) { + auto *MD = cast<CXXMethodDecl>(GD.getDecl()); SmallVector<CanQualType, 16> argTypes; SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; - argTypes.push_back(GetThisType(Context, MD->getParent(), MD)); + argTypes.push_back(DeriveThisType(MD->getParent(), MD)); bool PassParams = true; - GlobalDecl GD; if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { - GD = GlobalDecl(CD, toCXXCtorType(Type)); - // A base class inheriting constructor doesn't get forwarded arguments // needed to construct a virtual base (or base class thereof). if (auto Inherited = CD->getInheritedConstructor()) - PassParams = inheritingCtorHasParams(Inherited, toCXXCtorType(Type)); - } else { - auto *DD = dyn_cast<CXXDestructorDecl>(MD); - GD = GlobalDecl(DD, toCXXDtorType(Type)); + PassParams = inheritingCtorHasParams(Inherited, GD.getCtorType()); } CanQual<FunctionProtoType> FTP = GetFormalType(MD); @@ -332,7 +325,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, appendParameterTypes(*this, argTypes, paramInfos, FTP); CGCXXABI::AddedStructorArgs AddedArgs = - TheCXXABI.buildStructorSignature(MD, Type, argTypes); + TheCXXABI.buildStructorSignature(GD, argTypes); if (!paramInfos.empty()) { // Note: prefix implies after the first param. if (AddedArgs.Prefix) @@ -408,8 +401,11 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, unsigned TotalPrefixArgs = 1 + ExtraPrefixArgs; CanQual<FunctionProtoType> FPT = GetFormalType(D); - RequiredArgs Required = - RequiredArgs::forPrototypePlus(FPT, TotalPrefixArgs + ExtraSuffixArgs, D); + RequiredArgs Required = PassProtoArgs + ? RequiredArgs::forPrototypePlus( + FPT, TotalPrefixArgs + ExtraSuffixArgs) + : RequiredArgs::All; + GlobalDecl GD(D, CtorKind); CanQualType ResultType = TheCXXABI.HasThisReturn(GD) ? ArgTypes.front() @@ -452,7 +448,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { /*chainCall=*/false, None, noProto->getExtInfo(), {},RequiredArgs::All); } - return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>(), FD); + return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>()); } /// Arrange the argument and result information for the declaration or @@ -517,11 +513,9 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) { // FIXME: Do we need to handle ObjCMethodDecl? 
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); - if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD)) - return arrangeCXXStructorDeclaration(CD, getFromCtorType(GD.getCtorType())); - - if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(FD)) - return arrangeCXXStructorDeclaration(DD, getFromDtorType(GD.getDtorType())); + if (isa<CXXConstructorDecl>(GD.getDecl()) || + isa<CXXDestructorDecl>(GD.getDecl())) + return arrangeCXXStructorDeclaration(GD); return arrangeFunctionDeclaration(FD); } @@ -535,7 +529,7 @@ const CGFunctionInfo & CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) { assert(MD->isVirtual() && "only methods have thunks"); CanQual<FunctionProtoType> FTP = GetFormalType(MD); - CanQualType ArgTys[] = { GetThisType(Context, MD->getParent(), MD) }; + CanQualType ArgTys[] = {DeriveThisType(MD->getParent(), MD)}; return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, ArgTys, FTP->getExtInfo(), {}, RequiredArgs(1)); @@ -549,7 +543,7 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD, CanQual<FunctionProtoType> FTP = GetFormalType(CD); SmallVector<CanQualType, 2> ArgTys; const CXXRecordDecl *RD = CD->getParent(); - ArgTys.push_back(GetThisType(Context, RD, CD)); + ArgTys.push_back(DeriveThisType(RD, CD)); if (CT == Ctor_CopyingClosure) ArgTys.push_back(*FTP->param_type_begin()); if (RD->getNumVBases() > 0) @@ -582,7 +576,7 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, // extra prefix plus the arguments in the prototype. if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) { if (proto->isVariadic()) - required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs); + required = RequiredArgs::forPrototypePlus(proto, numExtraRequiredArgs); if (proto->hasExtParameterInfos()) addExtParameterInfosForCall(paramInfos, proto, numExtraRequiredArgs, @@ -635,11 +629,10 @@ CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto, auto paramInfos = getExtParameterInfosForCall(proto, 1, params.size()); auto argTypes = getArgTypesForDeclaration(Context, params); - return arrangeLLVMFunctionInfo( - GetReturnType(proto->getReturnType()), - /*instanceMethod*/ false, /*chainCall*/ false, argTypes, - proto->getExtInfo(), paramInfos, - RequiredArgs::forPrototypePlus(proto, 1, nullptr)); + return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()), + /*instanceMethod*/ false, /*chainCall*/ false, + argTypes, proto->getExtInfo(), paramInfos, + RequiredArgs::forPrototypePlus(proto, 1)); } const CGFunctionInfo & @@ -808,6 +801,8 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, ArrayRef<CanQualType> argTypes, RequiredArgs required) { assert(paramInfos.empty() || paramInfos.size() == argTypes.size()); + assert(!required.allowsOptionalArgs() || + required.getNumRequiredArgs() <= argTypes.size()); void *buffer = operator new(totalSizeToAlloc<ArgInfo, ExtParameterInfo>( @@ -1148,7 +1143,7 @@ EnterStructPointerForCoercedAccess(Address SrcPtr, return SrcPtr; // GEP into the first element. - SrcPtr = CGF.Builder.CreateStructGEP(SrcPtr, 0, CharUnits(), "coerce.dive"); + SrcPtr = CGF.Builder.CreateStructGEP(SrcPtr, 0, "coerce.dive"); // If the first element is a struct, recurse. llvm::Type *SrcTy = SrcPtr.getElementType(); @@ -1276,12 +1271,8 @@ static void BuildAggStore(CodeGenFunction &CGF, llvm::Value *Val, // Prefer scalar stores to first-class aggregate stores. 
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) { - const llvm::StructLayout *Layout = - CGF.CGM.getDataLayout().getStructLayout(STy); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto EltOffset = CharUnits::fromQuantity(Layout->getElementOffset(i)); - Address EltPtr = CGF.Builder.CreateStructGEP(Dest, i, EltOffset); + Address EltPtr = CGF.Builder.CreateStructGEP(Dest, i); llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i); CGF.Builder.CreateStore(Elt, EltPtr, DestIsVolatile); } @@ -1682,13 +1673,7 @@ llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) { if (!isFuncTypeConvertible(FPT)) return llvm::StructType::get(getLLVMContext()); - const CGFunctionInfo *Info; - if (isa<CXXDestructorDecl>(MD)) - Info = - &arrangeCXXStructorDeclaration(MD, getFromDtorType(GD.getDtorType())); - else - Info = &arrangeCXXMethodDeclaration(MD); - return GetFunctionType(*Info); + return GetFunctionType(GD); } static void AddAttributesFromFunctionProtoType(ASTContext &Ctx, @@ -1793,8 +1778,6 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, if (CodeGenOpts.Backchain) FuncAttrs.addAttribute("backchain"); - // FIXME: The interaction of this attribute with the SLH command line flag - // has not been determined. if (CodeGenOpts.SpeculativeLoadHardening) FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); } @@ -1826,9 +1809,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { llvm::AttrBuilder FuncAttrs; - ConstructDefaultFnAttrList(F.getName(), - F.hasFnAttribute(llvm::Attribute::OptimizeNone), - /* AttrOnCallsite = */ false, FuncAttrs); + ConstructDefaultFnAttrList(F.getName(), F.hasOptNone(), + /* AttrOnCallSite = */ false, FuncAttrs); F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); } @@ -1864,8 +1846,6 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); if (TargetDecl->hasAttr<ConvergentAttr>()) FuncAttrs.addAttribute(llvm::Attribute::Convergent); - if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>()) - FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { AddAttributesFromFunctionProtoType( @@ -1910,6 +1890,16 @@ void CodeGenModule::ConstructAttributeList( ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs); + // This must run after constructing the default function attribute list + // to ensure that the speculative load hardening attribute is removed + // in the case where the -mspeculative-load-hardening flag was passed. + if (TargetDecl) { + if (TargetDecl->hasAttr<NoSpeculativeLoadHardeningAttr>()) + FuncAttrs.removeAttribute(llvm::Attribute::SpeculativeLoadHardening); + if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); + } + if (CodeGenOpts.EnableSegmentedStacks && !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>())) FuncAttrs.addAttribute("split-stack"); @@ -2009,8 +1999,7 @@ void CodeGenModule::ConstructAttributeList( // Attach attributes to sret. 
if (IRFunctionArgs.hasSRetArg()) { llvm::AttrBuilder SRETAttrs; - if (!RetAI.getSuppressSRet()) - SRETAttrs.addAttribute(llvm::Attribute::StructRet); + SRETAttrs.addAttribute(llvm::Attribute::StructRet); hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); @@ -2066,7 +2055,7 @@ void CodeGenModule::ConstructAttributeList( Attrs.addAttribute(llvm::Attribute::InReg); if (AI.getIndirectByVal()) - Attrs.addAttribute(llvm::Attribute::ByVal); + Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType)); CharUnits Align = AI.getIndirectAlign(); @@ -2262,9 +2251,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // If we're using inalloca, all the memory arguments are GEPs off of the last // parameter, which is a pointer to the complete memory area. Address ArgStruct = Address::invalid(); - const llvm::StructLayout *ArgStructLayout = nullptr; if (IRFunctionArgs.hasInallocaArg()) { - ArgStructLayout = CGM.getDataLayout().getStructLayout(FI.getArgStruct()); ArgStruct = Address(FnArgs[IRFunctionArgs.getInallocaArgNo()], FI.getArgStructAlignment()); @@ -2313,10 +2300,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::InAlloca: { assert(NumIRArgs == 0); auto FieldIndex = ArgI.getInAllocaFieldIndex(); - CharUnits FieldOffset = - CharUnits::fromQuantity(ArgStructLayout->getElementOffset(FieldIndex)); - Address V = Builder.CreateStructGEP(ArgStruct, FieldIndex, FieldOffset, - Arg->getName()); + Address V = + Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName()); ArgVals.push_back(ParamValue::forIndirect(V)); break; } @@ -2476,7 +2461,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType()); if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy && STy->getNumElements() > 1) { - auto SrcLayout = CGM.getDataLayout().getStructLayout(STy); uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy); llvm::Type *DstTy = Ptr.getElementType(); uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy); @@ -2493,9 +2477,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { auto AI = FnArgs[FirstIRArg + i]; AI->setName(Arg->getName() + ".coerce" + Twine(i)); - auto Offset = CharUnits::fromQuantity(SrcLayout->getElementOffset(i)); - Address EltPtr = - Builder.CreateStructGEP(AddrToStoreInto, i, Offset); + Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); Builder.CreateStore(AI, EltPtr); } @@ -2508,7 +2490,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, assert(NumIRArgs == 1); auto AI = FnArgs[FirstIRArg]; AI->setName(Arg->getName() + ".coerce"); - CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, *this); + CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this); } // Match to what EmitParmDecl is expecting for this type. 
@@ -2531,7 +2513,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, auto coercionType = ArgI.getCoerceAndExpandType(); alloca = Builder.CreateElementBitCast(alloca, coercionType); - auto layout = CGM.getDataLayout().getStructLayout(coercionType); unsigned argIndex = FirstIRArg; for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { @@ -2539,7 +2520,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; - auto eltAddr = Builder.CreateStructGEP(alloca, i, layout); + auto eltAddr = Builder.CreateStructGEP(alloca, i); auto elt = FnArgs[argIndex++]; Builder.CreateStore(elt, eltAddr); } @@ -2891,15 +2872,6 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, RV = SI->getValueOperand(); SI->eraseFromParent(); - // If that was the only use of the return value, nuke it as well now. - auto returnValueInst = ReturnValue.getPointer(); - if (returnValueInst->use_empty()) { - if (auto alloca = dyn_cast<llvm::AllocaInst>(returnValueInst)) { - alloca->eraseFromParent(); - ReturnValue = Address::invalid(); - } - } - // Otherwise, we have to do a simple load. } else { RV = Builder.CreateLoad(ReturnValue); @@ -2944,7 +2916,6 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, case ABIArgInfo::CoerceAndExpand: { auto coercionType = RetAI.getCoerceAndExpandType(); - auto layout = CGM.getDataLayout().getStructLayout(coercionType); // Load all of the coerced elements out into results. llvm::SmallVector<llvm::Value*, 4> results; @@ -2954,7 +2925,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType)) continue; - auto eltAddr = Builder.CreateStructGEP(addr, i, layout); + auto eltAddr = Builder.CreateStructGEP(addr, i); auto elt = Builder.CreateLoad(eltAddr); results.push_back(elt); } @@ -3368,7 +3339,7 @@ void CallArgList::allocateArgumentMemory(CodeGenFunction &CGF) { void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const { if (StackBase) { // Restore the stack after the call. - llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); CGF.Builder.CreateCall(F, StackBase); } } @@ -3455,7 +3426,8 @@ void CodeGenFunction::EmitCallArgs( auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); assert(EmittedArg.getScalarVal() && "We emitted nothing for the arg?"); llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T, - EmittedArg.getScalarVal()); + EmittedArg.getScalarVal(), + PS->isDynamic()); Args.add(RValue::get(V), SizeTy); // If we're emitting args in reverse, be sure to do so with // pass_object_size, as well. 
@@ -3530,7 +3502,7 @@ struct DestroyUnpassedArg final : EHScopeStack::Cleanup { const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor(); assert(!Dtor->isTrivial()); CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false, - /*Delegating=*/false, Addr); + /*Delegating=*/false, Addr, Ty); } else { CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Ty)); } @@ -3565,7 +3537,7 @@ RValue CallArg::getRValue(CodeGenFunction &CGF) const { void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { LValue Dst = CGF.MakeAddrLValue(Addr, Ty); if (!HasLV && RV.isScalar()) - CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*init=*/true); + CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*isInit=*/true); else if (!HasLV && RV.isComplex()) CGF.EmitStoreOfComplex(RV.getComplexVal(), Dst, /*init=*/true); else { @@ -3678,15 +3650,15 @@ CodeGenFunction::AddObjCARCExceptionMetadata(llvm::Instruction *Inst) { /// Emits a call to the given no-arguments nounwind runtime function. llvm::CallInst * -CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, +CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const llvm::Twine &name) { return EmitNounwindRuntimeCall(callee, None, name); } /// Emits a call to the given nounwind runtime function. llvm::CallInst * -CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, +CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, const llvm::Twine &name) { llvm::CallInst *call = EmitRuntimeCall(callee, args, name); call->setDoesNotThrow(); @@ -3695,9 +3667,8 @@ CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, /// Emits a simple call (never an invoke) to the given no-arguments /// runtime function. -llvm::CallInst * -CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, - const llvm::Twine &name) { +llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, + const llvm::Twine &name) { return EmitRuntimeCall(callee, None, name); } @@ -3721,21 +3692,20 @@ CodeGenFunction::getBundlesForFunclet(llvm::Value *Callee) { } /// Emits a simple call (never an invoke) to the given runtime function. -llvm::CallInst * -CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, - const llvm::Twine &name) { - llvm::CallInst *call = - Builder.CreateCall(callee, args, getBundlesForFunclet(callee), name); +llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, + const llvm::Twine &name) { + llvm::CallInst *call = Builder.CreateCall( + callee, args, getBundlesForFunclet(callee.getCallee()), name); call->setCallingConv(getRuntimeCC()); return call; } /// Emits a call or invoke to the given noreturn runtime function. -void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, - ArrayRef<llvm::Value*> args) { +void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke( + llvm::FunctionCallee callee, ArrayRef<llvm::Value *> args) { SmallVector<llvm::OperandBundleDef, 1> BundleList = - getBundlesForFunclet(callee); + getBundlesForFunclet(callee.getCallee()); if (getInvokeDest()) { llvm::InvokeInst *invoke = @@ -3755,33 +3725,32 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, } /// Emits a call or invoke instruction to the given nullary runtime function. 
-llvm::CallSite -CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, +llvm::CallBase * +CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, const Twine &name) { return EmitRuntimeCallOrInvoke(callee, None, name); } /// Emits a call or invoke instruction to the given runtime function. -llvm::CallSite -CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, - ArrayRef<llvm::Value*> args, +llvm::CallBase * +CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, const Twine &name) { - llvm::CallSite callSite = EmitCallOrInvoke(callee, args, name); - callSite.setCallingConv(getRuntimeCC()); - return callSite; + llvm::CallBase *call = EmitCallOrInvoke(callee, args, name); + call->setCallingConv(getRuntimeCC()); + return call; } /// Emits a call or invoke instruction to the given function, depending /// on the current state of the EH stack. -llvm::CallSite -CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, - ArrayRef<llvm::Value *> Args, - const Twine &Name) { +llvm::CallBase *CodeGenFunction::EmitCallOrInvoke(llvm::FunctionCallee Callee, + ArrayRef<llvm::Value *> Args, + const Twine &Name) { llvm::BasicBlock *InvokeDest = getInvokeDest(); SmallVector<llvm::OperandBundleDef, 1> BundleList = - getBundlesForFunclet(Callee); + getBundlesForFunclet(Callee.getCallee()); - llvm::Instruction *Inst; + llvm::CallBase *Inst; if (!InvokeDest) Inst = Builder.CreateCall(Callee, Args, BundleList, Name); else { @@ -3796,7 +3765,7 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, if (CGM.getLangOpts().ObjCAutoRefCount) AddObjCARCExceptionMetadata(Inst); - return llvm::CallSite(Inst); + return Inst; } void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old, @@ -3808,7 +3777,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &CallArgs, - llvm::Instruction **callOrInvoke, + llvm::CallBase **callOrInvoke, SourceLocation Loc) { // FIXME: We no longer need the types from CallArgs; lift up and simplify. @@ -3819,17 +3788,46 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, QualType RetTy = CallInfo.getReturnType(); const ABIArgInfo &RetAI = CallInfo.getReturnInfo(); - llvm::FunctionType *IRFuncTy = Callee.getFunctionType(); + llvm::FunctionType *IRFuncTy = getTypes().GetFunctionType(CallInfo); + + const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) + // We can only guarantee that a function is called from the correct + // context/function based on the appropriate target attributes, + // so only check in the case where we have both always_inline and target + // since otherwise we could be making a conditional call after a check for + // the proper cpu features (and it won't cause code generation issues due to + // function based code generation). + if (TargetDecl->hasAttr<AlwaysInlineAttr>() && + TargetDecl->hasAttr<TargetAttr>()) + checkTargetFeatures(Loc, FD); + +#ifndef NDEBUG + if (!(CallInfo.isVariadic() && CallInfo.getArgStruct())) { + // For an inalloca varargs function, we don't expect CallInfo to match the + // function pointer's type, because the inalloca struct a will have extra + // fields in it for the varargs parameters. Code later in this function + // bitcasts the function pointer to the type derived from CallInfo. 
+ // + // In other cases, we assert that the types match up (until pointers stop + // having pointee types). + llvm::Type *TypeFromVal; + if (Callee.isVirtual()) + TypeFromVal = Callee.getVirtualFunctionType(); + else + TypeFromVal = + Callee.getFunctionPointer()->getType()->getPointerElementType(); + assert(IRFuncTy == TypeFromVal); + } +#endif // 1. Set up the arguments. // If we're using inalloca, insert the allocation after the stack save. // FIXME: Do this earlier rather than hacking it in here! Address ArgMemory = Address::invalid(); - const llvm::StructLayout *ArgMemoryLayout = nullptr; if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) { const llvm::DataLayout &DL = CGM.getDataLayout(); - ArgMemoryLayout = DL.getStructLayout(ArgStruct); llvm::Instruction *IP = CallArgs.getStackBase(); llvm::AllocaInst *AI; if (IP) { @@ -3846,13 +3844,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, ArgMemory = Address(AI, Align); } - // Helper function to drill into the inalloca allocation. - auto createInAllocaStructGEP = [&](unsigned FieldIndex) -> Address { - auto FieldOffset = - CharUnits::fromQuantity(ArgMemoryLayout->getElementOffset(FieldIndex)); - return Builder.CreateStructGEP(ArgMemory, FieldIndex, FieldOffset); - }; - ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), CallInfo); SmallVector<llvm::Value *, 16> IRCallArgs(IRFunctionArgs.totalIRArgs()); @@ -3875,7 +3866,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (IRFunctionArgs.hasSRetArg()) { IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr.getPointer(); } else if (RetAI.isInAlloca()) { - Address Addr = createInAllocaStructGEP(RetAI.getInAllocaFieldIndex()); + Address Addr = + Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex()); Builder.CreateStore(SRetPtr.getPointer(), Addr); } } @@ -3913,12 +3905,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, cast<llvm::Instruction>(Addr.getPointer()); CGBuilderTy::InsertPoint IP = Builder.saveIP(); Builder.SetInsertPoint(Placeholder); - Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex()); + Addr = + Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); Builder.restoreIP(IP); deferPlaceholderReplacement(Placeholder, Addr.getPointer()); } else { // Store the RValue into the argument struct. - Address Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex()); + Address Addr = + Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); unsigned AS = Addr.getType()->getPointerAddressSpace(); llvm::Type *MemType = ConvertTypeForMem(I->Ty)->getPointerTo(AS); // There are some cases where a trivial bitcast is not avoidable. 
The @@ -4099,11 +4093,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, STy->getPointerTo(Src.getAddressSpace())); } - auto SrcLayout = CGM.getDataLayout().getStructLayout(STy); assert(NumIRArgs == STy->getNumElements()); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto Offset = CharUnits::fromQuantity(SrcLayout->getElementOffset(i)); - Address EltPtr = Builder.CreateStructGEP(Src, i, Offset); + Address EltPtr = Builder.CreateStructGEP(Src, i); llvm::Value *LI = Builder.CreateLoad(EltPtr); IRCallArgs[FirstIRArg + i] = LI; } @@ -4153,7 +4145,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { llvm::Type *eltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; - Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + Address eltAddr = Builder.CreateStructGEP(addr, i); llvm::Value *elt = Builder.CreateLoad(eltAddr); IRCallArgs[IRArgPos++] = elt; } @@ -4186,8 +4178,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // cases, we can't do any parameter mismatch checks. Give up and bitcast // the callee. unsigned CalleeAS = CalleePtr->getType()->getPointerAddressSpace(); - auto FnTy = getTypes().GetFunctionType(CallInfo)->getPointerTo(CalleeAS); - CalleePtr = Builder.CreateBitCast(CalleePtr, FnTy); + CalleePtr = + Builder.CreateBitCast(CalleePtr, IRFuncTy->getPointerTo(CalleeAS)); } else { llvm::Type *LastParamTy = IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1); @@ -4219,19 +4211,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // // This makes the IR nicer, but more importantly it ensures that we // can inline the function at -O0 if it is marked always_inline. - auto simplifyVariadicCallee = [](llvm::Value *Ptr) -> llvm::Value* { - llvm::FunctionType *CalleeFT = - cast<llvm::FunctionType>(Ptr->getType()->getPointerElementType()); + auto simplifyVariadicCallee = [](llvm::FunctionType *CalleeFT, + llvm::Value *Ptr) -> llvm::Function * { if (!CalleeFT->isVarArg()) - return Ptr; + return nullptr; - llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Ptr); - if (!CE || CE->getOpcode() != llvm::Instruction::BitCast) - return Ptr; + // Get underlying value if it's a bitcast + if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Ptr)) { + if (CE->getOpcode() == llvm::Instruction::BitCast) + Ptr = CE->getOperand(0); + } - llvm::Function *OrigFn = dyn_cast<llvm::Function>(CE->getOperand(0)); + llvm::Function *OrigFn = dyn_cast<llvm::Function>(Ptr); if (!OrigFn) - return Ptr; + return nullptr; llvm::FunctionType *OrigFT = OrigFn->getFunctionType(); @@ -4240,15 +4233,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (OrigFT->isVarArg() || OrigFT->getNumParams() != CalleeFT->getNumParams() || OrigFT->getReturnType() != CalleeFT->getReturnType()) - return Ptr; + return nullptr; for (unsigned i = 0, e = OrigFT->getNumParams(); i != e; ++i) if (OrigFT->getParamType(i) != CalleeFT->getParamType(i)) - return Ptr; + return nullptr; return OrigFn; }; - CalleePtr = simplifyVariadicCallee(CalleePtr); + + if (llvm::Function *OrigFn = simplifyVariadicCallee(IRFuncTy, CalleePtr)) { + CalleePtr = OrigFn; + IRFuncTy = OrigFn->getFunctionType(); + } // 3. Perform the actual call. @@ -4293,11 +4290,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Apply always_inline to all calls within flatten functions. 
// FIXME: should this really take priority over __try, below? if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() && - !(Callee.getAbstractInfo().getCalleeDecl().getDecl() && - Callee.getAbstractInfo() - .getCalleeDecl() - .getDecl() - ->hasAttr<NoInlineAttr>())) { + !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) { Attrs = Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline); @@ -4341,22 +4334,21 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, getBundlesForFunclet(CalleePtr); // Emit the actual call/invoke instruction. - llvm::CallSite CS; + llvm::CallBase *CI; if (!InvokeDest) { - CS = Builder.CreateCall(CalleePtr, IRCallArgs, BundleList); + CI = Builder.CreateCall(IRFuncTy, CalleePtr, IRCallArgs, BundleList); } else { llvm::BasicBlock *Cont = createBasicBlock("invoke.cont"); - CS = Builder.CreateInvoke(CalleePtr, Cont, InvokeDest, IRCallArgs, + CI = Builder.CreateInvoke(IRFuncTy, CalleePtr, Cont, InvokeDest, IRCallArgs, BundleList); EmitBlock(Cont); } - llvm::Instruction *CI = CS.getInstruction(); if (callOrInvoke) *callOrInvoke = CI; // Apply the attributes and calling convention. - CS.setAttributes(Attrs); - CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); + CI->setAttributes(Attrs); + CI->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); // Apply various metadata. @@ -4371,7 +4363,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Insert instrumentation or attach profile metadata at indirect call sites. // For more details, see the comment before the definition of // IPVK_IndirectCallTarget in InstrProfData.inc. - if (!CS.getCalledFunction()) + if (!CI->getCalledFunction()) PGO.valueProfile(Builder, llvm::IPVK_IndirectCallTarget, CI, CalleePtr); @@ -4382,26 +4374,45 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Suppress tail calls if requested. if (llvm::CallInst *Call = dyn_cast<llvm::CallInst>(CI)) { - const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); if (TargetDecl && TargetDecl->hasAttr<NotTailCalledAttr>()) Call->setTailCallKind(llvm::CallInst::TCK_NoTail); } + // Add metadata for calls to MSAllocator functions + if (getDebugInfo() && TargetDecl && + TargetDecl->hasAttr<MSAllocatorAttr>()) + getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy, Loc); + // 4. Finish the call. // If the call doesn't return, finish the basic block and clear the // insertion point; this allows the rest of IRGen to discard // unreachable code. - if (CS.doesNotReturn()) { + if (CI->doesNotReturn()) { if (UnusedReturnSizePtr) PopCleanupBlock(); // Strip away the noreturn attribute to better diagnose unreachable UB. if (SanOpts.has(SanitizerKind::Unreachable)) { - if (auto *F = CS.getCalledFunction()) + // Also remove from function since CallBase::hasFnAttr additionally checks + // attributes of the called function. + if (auto *F = CI->getCalledFunction()) F->removeFnAttr(llvm::Attribute::NoReturn); - CS.removeAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoReturn); + CI->removeAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoReturn); + + // Avoid incompatibility with ASan which relies on the `noreturn` + // attribute to insert handler calls. 
+ if (SanOpts.hasOneOf(SanitizerKind::Address | + SanitizerKind::KernelAddress)) { + SanitizerScope SanScope(this); + llvm::IRBuilder<>::InsertPointGuard IPGuard(Builder); + Builder.SetInsertPoint(CI); + auto *FnType = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); + llvm::FunctionCallee Fn = + CGM.CreateRuntimeFunction(FnType, "__asan_handle_no_return"); + EmitNounwindRuntimeCall(Fn); + } } EmitUnreachable(Loc); @@ -4436,7 +4447,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, switch (RetAI.getKind()) { case ABIArgInfo::CoerceAndExpand: { auto coercionType = RetAI.getCoerceAndExpandType(); - auto layout = CGM.getDataLayout().getStructLayout(coercionType); Address addr = SRetPtr; addr = Builder.CreateElementBitCast(addr, coercionType); @@ -4448,7 +4458,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { llvm::Type *eltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; - Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + Address eltAddr = Builder.CreateStructGEP(addr, i); llvm::Value *elt = CI; if (requiresExtract) elt = Builder.CreateExtractValue(elt, unpaddedIndex++); @@ -4529,7 +4539,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } (); // Emit the assume_aligned check on the return value. - const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); if (Ret.isScalar() && TargetDecl) { if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) { llvm::Value *OffsetValue = nullptr; @@ -4556,7 +4565,7 @@ CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const { if (isVirtual()) { const CallExpr *CE = getVirtualCallExpr(); return CGF.CGM.getCXXABI().getVirtualFunctionPointer( - CGF, getVirtualMethodDecl(), getThisAddress(), getFunctionType(), + CGF, getVirtualMethodDecl(), getThisAddress(), getVirtualFunctionType(), CE ? CE->getBeginLoc() : SourceLocation()); } diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h index c300808bea28..cc11ded704ab 100644 --- a/lib/CodeGen/CGCall.h +++ b/lib/CodeGen/CGCall.h @@ -1,9 +1,8 @@ //===----- CGCall.h - Encapsulate calling convention details ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -136,6 +135,12 @@ public: return CGCallee(abstractInfo, functionPtr); } + static CGCallee + forDirect(llvm::FunctionCallee functionPtr, + const CGCalleeInfo &abstractInfo = CGCalleeInfo()) { + return CGCallee(abstractInfo, functionPtr.getCallee()); + } + static CGCallee forVirtual(const CallExpr *CE, GlobalDecl MD, Address Addr, llvm::FunctionType *FTy) { CGCallee result(SpecialKind::Virtual); @@ -199,12 +204,9 @@ public: assert(isVirtual()); return VirtualInfo.Addr; } - - llvm::FunctionType *getFunctionType() const { - if (isVirtual()) - return VirtualInfo.FTy; - return cast<llvm::FunctionType>( - getFunctionPointer()->getType()->getPointerElementType()); + llvm::FunctionType *getVirtualFunctionType() const { + assert(isVirtual()); + return VirtualInfo.FTy; } /// If this is a delayed callee computation of some sort, prepare diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index ee150a792b76..c8bb63c5c4b1 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -1,9 +1,8 @@ //===--- CGClass.cpp - Emit LLVM Code for C++ classes -----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -303,7 +302,8 @@ Address CodeGenFunction::GetAddressOfBaseClass( // Get the base pointer type. llvm::Type *BasePtrTy = - ConvertType((PathEnd[-1])->getType())->getPointerTo(); + ConvertType((PathEnd[-1])->getType()) + ->getPointerTo(Value.getType()->getPointerAddressSpace()); QualType DerivedTy = getContext().getRecordType(Derived); CharUnits DerivedAlign = CGM.getClassPointerAlignment(Derived); @@ -491,12 +491,15 @@ namespace { cast<CXXMethodDecl>(CGF.CurCodeDecl)->getParent(); const CXXDestructorDecl *D = BaseClass->getDestructor(); + // We are already inside a destructor, so presumably the object being + // destroyed should have the expected type. + QualType ThisTy = D->getThisObjectType(); Address Addr = CGF.GetAddressOfDirectBaseInCompleteClass(CGF.LoadCXXThisAddress(), DerivedClass, BaseClass, BaseIsVirtual); CGF.EmitCXXDestructorCall(D, Dtor_Base, BaseIsVirtual, - /*Delegating=*/false, Addr); + /*Delegating=*/false, Addr, ThisTy); } }; @@ -526,8 +529,7 @@ static bool BaseInitializerUsesThis(ASTContext &C, const Expr *Init) { static void EmitBaseInitializer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl, - CXXCtorInitializer *BaseInit, - CXXCtorType CtorType) { + CXXCtorInitializer *BaseInit) { assert(BaseInit->isBaseInitializer() && "Must have base initializer!"); @@ -539,10 +541,6 @@ static void EmitBaseInitializer(CodeGenFunction &CGF, bool isBaseVirtual = BaseInit->isBaseVirtual(); - // The base constructor doesn't construct virtual bases. - if (CtorType == Ctor_Base && isBaseVirtual) - return; - // If the initializer for the base (other than the constructor // itself) accesses 'this' in any way, we need to initialize the // vtables. 
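Aside, not part of the patch: the new CGCallee::forDirect overload above follows the LLVM 9 switch from bare llvm::Constant*/llvm::Value* callees to llvm::FunctionCallee, which bundles the callee value with its llvm::FunctionType so call emission no longer has to recover the type from the pointer's pointee type. A minimal sketch of the underlying LLVM API, assuming an ordinary Module and IRBuilder; the helper name emitNoReturnHandlerCall is illustrative only:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

// Declare and call a void() runtime helper such as __asan_handle_no_return.
static void emitNoReturnHandlerCall(llvm::Module &M, llvm::IRBuilder<> &Builder) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), /*isVarArg=*/false);
  // getOrInsertFunction returns a FunctionCallee, i.e. {FunctionType*, Value*}.
  llvm::FunctionCallee Fn = M.getOrInsertFunction("__asan_handle_no_return", FnTy);
  // CreateCall accepts the FunctionCallee directly, keeping the call typed.
  Builder.CreateCall(Fn);
}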
@@ -561,7 +559,7 @@ static void EmitBaseInitializer(CodeGenFunction &CGF, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, - CGF.overlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual)); + CGF.getOverlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual)); CGF.EmitAggExpr(BaseInit->getInit(), AggSlot); @@ -650,7 +648,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, LValue Src = CGF.EmitLValueForFieldInitialization(ThisRHSLV, Field); // Copy the aggregate. - CGF.EmitAggregateCopy(LHS, Src, FieldType, CGF.overlapForFieldInit(Field), + CGF.EmitAggregateCopy(LHS, Src, FieldType, CGF.getOverlapForFieldInit(Field), LHS.isVolatileQualified()); // Ensure that we destroy the objects if an exception is thrown later in // the constructor. @@ -686,7 +684,7 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, - overlapForFieldInit(Field), + getOverlapForFieldInit(Field), AggValueSlot::IsNotZeroed, // Checks are made by the code that calls constructor. AggValueSlot::IsSanitizerChecked); @@ -793,7 +791,7 @@ void CodeGenFunction::EmitAsanPrologueOrEpilogue(bool Prologue) { llvm::Type *Args[2] = {IntPtrTy, IntPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, Args, false); - llvm::Constant *F = CGM.CreateRuntimeFunction( + llvm::FunctionCallee F = CGM.CreateRuntimeFunction( FTy, Prologue ? "__asan_poison_intra_object_redzone" : "__asan_unpoison_intra_object_redzone"); @@ -1013,7 +1011,7 @@ namespace { if (FOffset < FirstFieldOffset) { FirstField = F; FirstFieldOffset = FOffset; - } else if (FOffset > LastFieldOffset) { + } else if (FOffset >= LastFieldOffset) { LastField = F; LastFieldOffset = FOffset; } @@ -1264,24 +1262,37 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, CXXConstructorDecl::init_const_iterator B = CD->init_begin(), E = CD->init_end(); + // Virtual base initializers first, if any. They aren't needed if: + // - This is a base ctor variant + // - There are no vbases + // - The class is abstract, so a complete object of it cannot be constructed + // + // The check for an abstract class is necessary because sema may not have + // marked virtual base destructors referenced. + bool ConstructVBases = CtorType != Ctor_Base && + ClassDecl->getNumVBases() != 0 && + !ClassDecl->isAbstract(); + + // In the Microsoft C++ ABI, there are no constructor variants. Instead, the + // constructor of a class with virtual bases takes an additional parameter to + // conditionally construct the virtual bases. Emit that check here. llvm::BasicBlock *BaseCtorContinueBB = nullptr; - if (ClassDecl->getNumVBases() && + if (ConstructVBases && !CGM.getTarget().getCXXABI().hasConstructorVariants()) { - // The ABIs that don't have constructor variants need to put a branch - // before the virtual base initialization code. BaseCtorContinueBB = - CGM.getCXXABI().EmitCtorCompleteObjectHandler(*this, ClassDecl); + CGM.getCXXABI().EmitCtorCompleteObjectHandler(*this, ClassDecl); assert(BaseCtorContinueBB); } llvm::Value *const OldThis = CXXThisValue; - // Virtual base initializers first. 
for (; B != E && (*B)->isBaseInitializer() && (*B)->isBaseVirtual(); B++) { + if (!ConstructVBases) + continue; if (CGM.getCodeGenOpts().StrictVTablePointers && CGM.getCodeGenOpts().OptimizationLevel > 0 && isInitializerOfDynamicClass(*B)) CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); - EmitBaseInitializer(*this, ClassDecl, *B, CtorType); + EmitBaseInitializer(*this, ClassDecl, *B); } if (BaseCtorContinueBB) { @@ -1298,7 +1309,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, CGM.getCodeGenOpts().OptimizationLevel > 0 && isInitializerOfDynamicClass(*B)) CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); - EmitBaseInitializer(*this, ClassDecl, *B, CtorType); + EmitBaseInitializer(*this, ClassDecl, *B); } CXXThisValue = OldThis; @@ -1432,9 +1443,11 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { if (DtorType == Dtor_Deleting) { RunCleanupsScope DtorEpilogue(*this); EnterDtorCleanups(Dtor, Dtor_Deleting); - if (HaveInsertPoint()) + if (HaveInsertPoint()) { + QualType ThisTy = Dtor->getThisObjectType(); EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, LoadCXXThisAddress()); + /*Delegating=*/false, LoadCXXThisAddress(), ThisTy); + } return; } @@ -1465,8 +1478,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { EnterDtorCleanups(Dtor, Dtor_Complete); if (!isTryBody) { + QualType ThisTy = Dtor->getThisObjectType(); EmitCXXDestructorCall(Dtor, Dtor_Base, /*ForVirtualBase=*/false, - /*Delegating=*/false, LoadCXXThisAddress()); + /*Delegating=*/false, LoadCXXThisAddress(), ThisTy); break; } @@ -1627,7 +1641,7 @@ namespace { llvm::FunctionType *FnType = llvm::FunctionType::get(CGF.VoidTy, ArgTypes, false); - llvm::Value *Fn = + llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction(FnType, "__sanitizer_dtor_callback"); CGF.EmitNounwindRuntimeCall(Fn, Args); } @@ -1970,10 +1984,14 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor, pushRegularPartialArrayCleanup(arrayBegin, cur, type, eltAlignment, *destroyer); } - + auto currAVS = AggValueSlot::forAddr( + curAddr, type.getQualifiers(), AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap, AggValueSlot::IsNotZeroed, + NewPointerIsChecked ? AggValueSlot::IsSanitizerChecked + : AggValueSlot::IsNotSanitizerChecked); EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, curAddr, E, - AggValueSlot::DoesNotOverlap, NewPointerIsChecked); + /*Delegating=*/false, currAVS, E); } // Go to the next element. 
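Aside, a source-level illustration rather than part of the patch: the ConstructVBases guard in the EmitCtorPrologue hunk above encodes the Itanium ABI rule that only the complete-object constructor variant (C1) initializes virtual bases, while the base-object variant (C2) must skip them because the most-derived constructor has already done that work. The type names below are made up for the example:

struct V { int v; };
struct A : virtual V {
  A() : V{1} {}   // V is initialized here only when A is the most-derived object
};
struct B : A {
  // B's complete-object constructor (C1) initializes V and then calls A's
  // base-object constructor (C2), which must not construct V again.
  B() {}
};

In the Microsoft ABI there are no separate variants; the constructor of a class with virtual bases takes an extra "is most derived" flag, which is why the prologue emits EmitCtorCompleteObjectHandler and a conditional branch around the virtual-base initializers.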
@@ -2001,22 +2019,22 @@ void CodeGenFunction::destroyCXXObject(CodeGenFunction &CGF, const CXXDestructorDecl *dtor = record->getDestructor(); assert(!dtor->isTrivial()); CGF.EmitCXXDestructorCall(dtor, Dtor_Complete, /*for vbase*/ false, - /*Delegating=*/false, addr); + /*Delegating=*/false, addr, type); } void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, - bool Delegating, Address This, - const CXXConstructExpr *E, - AggValueSlot::Overlap_t Overlap, - bool NewPointerIsChecked) { + bool Delegating, + AggValueSlot ThisAVS, + const CXXConstructExpr *E) { CallArgList Args; - - LangAS SlotAS = E->getType().getAddressSpace(); + Address This = ThisAVS.getAddress(); + LangAS SlotAS = ThisAVS.getQualifiers().getAddressSpace(); QualType ThisType = D->getThisType(); LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace(); llvm::Value *ThisPtr = This.getPointer(); + if (SlotAS != ThisAS) { unsigned TargetThisAS = getContext().getTargetAddressSpace(ThisAS); llvm::Type *NewType = @@ -2024,6 +2042,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, ThisPtr = getTargetHooks().performAddrSpaceCast(*this, This.getPointer(), ThisAS, SlotAS, NewType); } + // Push the this ptr. Args.add(RValue::get(ThisPtr), D->getThisType()); @@ -2037,7 +2056,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, LValue Src = EmitLValue(Arg); QualType DestTy = getContext().getTypeDeclType(D->getParent()); LValue Dest = MakeAddrLValue(This, DestTy); - EmitAggregateCopyCtor(Dest, Src, Overlap); + EmitAggregateCopyCtor(Dest, Src, ThisAVS.mayOverlap()); return; } @@ -2050,7 +2069,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, /*ParamsToSkip*/ 0, Order); EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args, - Overlap, E->getExprLoc(), NewPointerIsChecked); + ThisAVS.mayOverlap(), E->getExprLoc(), + ThisAVS.isSanitizerChecked()); } static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, @@ -2130,8 +2150,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, Delegating, Args); // Emit the call. - llvm::Constant *CalleePtr = - CGM.getAddrOfCXXStructor(D, getFromCtorType(Type)); + llvm::Constant *CalleePtr = CGM.getAddrOfCXXStructor(GlobalDecl(D, Type)); const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall( Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs); CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type)); @@ -2350,8 +2369,11 @@ namespace { : Dtor(D), Addr(Addr), Type(Type) {} void Emit(CodeGenFunction &CGF, Flags flags) override { + // We are calling the destructor from within the constructor. + // Therefore, "this" should have the expected type. 
+ QualType ThisTy = Dtor->getThisObjectType(); CGF.EmitCXXDestructorCall(Dtor, Type, /*ForVirtualBase=*/false, - /*Delegating=*/true, Addr); + /*Delegating=*/true, Addr, ThisTy); } }; } // end anonymous namespace @@ -2389,31 +2411,32 @@ CodeGenFunction::EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor void CodeGenFunction::EmitCXXDestructorCall(const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, - bool Delegating, - Address This) { + bool Delegating, Address This, + QualType ThisTy) { CGM.getCXXABI().EmitDestructorCall(*this, DD, Type, ForVirtualBase, - Delegating, This); + Delegating, This, ThisTy); } namespace { struct CallLocalDtor final : EHScopeStack::Cleanup { const CXXDestructorDecl *Dtor; Address Addr; + QualType Ty; - CallLocalDtor(const CXXDestructorDecl *D, Address Addr) - : Dtor(D), Addr(Addr) {} + CallLocalDtor(const CXXDestructorDecl *D, Address Addr, QualType Ty) + : Dtor(D), Addr(Addr), Ty(Ty) {} void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, Addr); + /*Delegating=*/false, Addr, Ty); } }; } // end anonymous namespace void CodeGenFunction::PushDestructorCleanup(const CXXDestructorDecl *D, - Address Addr) { - EHStack.pushCleanup<CallLocalDtor>(NormalAndEHCleanup, D, Addr); + QualType T, Address Addr) { + EHStack.pushCleanup<CallLocalDtor>(NormalAndEHCleanup, D, Addr, T); } void CodeGenFunction::PushDestructorCleanup(QualType T, Address Addr) { @@ -2423,7 +2446,7 @@ void CodeGenFunction::PushDestructorCleanup(QualType T, Address Addr) { const CXXDestructorDecl *D = ClassDecl->getDestructor(); assert(D && D->isUsed() && "destructor not marked as used!"); - PushDestructorCleanup(D, Addr); + PushDestructorCleanup(D, T, Addr); } void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) { diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index 3743d24f11fc..5594f3030229 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -1,9 +1,8 @@ //===--- CGCleanup.cpp - Bookkeeping and code emission for cleanups -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -53,12 +52,8 @@ DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) { llvm::Type *ComplexTy = llvm::StructType::get(V.first->getType(), V.second->getType()); Address addr = CGF.CreateDefaultAlignTempAlloca(ComplexTy, "saved-complex"); - CGF.Builder.CreateStore(V.first, - CGF.Builder.CreateStructGEP(addr, 0, CharUnits())); - CharUnits offset = CharUnits::fromQuantity( - CGF.CGM.getDataLayout().getTypeAllocSize(V.first->getType())); - CGF.Builder.CreateStore(V.second, - CGF.Builder.CreateStructGEP(addr, 1, offset)); + CGF.Builder.CreateStore(V.first, CGF.Builder.CreateStructGEP(addr, 0)); + CGF.Builder.CreateStore(V.second, CGF.Builder.CreateStructGEP(addr, 1)); return saved_type(addr.getPointer(), ComplexAddress); } @@ -96,12 +91,10 @@ RValue DominatingValue<RValue>::saved_type::restore(CodeGenFunction &CGF) { } case ComplexAddress: { Address address = getSavingAddress(Value); - llvm::Value *real = CGF.Builder.CreateLoad( - CGF.Builder.CreateStructGEP(address, 0, CharUnits())); - CharUnits offset = CharUnits::fromQuantity( - CGF.CGM.getDataLayout().getTypeAllocSize(real->getType())); - llvm::Value *imag = CGF.Builder.CreateLoad( - CGF.Builder.CreateStructGEP(address, 1, offset)); + llvm::Value *real = + CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(address, 0)); + llvm::Value *imag = + CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(address, 1)); return RValue::getComplex(real, imag); } } diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h index 15d6f46dcb56..ffe0f9d9dd20 100644 --- a/lib/CodeGen/CGCleanup.h +++ b/lib/CodeGen/CGCleanup.h @@ -1,9 +1,8 @@ //===-- CGCleanup.h - Classes for cleanups IR generation --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp index 80fa7c873631..aee5a927a055 100644 --- a/lib/CodeGen/CGCoroutine.cpp +++ b/lib/CodeGen/CGCoroutine.cpp @@ -1,9 +1,8 @@ //===----- CGCoroutine.cpp - Emit LLVM Code for C++ coroutines ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -205,7 +204,6 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co BasicBlock *RealSuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend.bool")); CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock); - SuspendBlock = RealSuspendBlock; CGF.EmitBlock(RealSuspendBlock); } @@ -407,7 +405,7 @@ struct CallCoroEnd final : public EHScopeStack::Cleanup { if (Bundles.empty()) { // Otherwise, (landingpad model), create a conditional branch that leads // either to a cleanup block or a block with EH resume instruction. - auto *ResumeBB = CGF.getEHResumeBlock(/*cleanup=*/true); + auto *ResumeBB = CGF.getEHResumeBlock(/*isCleanup=*/true); auto *CleanupContBB = CGF.createBasicBlock("cleanup.cont"); CGF.Builder.CreateCondBr(CoroEnd, ResumeBB, CleanupContBB); CGF.EmitBlock(CleanupContBB); @@ -733,10 +731,10 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, Args.push_back(llvm::ConstantTokenNone::get(getLLVMContext())); break; } - for (auto &Arg : E->arguments()) + for (const Expr *Arg : E->arguments()) Args.push_back(EmitScalarExpr(Arg)); - llvm::Value *F = CGM.getIntrinsic(IID); + llvm::Function *F = CGM.getIntrinsic(IID); llvm::CallInst *Call = Builder.CreateCall(F, Args); // Note: The following code is to enable to emit coro.id and coro.begin by diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 41f8721468a3..f6ee7ee26d4b 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -1,9 +1,8 @@ //===--- CGDebugInfo.cpp - Emit Debug Information for a Module ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -19,6 +18,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "clang/Analysis/Analyses/ExprMutationAnalyzer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" @@ -373,7 +373,7 @@ CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const { SourceManager &SM = CGM.getContext().getSourceManager(); bool Invalid; - llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid); + const llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid); if (Invalid) return None; @@ -423,8 +423,12 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { } SmallString<32> Checksum; + + // Compute the checksum if possible. If the location is affected by a #line + // directive that refers to a file, PLoc will have an invalid FileID, and we + // will correctly get no checksum. 
Optional<llvm::DIFile::ChecksumKind> CSKind = - computeChecksum(SM.getFileID(Loc), Checksum); + computeChecksum(PLoc.getFileID(), Checksum); Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; if (CSKind) CSInfo.emplace(*CSKind, Checksum); @@ -451,8 +455,8 @@ CGDebugInfo::createFile(StringRef FileName, for (; CurDirIt != CurDirE && *CurDirIt == *FileIt; ++CurDirIt, ++FileIt) llvm::sys::path::append(DirBuf, *CurDirIt); if (std::distance(llvm::sys::path::begin(CurDir), CurDirIt) == 1) { - // The common prefix only the root; stripping it would cause - // LLVM diagnostic locations to be more confusing. + // Don't strip the common prefix if it is only the root "/" + // since that would make LLVM diagnostic locations confusing. Dir = {}; File = RemappedFile; } else { @@ -610,12 +614,8 @@ void CGDebugInfo::CreateCompileUnit() { TheCU = DBuilder.createCompileUnit( LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "", LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO, - CGOpts.DwarfDebugFlags, RuntimeVers, - (CGOpts.getSplitDwarfMode() != CodeGenOptions::NoFission) - ? "" - : CGOpts.SplitDwarfFile, - EmissionKind, DwoId, CGOpts.SplitDwarfInlining, - CGOpts.DebugInfoForProfiling, + CGOpts.DwarfDebugFlags, RuntimeVers, CGOpts.SplitDwarfFile, EmissionKind, + DwoId, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling, CGM.getTarget().getTriple().isNVPTX() ? llvm::DICompileUnit::DebugNameTableKind::None : static_cast<llvm::DICompileUnit::DebugNameTableKind>( @@ -916,6 +916,11 @@ static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM, if (!needsTypeIdentifier(TD, CGM, TheCU)) return Identifier; + if (const auto *RD = dyn_cast<CXXRecordDecl>(TD)) + if (RD->getDefinition()) + if (RD->isDynamicClass() && + CGM.getVTableLinkage(RD) == llvm::GlobalValue::ExternalLinkage) + return Identifier; // TODO: This is using the RTTI name. Is there a better way to get // a unique string for a type? @@ -1083,15 +1088,18 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, assert(Ty->isTypeAlias()); llvm::DIType *Src = getOrCreateType(Ty->getAliasedType(), Unit); + auto *AliasDecl = + cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl()) + ->getTemplatedDecl(); + + if (AliasDecl->hasAttr<NoDebugAttr>()) + return Src; + SmallString<128> NS; llvm::raw_svector_ostream OS(NS); Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false); printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy()); - auto *AliasDecl = - cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl()) - ->getTemplatedDecl(); - SourceLocation Loc = AliasDecl->getLocation(); return DBuilder.createTypedef(Src, OS.str(), getOrCreateFile(Loc), getLineNumber(Loc), @@ -1100,15 +1108,20 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, llvm::DIFile *Unit) { + llvm::DIType *Underlying = + getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit); + + if (Ty->getDecl()->hasAttr<NoDebugAttr>()) + return Underlying; + // We don't set size information, but do specify where the typedef was // declared. SourceLocation Loc = Ty->getDecl()->getLocation(); // Typedefs are derived from some other type. 
- return DBuilder.createTypedef( - getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit), - Ty->getDecl()->getName(), getOrCreateFile(Loc), getLineNumber(Loc), - getDeclContextDescriptor(Ty->getDecl())); + return DBuilder.createTypedef(Underlying, Ty->getDecl()->getName(), + getOrCreateFile(Loc), getLineNumber(Loc), + getDeclContextDescriptor(Ty->getDecl())); } static unsigned getDwarfCC(CallingConv CC) { @@ -1394,6 +1407,9 @@ void CGDebugInfo::CollectRecordFields( isa<VarTemplateSpecializationDecl>(V)) continue; + if (isa<VarTemplatePartialSpecializationDecl>(V)) + continue; + // Reuse the existing static member declaration if one exists auto MI = StaticDataMemberCache.find(V->getCanonicalDecl()); if (MI != StaticDataMemberCache.end()) { @@ -1726,31 +1742,37 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, QualType T = TA.getParamTypeForDecl().getDesugaredType(CGM.getContext()); llvm::DIType *TTy = getOrCreateType(T, Unit); llvm::Constant *V = nullptr; - const CXXMethodDecl *MD; - // Variable pointer template parameters have a value that is the address - // of the variable. - if (const auto *VD = dyn_cast<VarDecl>(D)) - V = CGM.GetAddrOfGlobalVar(VD); - // Member function pointers have special support for building them, though - // this is currently unsupported in LLVM CodeGen. - else if ((MD = dyn_cast<CXXMethodDecl>(D)) && MD->isInstance()) - V = CGM.getCXXABI().EmitMemberFunctionPointer(MD); - else if (const auto *FD = dyn_cast<FunctionDecl>(D)) - V = CGM.GetAddrOfFunction(FD); - // Member data pointers have special handling too to compute the fixed - // offset within the object. - else if (const auto *MPT = dyn_cast<MemberPointerType>(T.getTypePtr())) { - // These five lines (& possibly the above member function pointer - // handling) might be able to be refactored to use similar code in - // CodeGenModule::getMemberPointerConstant - uint64_t fieldOffset = CGM.getContext().getFieldOffset(D); - CharUnits chars = - CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset); - V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars); + // Skip retrieve the value if that template parameter has cuda device + // attribute, i.e. that value is not available at the host side. + if (!CGM.getLangOpts().CUDA || CGM.getLangOpts().CUDAIsDevice || + !D->hasAttr<CUDADeviceAttr>()) { + const CXXMethodDecl *MD; + // Variable pointer template parameters have a value that is the address + // of the variable. + if (const auto *VD = dyn_cast<VarDecl>(D)) + V = CGM.GetAddrOfGlobalVar(VD); + // Member function pointers have special support for building them, + // though this is currently unsupported in LLVM CodeGen. + else if ((MD = dyn_cast<CXXMethodDecl>(D)) && MD->isInstance()) + V = CGM.getCXXABI().EmitMemberFunctionPointer(MD); + else if (const auto *FD = dyn_cast<FunctionDecl>(D)) + V = CGM.GetAddrOfFunction(FD); + // Member data pointers have special handling too to compute the fixed + // offset within the object. 
+ else if (const auto *MPT = + dyn_cast<MemberPointerType>(T.getTypePtr())) { + // These five lines (& possibly the above member function pointer + // handling) might be able to be refactored to use similar code in + // CodeGenModule::getMemberPointerConstant + uint64_t fieldOffset = CGM.getContext().getFieldOffset(D); + CharUnits chars = + CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset); + V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars); + } + V = V->stripPointerCasts(); } TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, - cast_or_null<llvm::Constant>(V->stripPointerCasts()))); + TheCU, Name, TTy, cast_or_null<llvm::Constant>(V))); } break; case TemplateArgument::NullPtr: { QualType T = TA.getNullPtrType(); @@ -1817,32 +1839,24 @@ CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD, } llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL, - llvm::DIFile *Unit) { - if (auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL)) { - auto T = TS->getSpecializedTemplateOrPartial(); - auto TA = TS->getTemplateArgs().asArray(); - // Collect parameters for a partial specialization - if (T.is<VarTemplatePartialSpecializationDecl *>()) { - const TemplateParameterList *TList = - T.get<VarTemplatePartialSpecializationDecl *>() - ->getTemplateParameters(); - return CollectTemplateParams(TList, TA, Unit); - } - - // Collect parameters for an explicit specialization - if (T.is<VarTemplateDecl *>()) { - const TemplateParameterList *TList = T.get<VarTemplateDecl *>() - ->getTemplateParameters(); - return CollectTemplateParams(TList, TA, Unit); - } - } - return llvm::DINodeArray(); + llvm::DIFile *Unit) { + // Always get the full list of parameters, not just the ones from the + // specialization. A partial specialization may have fewer parameters than + // there are arguments. + auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL); + if (!TS) + return llvm::DINodeArray(); + VarTemplateDecl *T = TS->getSpecializedTemplate(); + const TemplateParameterList *TList = T->getTemplateParameters(); + auto TA = TS->getTemplateArgs().asArray(); + return CollectTemplateParams(TList, TA, Unit); } llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams( const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) { - // Always get the full list of parameters, not just the ones from - // the specialization. + // Always get the full list of parameters, not just the ones from the + // specialization. A partial specialization may have fewer parameters than + // there are arguments. TemplateParameterList *TPList = TSpecial->getSpecializedTemplate()->getTemplateParameters(); const TemplateArgumentList &TAList = TSpecial->getTemplateArgs(); @@ -1875,6 +1889,58 @@ StringRef CGDebugInfo::getVTableName(const CXXRecordDecl *RD) { return internString("_vptr$", RD->getNameAsString()); } +StringRef CGDebugInfo::getDynamicInitializerName(const VarDecl *VD, + DynamicInitKind StubKind, + llvm::Function *InitFn) { + // If we're not emitting codeview, use the mangled name. For Itanium, this is + // arbitrary. + if (!CGM.getCodeGenOpts().EmitCodeView) + return InitFn->getName(); + + // Print the normal qualified name for the variable, then break off the last + // NNS, and add the appropriate other text. Clang always prints the global + // variable name without template arguments, so we can use rsplit("::") and + // then recombine the pieces. 
+ SmallString<128> QualifiedGV; + StringRef Quals; + StringRef GVName; + { + llvm::raw_svector_ostream OS(QualifiedGV); + VD->printQualifiedName(OS, getPrintingPolicy()); + std::tie(Quals, GVName) = OS.str().rsplit("::"); + if (GVName.empty()) + std::swap(Quals, GVName); + } + + SmallString<128> InitName; + llvm::raw_svector_ostream OS(InitName); + if (!Quals.empty()) + OS << Quals << "::"; + + switch (StubKind) { + case DynamicInitKind::NoStub: + llvm_unreachable("not an initializer"); + case DynamicInitKind::Initializer: + OS << "`dynamic initializer for '"; + break; + case DynamicInitKind::AtExit: + OS << "`dynamic atexit destructor for '"; + break; + } + + OS << GVName; + + // Add any template specialization args. + if (const auto *VTpl = dyn_cast<VarTemplateSpecializationDecl>(VD)) { + printTemplateArgumentList(OS, VTpl->getTemplateArgs().asArray(), + getPrintingPolicy()); + } + + OS << '\''; + + return internString(OS.str()); +} + void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DICompositeType *RecordTy) { @@ -1954,6 +2020,20 @@ llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, return T; } +void CGDebugInfo::addHeapAllocSiteMetadata(llvm::Instruction *CI, + QualType D, + SourceLocation Loc) { + llvm::MDNode *node; + if (D.getTypePtr()->isVoidPointerType()) { + node = llvm::MDNode::get(CGM.getLLVMContext(), None); + } else { + QualType PointeeTy = D.getTypePtr()->getPointeeType(); + node = getOrCreateType(PointeeTy, getOrCreateFile(Loc)); + } + + CI->setMetadata("heapallocsite", node); +} + void CGDebugInfo::completeType(const EnumDecl *ED) { if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; @@ -2297,7 +2377,14 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, } bool IsRootModule = M ? !M->Parent : true; - if (CreateSkeletonCU && IsRootModule) { + // When a module name is specified as -fmodule-name, that module gets a + // clang::Module object, but it won't actually be built or imported; it will + // be textual. + if (CreateSkeletonCU && IsRootModule && Mod.getASTFile().empty() && M) + assert(StringRef(M->Name).startswith(CGM.getLangOpts().ModuleName) && + "clang module without ASTFile must be specified by -fmodule-name"); + + if (CreateSkeletonCU && IsRootModule && !Mod.getASTFile().empty()) { // PCH files don't have a signature field in the control block, // but LLVM detects skeleton CUs by looking for a non-zero DWO id. // We use the lower 64 bits for debug info. @@ -2314,6 +2401,7 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, Signature); DIB.finalize(); } + llvm::DIModule *Parent = IsRootModule ? 
nullptr : getOrCreateModuleRef( @@ -2768,6 +2856,9 @@ static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { case Type::Paren: T = cast<ParenType>(T)->getInnerType(); break; + case Type::MacroQualified: + T = cast<MacroQualifiedType>(T)->getUnderlyingType(); + break; case Type::SubstTemplateTypeParm: T = cast<SubstTemplateTypeParmType>(T)->getReplacementType(); break; @@ -2947,6 +3038,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::DeducedTemplateSpecialization: case Type::Elaborated: case Type::Paren: + case Type::MacroQualified: case Type::SubstTemplateTypeParm: case Type::TypeOfExpr: case Type::TypeOf: @@ -3021,9 +3113,9 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { else Flags |= llvm::DINode::FlagTypePassByValue; - // Record if a C++ record is trivial type. - if (CXXRD->isTrivial()) - Flags |= llvm::DINode::FlagTrivial; + // Record if a C++ record is non-trivial type. + if (!CXXRD->isTrivial()) + Flags |= llvm::DINode::FlagNonTrivial; } llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType( @@ -3443,6 +3535,11 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) { Name = getObjCMethodName(OMD); Flags |= llvm::DINode::FlagPrototyped; + } else if (isa<VarDecl>(D) && + GD.getDynamicInitKind() != DynamicInitKind::NoStub) { + // This is a global initializer or atexit destructor for a global variable. + Name = getDynamicInitializerName(cast<VarDecl>(D), GD.getDynamicInitKind(), + Fn); } else { // Use llvm function name. Name = Fn->getName(); @@ -3488,6 +3585,15 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (HasDecl && isa<FunctionDecl>(D)) DeclCache[D->getCanonicalDecl()].reset(SP); + // We use the SPDefCache only in the case when the debug entry values option + // is set, in order to speed up parameters modification analysis. + // + // FIXME: Use AbstractCallee here to support ObjCMethodDecl. + if (CGM.getCodeGenOpts().EnableDebugEntryValues && HasDecl) + if (auto *FD = dyn_cast<FunctionDecl>(D)) + if (FD->hasBody() && !FD->param_empty()) + SPDefCache[FD].reset(SP); + if (CGM.getCodeGenOpts().DwarfVersion >= 5) { // Starting with DWARF V5 method declarations are emitted as children of // the interface type. @@ -3516,7 +3622,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, } void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, - QualType FnType) { + QualType FnType, llvm::Function *Fn) { StringRef Name; StringRef LinkageName; @@ -3526,7 +3632,9 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; llvm::DIFile *Unit = getOrCreateFile(Loc); - llvm::DIScope *FDContext = getDeclContextDescriptor(D); + bool IsDeclForCallSite = Fn ? true : false; + llvm::DIScope *FDContext = + IsDeclForCallSite ? Unit : getDeclContextDescriptor(D); llvm::DINodeArray TParamsArray; if (isa<FunctionDecl>(D)) { // If there is a DISubprogram for this function available then use it. 
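Aside, not part of the patch: the getDynamicInitializerName helper added earlier in this file builds CodeView-style names such as "`dynamic initializer for 'x'" by splitting the variable's qualified name on its last "::" with StringRef::rsplit. A small standalone sketch of that splitting step; the function name splitScope is illustrative:

#include "llvm/ADT/StringRef.h"
#include <utility>

// Returns {enclosing scope, unqualified name}; the scope is empty when the
// input contains no "::" at all.
static std::pair<llvm::StringRef, llvm::StringRef>
splitScope(llvm::StringRef Qualified) {
  llvm::StringRef Quals, Name;
  std::tie(Quals, Name) = Qualified.rsplit("::");
  // rsplit leaves the whole string in the first element when "::" is absent,
  // so swap it into the name slot, mirroring the logic in the patch.
  if (Name.empty())
    std::swap(Quals, Name);
  return {Quals, Name};   // "N::M::v" -> {"N::M", "v"}, "v" -> {"", "v"}
}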
@@ -3553,10 +3661,38 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, if (CGM.getLangOpts().Optimize) SPFlags |= llvm::DISubprogram::SPFlagOptimized; - DBuilder.retainType(DBuilder.createFunction( + llvm::DISubprogram *SP = DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags, - TParamsArray.get(), getFunctionDeclaration(D))); + TParamsArray.get(), getFunctionDeclaration(D)); + + if (IsDeclForCallSite) + Fn->setSubprogram(SP); + + DBuilder.retainType(SP); +} + +void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke, + QualType CalleeType, + const FunctionDecl *CalleeDecl) { + auto &CGOpts = CGM.getCodeGenOpts(); + if (!CGOpts.EnableDebugEntryValues || !CGM.getLangOpts().Optimize || + !CallOrInvoke || + CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) + return; + + auto *Func = CallOrInvoke->getCalledFunction(); + if (!Func) + return; + + // If there is no DISubprogram attached to the function being called, + // create the one describing the function in order to have complete + // call site debug info. + if (Func->getSubprogram()) + return; + + if (!CalleeDecl->isStatic() && !CalleeDecl->isInlined()) + EmitFunctionDecl(CalleeDecl, CalleeDecl->getLocation(), CalleeType, Func); } void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) { @@ -3735,7 +3871,8 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, llvm::Optional<unsigned> ArgNo, - CGBuilderTy &Builder) { + CGBuilderTy &Builder, + const bool UsePointerValue) { assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); if (VD->hasAttr<NoDebugAttr>()) @@ -3840,6 +3977,16 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, } } + // Clang stores the sret pointer provided by the caller in a static alloca. + // Use DW_OP_deref to tell the debugger to load the pointer and treat it as + // the address of the variable. + if (UsePointerValue) { + assert(std::find(Expr.begin(), Expr.end(), llvm::dwarf::DW_OP_deref) == + Expr.end() && + "Debug info already contains DW_OP_deref."); + Expr.push_back(llvm::dwarf::DW_OP_deref); + } + // Create the descriptor for the variable. auto *D = ArgNo ? 
DBuilder.createParameterVariable( Scope, Name, *ArgNo, Unit, Line, Ty, @@ -3853,14 +4000,46 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), Builder.GetInsertBlock()); + if (CGM.getCodeGenOpts().EnableDebugEntryValues && ArgNo) { + if (auto *PD = dyn_cast<ParmVarDecl>(VD)) + ParamCache[PD].reset(D); + } + return D; } llvm::DILocalVariable * CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, - CGBuilderTy &Builder) { + CGBuilderTy &Builder, + const bool UsePointerValue) { assert(DebugKind >= codegenoptions::LimitedDebugInfo); - return EmitDeclare(VD, Storage, llvm::None, Builder); + return EmitDeclare(VD, Storage, llvm::None, Builder, UsePointerValue); +} + +void CGDebugInfo::EmitLabel(const LabelDecl *D, CGBuilderTy &Builder) { + assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); + + if (D->hasAttr<NoDebugAttr>()) + return; + + auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back()); + llvm::DIFile *Unit = getOrCreateFile(D->getLocation()); + + // Get location information. + unsigned Line = getLineNumber(D->getLocation()); + unsigned Column = getColumnNumber(D->getLocation()); + + StringRef Name = D->getName(); + + // Create the descriptor for the label. + auto *L = + DBuilder.createLabel(Scope, Name, Unit, Line, CGM.getLangOpts().Optimize); + + // Insert an llvm.dbg.label into the current block. + DBuilder.insertLabel(L, + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), + Builder.GetInsertBlock()); } llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, @@ -4125,7 +4304,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, llvm::DIDerivedType * CGDebugInfo::getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D) { - if (!D->isStaticDataMember()) + if (!D || !D->isStaticDataMember()) return nullptr; auto MI = StaticDataMemberCache.find(D->getCanonicalDecl()); @@ -4207,6 +4386,14 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, SmallVector<int64_t, 4> Expr; unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(D->getType()); + if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) { + if (D->hasAttr<CUDASharedAttr>()) + AddressSpace = + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared); + else if (D->hasAttr<CUDAConstantAttr>()) + AddressSpace = + CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant); + } AppendAddressSpaceXDeref(AddressSpace, Expr); GVE = DBuilder.createGlobalVariableExpression( @@ -4229,22 +4416,32 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { llvm::DIFile *Unit = getOrCreateFile(VD->getLocation()); StringRef Name = VD->getName(); llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit); + + // Do not use global variables for enums, unless in CodeView. if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) { const auto *ED = cast<EnumDecl>(ECD->getDeclContext()); assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?"); - Ty = getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit); + (void)ED; + + // If CodeView, emit enums as global variables, unless they are defined + // inside a class. We do this because MSVC doesn't emit S_CONSTANTs for + // enums in classes, and because it is difficult to attach this scope + // information to the global variable. 
+ if (!CGM.getCodeGenOpts().EmitCodeView || + isa<RecordDecl>(ED->getDeclContext())) + return; } - // Do not use global variables for enums. - // - // FIXME: why not? - if (Ty->getTag() == llvm::dwarf::DW_TAG_enumeration_type) - return; - // Do not emit separate definitions for function local const/statics. + + llvm::DIScope *DContext = nullptr; + + // Do not emit separate definitions for function local consts. if (isa<FunctionDecl>(VD->getDeclContext())) return; + + // Emit definition for static members in CodeView. VD = cast<ValueDecl>(VD->getCanonicalDecl()); - auto *VarD = cast<VarDecl>(VD); - if (VarD->isStaticDataMember()) { + auto *VarD = dyn_cast<VarDecl>(VD); + if (VarD && VarD->isStaticDataMember()) { auto *RD = cast<RecordDecl>(VarD->getDeclContext()); getDeclContextDescriptor(VarD); // Ensure that the type is retained even though it's otherwise unreferenced. @@ -4253,10 +4450,16 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { // through its scope. RetainedTypes.push_back( CGM.getContext().getRecordType(RD).getAsOpaquePtr()); - return; - } - llvm::DIScope *DContext = getDeclContextDescriptor(VD); + if (!CGM.getCodeGenOpts().EmitCodeView) + return; + + // Use the global scope for static members. + DContext = getContextDescriptor( + cast<Decl>(CGM.getContext().getTranslationUnitDecl()), TheCU); + } else { + DContext = getDeclContextDescriptor(VD); + } auto &GV = DeclCache[VD]; if (GV) @@ -4393,6 +4596,29 @@ void CGDebugInfo::setDwoId(uint64_t Signature) { TheCU->setDWOId(Signature); } +/// Analyzes each function parameter to determine whether it is constant +/// throughout the function body. +static void analyzeParametersModification( + ASTContext &Ctx, + llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> &SPDefCache, + llvm::DenseMap<const ParmVarDecl *, llvm::TrackingMDRef> &ParamCache) { + for (auto &SP : SPDefCache) { + auto *FD = SP.first; + assert(FD->hasBody() && "Functions must have body here"); + const Stmt *FuncBody = (*FD).getBody(); + for (auto Parm : FD->parameters()) { + ExprMutationAnalyzer FuncAnalyzer(*FuncBody, Ctx); + if (FuncAnalyzer.isMutated(Parm)) + continue; + + auto I = ParamCache.find(Parm); + assert(I != ParamCache.end() && "Parameters should be already cached"); + auto *DIParm = cast<llvm::DILocalVariable>(I->second); + DIParm->setIsNotModified(); + } + } +} + void CGDebugInfo::finalize() { // Creating types might create further types - invalidating the current // element and the size(), so don't cache/reference them. @@ -4465,6 +4691,10 @@ void CGDebugInfo::finalize() { if (auto MD = TypeCache[RT]) DBuilder.retainType(cast<llvm::DIType>(MD)); + if (CGM.getCodeGenOpts().EnableDebugEntryValues) + // This will be used to emit debug entry values. + analyzeParametersModification(CGM.getContext(), SPDefCache, ParamCache); + DBuilder.finalize(); } @@ -4497,7 +4727,10 @@ llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const { // were part of DWARF v4. 
bool SupportsDWARFv4Ext = CGM.getCodeGenOpts().DwarfVersion == 4 && - CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB; + (CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB || + (CGM.getCodeGenOpts().EnableDebugEntryValues && + CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::GDB)); + if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5) return llvm::DINode::FlagZero; diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index 031e40b9dde9..7edbea86633a 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -1,9 +1,8 @@ //===--- CGDebugInfo.h - DebugInfo for LLVM CodeGen -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -42,6 +41,7 @@ class ObjCInterfaceDecl; class ObjCIvarDecl; class UsingDecl; class VarDecl; +enum class DynamicInitKind : unsigned; namespace CodeGen { class CodeGenModule; @@ -134,6 +134,10 @@ class CGDebugInfo { llvm::DenseMap<const char *, llvm::TrackingMDRef> DIFileCache; llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPCache; + /// Cache function definitions relevant to use for parameters mutation + /// analysis. + llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPDefCache; + llvm::DenseMap<const ParmVarDecl *, llvm::TrackingMDRef> ParamCache; /// Cache declarations relevant to DW_TAG_imported_declarations (C++ /// using declarations) that aren't covered by other more specific caches. llvm::DenseMap<const Decl *, llvm::TrackingMDRef> DeclCache; @@ -405,7 +409,15 @@ public: void EmitInlineFunctionEnd(CGBuilderTy &Builder); /// Emit debug info for a function declaration. - void EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, QualType FnType); + /// \p Fn is set only when a declaration for a debug call site gets created. + void EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, + QualType FnType, llvm::Function *Fn = nullptr); + + /// Emit debug info for an extern function being called. + /// This is needed for call site debug info. + void EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke, + QualType CalleeType, + const FunctionDecl *CalleeDecl); /// Constructs the debug code for exiting a function. void EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn); @@ -422,9 +434,13 @@ public: /// declaration. /// Returns a pointer to the DILocalVariable associated with the /// llvm.dbg.declare, or nullptr otherwise. - llvm::DILocalVariable *EmitDeclareOfAutoVariable(const VarDecl *Decl, - llvm::Value *AI, - CGBuilderTy &Builder); + llvm::DILocalVariable * + EmitDeclareOfAutoVariable(const VarDecl *Decl, llvm::Value *AI, + CGBuilderTy &Builder, + const bool UsePointerValue = false); + + /// Emit call to \c llvm.dbg.label for an label. + void EmitLabel(const LabelDecl *D, CGBuilderTy &Builder); /// Emit call to \c llvm.dbg.declare for an imported variable /// declaration in a block. @@ -474,6 +490,10 @@ public: /// Emit standalone debug info for a type. llvm::DIType *getOrCreateStandaloneType(QualType Ty, SourceLocation Loc); + /// Add heapallocsite metadata for MSAllocator calls. 
+ void addHeapAllocSiteMetadata(llvm::Instruction *CallSite, QualType Ty, + SourceLocation Loc); + void completeType(const EnumDecl *ED); void completeType(const RecordDecl *RD); void completeRequiredType(const RecordDecl *RD); @@ -500,7 +520,8 @@ private: /// llvm.dbg.declare, or nullptr otherwise. llvm::DILocalVariable *EmitDeclare(const VarDecl *decl, llvm::Value *AI, llvm::Optional<unsigned> ArgNo, - CGBuilderTy &Builder); + CGBuilderTy &Builder, + const bool UsePointerValue = false); struct BlockByRefType { /// The wrapper struct used inside the __block_literal struct. @@ -642,6 +663,12 @@ private: /// Get the vtable name for the given class. StringRef getVTableName(const CXXRecordDecl *Decl); + /// Get the name to use in the debug info for a dynamic initializer or atexit + /// stub function. + StringRef getDynamicInitializerName(const VarDecl *VD, + DynamicInitKind StubKind, + llvm::Function *InitFn); + /// Get line number for the location. If location is invalid /// then use current location. unsigned getLineNumber(SourceLocation Loc); diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 5959d889b455..6ad43cefc4d2 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -1,9 +1,8 @@ //===--- CGDecl.cpp - Emit LLVM Code for declarations ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -20,6 +19,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" @@ -104,9 +104,11 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Label: // __label__ x; case Decl::Import: case Decl::OMPThreadPrivate: + case Decl::OMPAllocate: case Decl::OMPCapturedExpr: case Decl::OMPRequires: case Decl::Empty: + case Decl::Concept: // None of these decls require codegen support. 
return; @@ -142,6 +144,9 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::OMPDeclareReduction: return CGM.EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(&D), this); + case Decl::OMPDeclareMapper: + return CGM.EmitOMPDeclareMapper(cast<OMPDeclareMapperDecl>(&D), this); + case Decl::Typedef: // typedef int X; case Decl::TypeAlias: { // using X = int; [C++0x] const TypedefNameDecl &TD = cast<TypedefNameDecl>(D); @@ -149,6 +154,8 @@ void CodeGenFunction::EmitDecl(const Decl &D) { if (Ty->isVariablyModifiedType()) EmitVariablyModifiedType(Ty); + + return; } } } @@ -169,7 +176,7 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) { return; llvm::GlobalValue::LinkageTypes Linkage = - CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false); + CGM.getLLVMLinkageVarDefinition(&D, /*IsConstant=*/false); // FIXME: We need to force the emission/use of a guard variable for // some variables even if we can constant-evaluate them because @@ -473,11 +480,12 @@ namespace { template <class Derived> struct DestroyNRVOVariable : EHScopeStack::Cleanup { - DestroyNRVOVariable(Address addr, llvm::Value *NRVOFlag) - : NRVOFlag(NRVOFlag), Loc(addr) {} + DestroyNRVOVariable(Address addr, QualType type, llvm::Value *NRVOFlag) + : NRVOFlag(NRVOFlag), Loc(addr), Ty(type) {} llvm::Value *NRVOFlag; Address Loc; + QualType Ty; void Emit(CodeGenFunction &CGF, Flags flags) override { // Along the exceptions path we always execute the dtor. @@ -504,26 +512,24 @@ namespace { struct DestroyNRVOVariableCXX final : DestroyNRVOVariable<DestroyNRVOVariableCXX> { - DestroyNRVOVariableCXX(Address addr, const CXXDestructorDecl *Dtor, - llvm::Value *NRVOFlag) - : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, NRVOFlag), - Dtor(Dtor) {} + DestroyNRVOVariableCXX(Address addr, QualType type, + const CXXDestructorDecl *Dtor, llvm::Value *NRVOFlag) + : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, type, NRVOFlag), + Dtor(Dtor) {} const CXXDestructorDecl *Dtor; void emitDestructorCall(CodeGenFunction &CGF) { CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, Loc); + /*Delegating=*/false, Loc, Ty); } }; struct DestroyNRVOVariableC final : DestroyNRVOVariable<DestroyNRVOVariableC> { DestroyNRVOVariableC(Address addr, llvm::Value *NRVOFlag, QualType Ty) - : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, NRVOFlag), Ty(Ty) {} - - QualType Ty; + : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, Ty, NRVOFlag) {} void emitDestructorCall(CodeGenFunction &CGF) { CGF.destroyNonTrivialCStruct(CGF, Loc, Ty); @@ -535,7 +541,7 @@ namespace { CallStackRestore(Address Stack) : Stack(Stack) {} void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::Value *V = CGF.Builder.CreateLoad(Stack); - llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); CGF.Builder.CreateCall(F, V); } }; @@ -915,9 +921,8 @@ static void emitStoresForInitAfterBZero(CodeGenModule &CGM, // If necessary, get a pointer to the element and emit it. if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt)) emitStoresForInitAfterBZero( - CGM, Elt, - Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()), - isVolatile, Builder); + CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), isVolatile, + Builder); } return; } @@ -930,10 +935,9 @@ static void emitStoresForInitAfterBZero(CodeGenModule &CGM, // If necessary, get a pointer to the element and emit it. 
if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt)) - emitStoresForInitAfterBZero( - CGM, Elt, - Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()), - isVolatile, Builder); + emitStoresForInitAfterBZero(CGM, Elt, + Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), + isVolatile, Builder); } } @@ -962,103 +966,130 @@ static bool shouldUseBZeroPlusStoresToInitialize(llvm::Constant *Init, /// FIXME We could be more clever, as we are for bzero above, and generate /// memset followed by stores. It's unclear that's worth the effort. static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init, - uint64_t GlobalSize) { + uint64_t GlobalSize, + const llvm::DataLayout &DL) { uint64_t SizeLimit = 32; if (GlobalSize <= SizeLimit) return nullptr; - return llvm::isBytewiseValue(Init); + return llvm::isBytewiseValue(Init, DL); } -static llvm::Constant *patternFor(CodeGenModule &CGM, llvm::Type *Ty) { - // The following value is a guaranteed unmappable pointer value and has a - // repeated byte-pattern which makes it easier to synthesize. We use it for - // pointers as well as integers so that aggregates are likely to be - // initialized with this repeated value. - constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull; - // For 32-bit platforms it's a bit trickier because, across systems, only the - // zero page can reasonably be expected to be unmapped, and even then we need - // a very low address. We use a smaller value, and that value sadly doesn't - // have a repeated byte-pattern. We don't use it for integers. - constexpr uint32_t SmallValue = 0x000000AA; - // Floating-point values are initialized as NaNs because they propagate. Using - // a repeated byte pattern means that it will be easier to initialize - // all-floating-point aggregates and arrays with memset. Further, aggregates - // which mix integral and a few floats might also initialize with memset - // followed by a handful of stores for the floats. Using fairly unique NaNs - // also means they'll be easier to distinguish in a crash. - constexpr bool NegativeNaN = true; - constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull; - if (Ty->isIntOrIntVectorTy()) { - unsigned BitWidth = cast<llvm::IntegerType>( - Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) - ->getBitWidth(); - if (BitWidth <= 64) - return llvm::ConstantInt::get(Ty, LargeValue); - return llvm::ConstantInt::get( - Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue))); - } - if (Ty->isPtrOrPtrVectorTy()) { - auto *PtrTy = cast<llvm::PointerType>( - Ty->isVectorTy() ? Ty->getVectorElementType() : Ty); - unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth( - PtrTy->getAddressSpace()); - llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth); - uint64_t IntValue; - switch (PtrWidth) { - default: - llvm_unreachable("pattern initialization of unsupported pointer width"); - case 64: - IntValue = LargeValue; - break; - case 32: - IntValue = SmallValue; - break; +/// Decide whether we want to split a constant structure or array store into a +/// sequence of its fields' stores. This may cost us code size and compilation +/// speed, but plays better with store optimizations. +static bool shouldSplitConstantStore(CodeGenModule &CGM, + uint64_t GlobalByteSize) { + // Don't break things that occupy more than one cacheline. 
+ uint64_t ByteSizeLimit = 64; + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return false; + if (GlobalByteSize <= ByteSizeLimit) + return true; + return false; +} + +enum class IsPattern { No, Yes }; + +/// Generate a constant filled with either a pattern or zeroes. +static llvm::Constant *patternOrZeroFor(CodeGenModule &CGM, IsPattern isPattern, + llvm::Type *Ty) { + if (isPattern == IsPattern::Yes) + return initializationPatternFor(CGM, Ty); + else + return llvm::Constant::getNullValue(Ty); +} + +static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern, + llvm::Constant *constant); + +/// Helper function for constWithPadding() to deal with padding in structures. +static llvm::Constant *constStructWithPadding(CodeGenModule &CGM, + IsPattern isPattern, + llvm::StructType *STy, + llvm::Constant *constant) { + const llvm::DataLayout &DL = CGM.getDataLayout(); + const llvm::StructLayout *Layout = DL.getStructLayout(STy); + llvm::Type *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext()); + unsigned SizeSoFar = 0; + SmallVector<llvm::Constant *, 8> Values; + bool NestedIntact = true; + for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { + unsigned CurOff = Layout->getElementOffset(i); + if (SizeSoFar < CurOff) { + assert(!STy->isPacked()); + auto *PadTy = llvm::ArrayType::get(Int8Ty, CurOff - SizeSoFar); + Values.push_back(patternOrZeroFor(CGM, isPattern, PadTy)); } - auto *Int = llvm::ConstantInt::get(IntTy, IntValue); - return llvm::ConstantExpr::getIntToPtr(Int, PtrTy); - } - if (Ty->isFPOrFPVectorTy()) { - unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( - (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) - ->getFltSemantics()); - llvm::APInt Payload(64, NaNPayload); - if (BitWidth >= 64) - Payload = llvm::APInt::getSplat(BitWidth, Payload); - return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload); - } - if (Ty->isArrayTy()) { - // Note: this doesn't touch tail padding (at the end of an object, before - // the next array object). It is instead handled by replaceUndef. - auto *ArrTy = cast<llvm::ArrayType>(Ty); - llvm::SmallVector<llvm::Constant *, 8> Element( - ArrTy->getNumElements(), patternFor(CGM, ArrTy->getElementType())); - return llvm::ConstantArray::get(ArrTy, Element); - } - - // Note: this doesn't touch struct padding. It will initialize as much union - // padding as is required for the largest type in the union. Padding is - // instead handled by replaceUndef. Stores to structs with volatile members - // don't have a volatile qualifier when initialized according to C++. This is - // fine because stack-based volatiles don't really have volatile semantics - // anyways, and the initialization shouldn't be observable. 
- auto *StructTy = cast<llvm::StructType>(Ty); - llvm::SmallVector<llvm::Constant *, 8> Struct(StructTy->getNumElements()); - for (unsigned El = 0; El != Struct.size(); ++El) - Struct[El] = patternFor(CGM, StructTy->getElementType(El)); - return llvm::ConstantStruct::get(StructTy, Struct); + llvm::Constant *CurOp; + if (constant->isZeroValue()) + CurOp = llvm::Constant::getNullValue(STy->getElementType(i)); + else + CurOp = cast<llvm::Constant>(constant->getAggregateElement(i)); + auto *NewOp = constWithPadding(CGM, isPattern, CurOp); + if (CurOp != NewOp) + NestedIntact = false; + Values.push_back(NewOp); + SizeSoFar = CurOff + DL.getTypeAllocSize(CurOp->getType()); + } + unsigned TotalSize = Layout->getSizeInBytes(); + if (SizeSoFar < TotalSize) { + auto *PadTy = llvm::ArrayType::get(Int8Ty, TotalSize - SizeSoFar); + Values.push_back(patternOrZeroFor(CGM, isPattern, PadTy)); + } + if (NestedIntact && Values.size() == STy->getNumElements()) + return constant; + return llvm::ConstantStruct::getAnon(Values, STy->isPacked()); } -static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D, - CGBuilderTy &Builder, - llvm::Constant *Constant, - CharUnits Align) { +/// Replace all padding bytes in a given constant with either a pattern byte or +/// 0x00. +static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern, + llvm::Constant *constant) { + llvm::Type *OrigTy = constant->getType(); + if (const auto STy = dyn_cast<llvm::StructType>(OrigTy)) + return constStructWithPadding(CGM, isPattern, STy, constant); + if (auto *STy = dyn_cast<llvm::SequentialType>(OrigTy)) { + llvm::SmallVector<llvm::Constant *, 8> Values; + unsigned Size = STy->getNumElements(); + if (!Size) + return constant; + llvm::Type *ElemTy = STy->getElementType(); + bool ZeroInitializer = constant->isZeroValue(); + llvm::Constant *OpValue, *PaddedOp; + if (ZeroInitializer) { + OpValue = llvm::Constant::getNullValue(ElemTy); + PaddedOp = constWithPadding(CGM, isPattern, OpValue); + } + for (unsigned Op = 0; Op != Size; ++Op) { + if (!ZeroInitializer) { + OpValue = constant->getAggregateElement(Op); + PaddedOp = constWithPadding(CGM, isPattern, OpValue); + } + Values.push_back(PaddedOp); + } + auto *NewElemTy = Values[0]->getType(); + if (NewElemTy == ElemTy) + return constant; + if (OrigTy->isArrayTy()) { + auto *ArrayTy = llvm::ArrayType::get(NewElemTy, Size); + return llvm::ConstantArray::get(ArrayTy, Values); + } else { + return llvm::ConstantVector::get(Values); + } + } + return constant; +} + +Address CodeGenModule::createUnnamedGlobalFrom(const VarDecl &D, + llvm::Constant *Constant, + CharUnits Align) { auto FunctionName = [&](const DeclContext *DC) -> std::string { if (const auto *FD = dyn_cast<FunctionDecl>(DC)) { if (const auto *CC = dyn_cast<CXXConstructorDecl>(FD)) return CC->getNameAsString(); if (const auto *CD = dyn_cast<CXXDestructorDecl>(FD)) return CD->getNameAsString(); - return CGM.getMangledName(FD); + return getMangledName(FD); } else if (const auto *OM = dyn_cast<ObjCMethodDecl>(DC)) { return OM->getNameAsString(); } else if (isa<BlockDecl>(DC)) { @@ -1066,26 +1097,47 @@ static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D, } else if (isa<CapturedDecl>(DC)) { return "<captured>"; } else { - llvm::llvm_unreachable_internal("expected a function or method"); + llvm_unreachable("expected a function or method"); } }; - auto *Ty = Constant->getType(); - bool isConstant = true; - llvm::GlobalVariable *InsertBefore = nullptr; - unsigned AS = 
CGM.getContext().getTargetAddressSpace( - CGM.getStringLiteralAddressSpace()); - llvm::GlobalVariable *GV = new llvm::GlobalVariable( - CGM.getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage, - Constant, - "__const." + FunctionName(D.getParentFunctionOrMethod()) + "." + - D.getName(), - InsertBefore, llvm::GlobalValue::NotThreadLocal, AS); - GV->setAlignment(Align.getQuantity()); - GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - Address SrcPtr = Address(GV, Align); - llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), AS); + // Form a simple per-variable cache of these values in case we find we + // want to reuse them. + llvm::GlobalVariable *&CacheEntry = InitializerConstants[&D]; + if (!CacheEntry || CacheEntry->getInitializer() != Constant) { + auto *Ty = Constant->getType(); + bool isConstant = true; + llvm::GlobalVariable *InsertBefore = nullptr; + unsigned AS = + getContext().getTargetAddressSpace(getStringLiteralAddressSpace()); + std::string Name; + if (D.hasGlobalStorage()) + Name = getMangledName(&D).str() + ".const"; + else if (const DeclContext *DC = D.getParentFunctionOrMethod()) + Name = ("__const." + FunctionName(DC) + "." + D.getName()).str(); + else + llvm_unreachable("local variable has no parent function or method"); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage, + Constant, Name, InsertBefore, llvm::GlobalValue::NotThreadLocal, AS); + GV->setAlignment(Align.getQuantity()); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CacheEntry = GV; + } else if (CacheEntry->getAlignment() < Align.getQuantity()) { + CacheEntry->setAlignment(Align.getQuantity()); + } + + return Address(CacheEntry, Align); +} + +static Address createUnnamedGlobalForMemcpyFrom(CodeGenModule &CGM, + const VarDecl &D, + CGBuilderTy &Builder, + llvm::Constant *Constant, + CharUnits Align) { + Address SrcPtr = CGM.createUnnamedGlobalFrom(D, Constant, Align); + llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), + SrcPtr.getAddressSpace()); if (SrcPtr.getType() != BP) SrcPtr = Builder.CreateBitCast(SrcPtr, BP); return SrcPtr; @@ -1096,22 +1148,23 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, CGBuilderTy &Builder, llvm::Constant *constant) { auto *Ty = constant->getType(); - bool isScalar = Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy() || - Ty->isFPOrFPVectorTy(); - if (isScalar) { + uint64_t ConstantSize = CGM.getDataLayout().getTypeAllocSize(Ty); + if (!ConstantSize) + return; + + bool canDoSingleStore = Ty->isIntOrIntVectorTy() || + Ty->isPtrOrPtrVectorTy() || Ty->isFPOrFPVectorTy(); + if (canDoSingleStore) { Builder.CreateStore(constant, Loc, isVolatile); return; } - auto *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext()); - auto *IntPtrTy = CGM.getDataLayout().getIntPtrType(CGM.getLLVMContext()); + auto *SizeVal = llvm::ConstantInt::get(CGM.IntPtrTy, ConstantSize); // If the initializer is all or mostly the same, codegen with bzero / memset // then do a few stores afterward. 
- uint64_t ConstantSize = CGM.getDataLayout().getTypeAllocSize(Ty); - auto *SizeVal = llvm::ConstantInt::get(IntPtrTy, ConstantSize); if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) { - Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, + Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0), SizeVal, isVolatile); bool valueAlreadyCorrect = @@ -1123,7 +1176,9 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, return; } - llvm::Value *Pattern = shouldUseMemSetToInitialize(constant, ConstantSize); + // If the initializer is a repeated byte pattern, use memset. + llvm::Value *Pattern = + shouldUseMemSetToInitialize(constant, ConstantSize, CGM.getDataLayout()); if (Pattern) { uint64_t Value = 0x00; if (!isa<llvm::UndefValue>(Pattern)) { @@ -1131,22 +1186,51 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, assert(AP.getBitWidth() <= 8); Value = AP.getLimitedValue(); } - Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal, + Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, Value), SizeVal, isVolatile); return; } - Builder.CreateMemCpy( - Loc, - createUnnamedGlobalFrom(CGM, D, Builder, constant, Loc.getAlignment()), - SizeVal, isVolatile); + // If the initializer is small, use a handful of stores. + if (shouldSplitConstantStore(CGM, ConstantSize)) { + if (auto *STy = dyn_cast<llvm::StructType>(Ty)) { + // FIXME: handle the case when STy != Loc.getElementType(). + if (STy == Loc.getElementType()) { + for (unsigned i = 0; i != constant->getNumOperands(); i++) { + Address EltPtr = Builder.CreateStructGEP(Loc, i); + emitStoresForConstant( + CGM, D, EltPtr, isVolatile, Builder, + cast<llvm::Constant>(Builder.CreateExtractValue(constant, i))); + } + return; + } + } else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) { + // FIXME: handle the case when ATy != Loc.getElementType(). + if (ATy == Loc.getElementType()) { + for (unsigned i = 0; i != ATy->getNumElements(); i++) { + Address EltPtr = Builder.CreateConstArrayGEP(Loc, i); + emitStoresForConstant( + CGM, D, EltPtr, isVolatile, Builder, + cast<llvm::Constant>(Builder.CreateExtractValue(constant, i))); + } + return; + } + } + } + + // Copy from a global. + Builder.CreateMemCpy(Loc, + createUnnamedGlobalForMemcpyFrom( + CGM, D, Builder, constant, Loc.getAlignment()), + SizeVal, isVolatile); } static void emitStoresForZeroInit(CodeGenModule &CGM, const VarDecl &D, Address Loc, bool isVolatile, CGBuilderTy &Builder) { llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *constant = llvm::Constant::getNullValue(ElTy); + llvm::Constant *constant = + constWithPadding(CGM, IsPattern::No, llvm::Constant::getNullValue(ElTy)); emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); } @@ -1154,7 +1238,8 @@ static void emitStoresForPatternInit(CodeGenModule &CGM, const VarDecl &D, Address Loc, bool isVolatile, CGBuilderTy &Builder) { llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *constant = patternFor(CGM, ElTy); + llvm::Constant *constant = constWithPadding( + CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy)); assert(!isa<llvm::UndefValue>(constant)); emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); } @@ -1170,13 +1255,11 @@ static bool containsUndef(llvm::Constant *constant) { return false; } -static llvm::Constant *replaceUndef(llvm::Constant *constant) { - // FIXME: when doing pattern initialization, replace undef with 0xAA instead. 
- // FIXME: also replace padding between values by creating a new struct type - // which has no padding. +static llvm::Constant *replaceUndef(CodeGenModule &CGM, IsPattern isPattern, + llvm::Constant *constant) { auto *Ty = constant->getType(); if (isa<llvm::UndefValue>(constant)) - return llvm::Constant::getNullValue(Ty); + return patternOrZeroFor(CGM, isPattern, Ty); if (!(Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy())) return constant; if (!containsUndef(constant)) @@ -1184,7 +1267,7 @@ static llvm::Constant *replaceUndef(llvm::Constant *constant) { llvm::SmallVector<llvm::Constant *, 8> Values(constant->getNumOperands()); for (unsigned Op = 0, NumOp = constant->getNumOperands(); Op != NumOp; ++Op) { auto *OpValue = cast<llvm::Constant>(constant->getOperand(Op)); - Values[Op] = replaceUndef(OpValue); + Values[Op] = replaceUndef(CGM, isPattern, OpValue); } if (Ty->isStructTy()) return llvm::ConstantStruct::get(cast<llvm::StructType>(Ty), Values); @@ -1318,10 +1401,15 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { Address address = Address::invalid(); Address AllocaAddr = Address::invalid(); - if (Ty->isConstantSizeType()) { - bool NRVO = getLangOpts().ElideConstructors && - D.isNRVOVariable(); - + Address OpenMPLocalAddr = + getLangOpts().OpenMP + ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) + : Address::invalid(); + bool NRVO = getLangOpts().ElideConstructors && D.isNRVOVariable(); + + if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { + address = OpenMPLocalAddr; + } else if (Ty->isConstantSizeType()) { // If this value is an array or struct with a statically determinable // constant initializer, there are optimizations we can do. // @@ -1361,14 +1449,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // unless: // - it's an NRVO variable. // - we are compiling OpenMP and it's an OpenMP local variable. - - Address OpenMPLocalAddr = - getLangOpts().OpenMP - ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) - : Address::invalid(); - if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { - address = OpenMPLocalAddr; - } else if (NRVO) { + if (NRVO) { // The named return value optimization: allocate this variable in the // return slot, so that we can elide the copy when returning this // variable (C++0x [class.copy]p34). @@ -1451,7 +1532,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { Address Stack = CreateTempAlloca(Int8PtrTy, getPointerAlign(), "saved_stack"); - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave); llvm::Value *V = Builder.CreateCall(F); Builder.CreateStore(V, Stack); @@ -1481,11 +1562,19 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Emit debug info for local var declaration. if (EmitDebugInfo && HaveInsertPoint()) { + Address DebugAddr = address; + bool UsePointerValue = NRVO && ReturnValuePointer.isValid(); DI->setLocation(D.getLocation()); - (void)DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder); + + // If NRVO, use a pointer to the return address. + if (UsePointerValue) + DebugAddr = ReturnValuePointer; + + (void)DI->EmitDeclareOfAutoVariable(&D, DebugAddr.getPointer(), Builder, + UsePointerValue); } - if (D.hasAttr<AnnotateAttr>()) + if (D.hasAttr<AnnotateAttr>() && HaveInsertPoint()) EmitVarAnnotations(&D, address.getPointer()); // Make sure we call @llvm.lifetime.end. 
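[Editorial note on the CGDecl.cpp hunks around this point] They rework how constant local initializers and -ftrivial-auto-var-init fillers are lowered: padding is made explicit with constWithPadding, and emitStoresForConstant now chooses between a single store, bzero or memset plus a few fix-up stores, per-field stores, and a memcpy from an unnamed constant global. The standalone C++ sketch below paraphrases that decision ladder for orientation only; chooseLowering and its boolean parameters are illustrative stand-ins, not clang API, and the real code derives them from the constant's shape, the DataLayout and the CodeGenOptions.

#include <cstdint>

// Paraphrase of emitStoresForConstant's strategy selection after this patch.
enum class InitLowering {
  SingleStore,      // scalar or vector type: one ordinary store
  BZeroPlusStores,  // mostly-zero aggregate: memset(0), then a few stores
  MemSetPattern,    // repeated byte pattern: a single memset with that byte
  PerFieldStores,   // small aggregate at -O1 and above: per-element stores
  MemCpyFromGlobal  // fallback: copy from a private constant global
};

InitLowering chooseLowering(bool isScalar, bool mostlyZero,
                            bool repeatedBytePattern, uint64_t byteSize,
                            unsigned optLevel) {
  if (isScalar)
    return InitLowering::SingleStore;
  if (mostlyZero)                            // shouldUseBZeroPlusStoresToInitialize
    return InitLowering::BZeroPlusStores;
  if (repeatedBytePattern && byteSize > 32)  // shouldUseMemSetToInitialize
    return InitLowering::MemSetPattern;
  if (optLevel > 0 && byteSize <= 64)        // shouldSplitConstantStore:
    return InitLowering::PerFieldStores;     // at most one cache line
  return InitLowering::MemCpyFromGlobal;
}

Zero and pattern initialization then differ only in which filler constant (an all-zero value or the byte pattern from initializationPatternFor) is run through constWithPadding before this selection is applied.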
@@ -1575,6 +1664,87 @@ bool CodeGenFunction::isTrivialInitializer(const Expr *Init) { return false; } +void CodeGenFunction::emitZeroOrPatternForAutoVarInit(QualType type, + const VarDecl &D, + Address Loc) { + auto trivialAutoVarInit = getContext().getLangOpts().getTrivialAutoVarInit(); + CharUnits Size = getContext().getTypeSizeInChars(type); + bool isVolatile = type.isVolatileQualified(); + if (!Size.isZero()) { + switch (trivialAutoVarInit) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + llvm_unreachable("Uninitialized handled by caller"); + case LangOptions::TrivialAutoVarInitKind::Zero: + emitStoresForZeroInit(CGM, D, Loc, isVolatile, Builder); + break; + case LangOptions::TrivialAutoVarInitKind::Pattern: + emitStoresForPatternInit(CGM, D, Loc, isVolatile, Builder); + break; + } + return; + } + + // VLAs look zero-sized to getTypeInfo. We can't emit constant stores to + // them, so emit a memcpy with the VLA size to initialize each element. + // Technically zero-sized or negative-sized VLAs are undefined, and UBSan + // will catch that code, but there exists code which generates zero-sized + // VLAs. Be nice and initialize whatever they requested. + const auto *VlaType = getContext().getAsVariableArrayType(type); + if (!VlaType) + return; + auto VlaSize = getVLASize(VlaType); + auto SizeVal = VlaSize.NumElts; + CharUnits EltSize = getContext().getTypeSizeInChars(VlaSize.Type); + switch (trivialAutoVarInit) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + llvm_unreachable("Uninitialized handled by caller"); + + case LangOptions::TrivialAutoVarInitKind::Zero: + if (!EltSize.isOne()) + SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); + Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, + isVolatile); + break; + + case LangOptions::TrivialAutoVarInitKind::Pattern: { + llvm::Type *ElTy = Loc.getElementType(); + llvm::Constant *Constant = constWithPadding( + CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy)); + CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type); + llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop"); + llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop"); + llvm::BasicBlock *ContBB = createBasicBlock("vla-init.cont"); + llvm::Value *IsZeroSizedVLA = Builder.CreateICmpEQ( + SizeVal, llvm::ConstantInt::get(SizeVal->getType(), 0), + "vla.iszerosized"); + Builder.CreateCondBr(IsZeroSizedVLA, ContBB, SetupBB); + EmitBlock(SetupBB); + if (!EltSize.isOne()) + SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); + llvm::Value *BaseSizeInChars = + llvm::ConstantInt::get(IntPtrTy, EltSize.getQuantity()); + Address Begin = Builder.CreateElementBitCast(Loc, Int8Ty, "vla.begin"); + llvm::Value *End = + Builder.CreateInBoundsGEP(Begin.getPointer(), SizeVal, "vla.end"); + llvm::BasicBlock *OriginBB = Builder.GetInsertBlock(); + EmitBlock(LoopBB); + llvm::PHINode *Cur = Builder.CreatePHI(Begin.getType(), 2, "vla.cur"); + Cur->addIncoming(Begin.getPointer(), OriginBB); + CharUnits CurAlign = Loc.getAlignment().alignmentOfArrayElement(EltSize); + Builder.CreateMemCpy(Address(Cur, CurAlign), + createUnnamedGlobalForMemcpyFrom( + CGM, D, Builder, Constant, ConstantAlign), + BaseSizeInChars, isVolatile); + llvm::Value *Next = + Builder.CreateInBoundsGEP(Int8Ty, Cur, BaseSizeInChars, "vla.next"); + llvm::Value *Done = Builder.CreateICmpEQ(Next, End, "vla-init.isdone"); + Builder.CreateCondBr(Done, ContBB, LoopBB); + Cur->addIncoming(Next, LoopBB); + EmitBlock(ContBB); + } 
break; + } +} + void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { assert(emission.Variable && "emission was not valid!"); @@ -1585,8 +1755,6 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { auto DL = ApplyDebugLocation::CreateDefaultArtificial(*this, D.getLocation()); QualType type = D.getType(); - bool isVolatile = type.isVolatileQualified(); - // If this local has an initializer, emit it now. const Expr *Init = D.getInit(); @@ -1620,8 +1788,9 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { bool capturedByInit = Init && emission.IsEscapingByRef && isCapturedBy(D, Init); - Address Loc = - capturedByInit ? emission.Addr : emission.getObjectAddress(*this); + bool locIsByrefHeader = !capturedByInit; + const Address Loc = + locIsByrefHeader ? emission.getObjectAddress(*this) : emission.Addr; // Note: constexpr already initializes everything correctly. LangOptions::TrivialAutoVarInitKind trivialAutoVarInit = @@ -1631,103 +1800,46 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { ? LangOptions::TrivialAutoVarInitKind::Uninitialized : getContext().getLangOpts().getTrivialAutoVarInit())); - auto initializeWhatIsTechnicallyUninitialized = [&]() { + auto initializeWhatIsTechnicallyUninitialized = [&](Address Loc) { if (trivialAutoVarInit == LangOptions::TrivialAutoVarInitKind::Uninitialized) return; - CharUnits Size = getContext().getTypeSizeInChars(type); - if (!Size.isZero()) { - switch (trivialAutoVarInit) { - case LangOptions::TrivialAutoVarInitKind::Uninitialized: - llvm_unreachable("Uninitialized handled above"); - case LangOptions::TrivialAutoVarInitKind::Zero: - emitStoresForZeroInit(CGM, D, Loc, isVolatile, Builder); - break; - case LangOptions::TrivialAutoVarInitKind::Pattern: - emitStoresForPatternInit(CGM, D, Loc, isVolatile, Builder); - break; - } - return; - } - - // VLAs look zero-sized to getTypeInfo. We can't emit constant stores to - // them, so emit a memcpy with the VLA size to initialize each element. - // Technically zero-sized or negative-sized VLAs are undefined, and UBSan - // will catch that code, but there exists code which generates zero-sized - // VLAs. Be nice and initialize whatever they requested. - const VariableArrayType *VlaType = - dyn_cast_or_null<VariableArrayType>(getContext().getAsArrayType(type)); - if (!VlaType) - return; - auto VlaSize = getVLASize(VlaType); - auto SizeVal = VlaSize.NumElts; - CharUnits EltSize = getContext().getTypeSizeInChars(VlaSize.Type); - switch (trivialAutoVarInit) { - case LangOptions::TrivialAutoVarInitKind::Uninitialized: - llvm_unreachable("Uninitialized handled above"); - - case LangOptions::TrivialAutoVarInitKind::Zero: - if (!EltSize.isOne()) - SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); - Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, - isVolatile); - break; + // Only initialize a __block's storage: we always initialize the header. 
+ if (emission.IsEscapingByRef && !locIsByrefHeader) + Loc = emitBlockByrefAddress(Loc, &D, /*follow=*/false); - case LangOptions::TrivialAutoVarInitKind::Pattern: { - llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *Constant = patternFor(CGM, ElTy); - CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type); - llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop"); - llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop"); - llvm::BasicBlock *ContBB = createBasicBlock("vla-init.cont"); - llvm::Value *IsZeroSizedVLA = Builder.CreateICmpEQ( - SizeVal, llvm::ConstantInt::get(SizeVal->getType(), 0), - "vla.iszerosized"); - Builder.CreateCondBr(IsZeroSizedVLA, ContBB, SetupBB); - EmitBlock(SetupBB); - if (!EltSize.isOne()) - SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); - llvm::Value *BaseSizeInChars = - llvm::ConstantInt::get(IntPtrTy, EltSize.getQuantity()); - Address Begin = Builder.CreateElementBitCast(Loc, Int8Ty, "vla.begin"); - llvm::Value *End = - Builder.CreateInBoundsGEP(Begin.getPointer(), SizeVal, "vla.end"); - llvm::BasicBlock *OriginBB = Builder.GetInsertBlock(); - EmitBlock(LoopBB); - llvm::PHINode *Cur = Builder.CreatePHI(Begin.getType(), 2, "vla.cur"); - Cur->addIncoming(Begin.getPointer(), OriginBB); - CharUnits CurAlign = Loc.getAlignment().alignmentOfArrayElement(EltSize); - Builder.CreateMemCpy( - Address(Cur, CurAlign), - createUnnamedGlobalFrom(CGM, D, Builder, Constant, ConstantAlign), - BaseSizeInChars, isVolatile); - llvm::Value *Next = - Builder.CreateInBoundsGEP(Int8Ty, Cur, BaseSizeInChars, "vla.next"); - llvm::Value *Done = Builder.CreateICmpEQ(Next, End, "vla-init.isdone"); - Builder.CreateCondBr(Done, ContBB, LoopBB); - Cur->addIncoming(Next, LoopBB); - EmitBlock(ContBB); - } break; - } + return emitZeroOrPatternForAutoVarInit(type, D, Loc); }; - if (isTrivialInitializer(Init)) { - initializeWhatIsTechnicallyUninitialized(); - return; - } + if (isTrivialInitializer(Init)) + return initializeWhatIsTechnicallyUninitialized(Loc); llvm::Constant *constant = nullptr; - if (emission.IsConstantAggregate || D.isConstexpr()) { + if (emission.IsConstantAggregate || + D.mightBeUsableInConstantExpressions(getContext())) { assert(!capturedByInit && "constant init contains a capturing block?"); constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D); - if (constant && trivialAutoVarInit != - LangOptions::TrivialAutoVarInitKind::Uninitialized) - constant = replaceUndef(constant); + if (constant && !constant->isZeroValue() && + (trivialAutoVarInit != + LangOptions::TrivialAutoVarInitKind::Uninitialized)) { + IsPattern isPattern = + (trivialAutoVarInit == LangOptions::TrivialAutoVarInitKind::Pattern) + ? IsPattern::Yes + : IsPattern::No; + // C guarantees that brace-init with fewer initializers than members in + // the aggregate will initialize the rest of the aggregate as-if it were + // static initialization. In turn static initialization guarantees that + // padding is initialized to zero bits. We could instead pattern-init if D + // has any ImplicitValueInitExpr, but that seems to be unintuitive + // behavior. 
+ constant = constWithPadding(CGM, IsPattern::No, + replaceUndef(CGM, isPattern, constant)); + } } if (!constant) { - initializeWhatIsTechnicallyUninitialized(); + initializeWhatIsTechnicallyUninitialized(Loc); LValue lv = MakeAddrLValue(Loc, type); lv.setNonGC(true); return EmitExprAsInit(Init, &D, lv, capturedByInit); @@ -1741,10 +1853,9 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { } llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace()); - if (Loc.getType() != BP) - Loc = Builder.CreateBitCast(Loc, BP); - - emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); + emitStoresForConstant( + CGM, D, (Loc.getType() == BP) ? Loc : Builder.CreateBitCast(Loc, BP), + type.isVolatileQualified(), Builder, constant); } /// Emit an expression as an initializer for an object (variable, field, etc.) @@ -1789,7 +1900,7 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, if (isa<VarDecl>(D)) Overlap = AggValueSlot::DoesNotOverlap; else if (auto *FD = dyn_cast<FieldDecl>(D)) - Overlap = overlapForFieldInit(FD); + Overlap = getOverlapForFieldInit(FD); // TODO: how can we delay here if D is captured by its initializer? EmitAggExpr(init, AggValueSlot::forLValue(lvalue, AggValueSlot::IsDestructed, @@ -1828,7 +1939,7 @@ void CodeGenFunction::emitAutoVarTypeCleanup( if (emission.NRVOFlag) { assert(!type->isArrayType()); CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor(); - EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, dtor, + EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, type, dtor, emission.NRVOFlag); return; } @@ -2199,7 +2310,7 @@ void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin, } /// Lazily declare the @llvm.lifetime.start intrinsic. -llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { +llvm::Function *CodeGenModule::getLLVMLifetimeStartFn() { if (LifetimeStartFn) return LifetimeStartFn; LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(), @@ -2208,7 +2319,7 @@ llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { } /// Lazily declare the @llvm.lifetime.end intrinsic. -llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() { +llvm::Function *CodeGenModule::getLLVMLifetimeEndFn() { if (LifetimeEndFn) return LifetimeEndFn; LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(), @@ -2417,6 +2528,13 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, getOpenMPRuntime().emitUserDefinedReduction(CGF, D); } +void CodeGenModule::EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D, + CodeGenFunction *CGF) { + if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed())) + return; + // FIXME: need to implement mapper code generation +} + void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { - getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D); + getOpenMPRuntime().checkArchForUnifiedAddressing(D); } diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 9aa31f181e99..7a0605b8450a 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -1,9 +1,8 @@ //===--- CGDeclCXX.cpp - Emit LLVM Code for C++ declarations --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,6 +14,7 @@ #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" +#include "TargetInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" @@ -75,7 +75,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, // bails even if the attribute is not present. if (D.isNoDestroy(CGF.getContext())) return; - + CodeGenModule &CGM = CGF.CGM; // FIXME: __attribute__((cleanup)) ? @@ -98,7 +98,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, return; } - llvm::Constant *Func; + llvm::FunctionCallee Func; llvm::Constant *Argument; // Special-case non-array C++ destructors, if they have the right signature. @@ -118,10 +118,23 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, assert(!Record->hasTrivialDestructor()); CXXDestructorDecl *Dtor = Record->getDestructor(); - Func = CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete); - Argument = llvm::ConstantExpr::getBitCast( - Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo()); - + Func = CGM.getAddrAndTypeOfCXXStructor(GlobalDecl(Dtor, Dtor_Complete)); + if (CGF.getContext().getLangOpts().OpenCL) { + auto DestAS = + CGM.getTargetCodeGenInfo().getAddrSpaceOfCxaAtexitPtrParam(); + auto DestTy = CGF.getTypes().ConvertType(Type)->getPointerTo( + CGM.getContext().getTargetAddressSpace(DestAS)); + auto SrcAS = D.getType().getQualifiers().getAddressSpace(); + if (DestAS == SrcAS) + Argument = llvm::ConstantExpr::getBitCast(Addr.getPointer(), DestTy); + else + // FIXME: On addr space mismatch we are passing NULL. The generation + // of the global destructor function should be adjusted accordingly. + Argument = llvm::ConstantPointerNull::get(DestTy); + } else { + Argument = llvm::ConstantExpr::getBitCast( + Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo()); + } // Otherwise, the standard logic requires a helper function. } else { Func = CodeGenFunction(CGM) @@ -150,7 +163,7 @@ void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) { llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start; // Overloaded address space type. llvm::Type *ObjectPtr[1] = {Int8PtrTy}; - llvm::Constant *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); + llvm::Function *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); // Emit a call with the size in bytes of the object. uint64_t Width = Size.getQuantity(); @@ -215,8 +228,8 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, /// Create a stub function, suitable for being passed to atexit, /// which passes the given address to the given destructor function. -llvm::Constant *CodeGenFunction::createAtExitStub(const VarDecl &VD, - llvm::Constant *dtor, +llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD, + llvm::FunctionCallee dtor, llvm::Constant *addr) { // Get the destructor function type, void(*)(void). 
llvm::FunctionType *ty = llvm::FunctionType::get(CGM.VoidTy, false); @@ -227,19 +240,19 @@ llvm::Constant *CodeGenFunction::createAtExitStub(const VarDecl &VD, } const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction(ty, FnName.str(), - FI, - VD.getLocation()); + llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction( + ty, FnName.str(), FI, VD.getLocation()); CodeGenFunction CGF(CGM); - CGF.StartFunction(&VD, CGM.getContext().VoidTy, fn, FI, FunctionArgList()); + CGF.StartFunction(GlobalDecl(&VD, DynamicInitKind::AtExit), + CGM.getContext().VoidTy, fn, FI, FunctionArgList()); llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr); // Make sure the call and the callee agree on calling convention. if (llvm::Function *dtorFn = - dyn_cast<llvm::Function>(dtor->stripPointerCasts())) + dyn_cast<llvm::Function>(dtor.getCallee()->stripPointerCasts())) call->setCallingConv(dtorFn->getCallingConv()); CGF.FinishFunction(); @@ -249,7 +262,7 @@ llvm::Constant *CodeGenFunction::createAtExitStub(const VarDecl &VD, /// Register a global destructor using the C atexit runtime function. void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, - llvm::Constant *dtor, + llvm::FunctionCallee dtor, llvm::Constant *addr) { // Create a function which calls the destructor. llvm::Constant *dtorStub = createAtExitStub(VD, dtor, addr); @@ -261,10 +274,10 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) { llvm::FunctionType *atexitTy = llvm::FunctionType::get(IntTy, dtorStub->getType(), false); - llvm::Constant *atexit = + llvm::FunctionCallee atexit = CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(), /*Local=*/true); - if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit)) + if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit.getCallee())) atexitFn->setDoesNotThrow(); EmitNounwindRuntimeCall(atexit, dtorStub); @@ -356,6 +369,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( !isInSanitizerBlacklist(SanitizerKind::KernelHWAddress, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress); + if (getLangOpts().Sanitize.has(SanitizerKind::MemTag) && + !isInSanitizerBlacklist(SanitizerKind::MemTag, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeMemTag); + if (getLangOpts().Sanitize.has(SanitizerKind::Thread) && !isInSanitizerBlacklist(SanitizerKind::Thread, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); @@ -468,7 +485,8 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, } else if (auto *IPA = D->getAttr<InitPriorityAttr>()) { OrderGlobalInits Key(IPA->getPriority(), PrioritizedCXXGlobalInits.size()); PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn)); - } else if (isTemplateInstantiation(D->getTemplateSpecializationKind())) { + } else if (isTemplateInstantiation(D->getTemplateSpecializationKind()) || + getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR) { // C++ [basic.start.init]p2: // Definitions of explicitly specialized class template static data // members have ordered initialization. Other class template static data @@ -482,6 +500,11 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // minor startup time optimization. In the MS C++ ABI, there are no guard // variables, so this COMDAT key is required for correctness. 
AddGlobalCtor(Fn, 65535, COMDATKey); + if (getTarget().getCXXABI().isMicrosoft() && COMDATKey) { + // In The MS C++, MS add template static data member in the linker + // drective. + addUsedGlobal(COMDATKey); + } } else if (D->hasAttr<SelectAnyAttr>()) { // SelectAny globals will be comdat-folded. Put the initializer into a // COMDAT group associated with the global, so the initializers get folded @@ -575,6 +598,19 @@ CodeGenModule::EmitCXXGlobalInitFunc() { CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, CXXGlobalInits); AddGlobalCtor(Fn); + // In OpenCL global init functions must be converted to kernels in order to + // be able to launch them from the host. + // FIXME: Some more work might be needed to handle destructors correctly. + // Current initialization function makes use of function pointers callbacks. + // We can't support function pointers especially between host and device. + // However it seems global destruction has little meaning without any + // dynamic resource allocation on the device and program scope variables are + // destroyed by the runtime when program is released. + if (getLangOpts().OpenCL) { + GenOpenCLArgMetadata(Fn); + Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL); + } + CXXGlobalInits.clear(); } @@ -604,15 +640,21 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, CurEHLocation = D->getBeginLoc(); - StartFunction(GlobalDecl(D), getContext().VoidTy, Fn, - getTypes().arrangeNullaryFunction(), + StartFunction(GlobalDecl(D, DynamicInitKind::Initializer), + getContext().VoidTy, Fn, getTypes().arrangeNullaryFunction(), FunctionArgList(), D->getLocation(), D->getInit()->getExprLoc()); // Use guarded initialization if the global variable is weak. This // occurs for, e.g., instantiated static data members and // definitions explicitly marked weak. - if (Addr->hasWeakLinkage() || Addr->hasLinkOnceLinkage()) { + // + // Also use guarded initialization for a variable with dynamic TLS and + // unordered initialization. (If the initialization is ordered, the ABI + // layer will guard the whole-TU initialization for us.) + if (Addr->hasWeakLinkage() || Addr->hasLinkOnceLinkage() || + (D->getTLSKind() == VarDecl::TLS_Dynamic && + isTemplateInstantiation(D->getTemplateSpecializationKind()))) { EmitCXXGuardedInit(*D, Addr, PerformInit); } else { EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit); @@ -682,8 +724,8 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, void CodeGenFunction::GenerateCXXGlobalDtorsFunc( llvm::Function *Fn, - const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> - &DtorsAndObjects) { + const std::vector<std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH, + llvm::Constant *>> &DtorsAndObjects) { { auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), getContext().VoidTy, Fn, @@ -693,9 +735,11 @@ void CodeGenFunction::GenerateCXXGlobalDtorsFunc( // Emit the dtors, in reverse order from construction. for (unsigned i = 0, e = DtorsAndObjects.size(); i != e; ++i) { - llvm::Value *Callee = DtorsAndObjects[e - i - 1].first; - llvm::CallInst *CI = Builder.CreateCall(Callee, - DtorsAndObjects[e - i - 1].second); + llvm::FunctionType *CalleeTy; + llvm::Value *Callee; + llvm::Constant *Arg; + std::tie(CalleeTy, Callee, Arg) = DtorsAndObjects[e - i - 1]; + llvm::CallInst *CI = Builder.CreateCall(CalleeTy, Callee, Arg); // Make sure the call and the callee agree on calling convention. 
if (llvm::Function *F = dyn_cast<llvm::Function>(Callee)) CI->setCallingConv(F->getCallingConv()); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 5756e13d2623..3b7a88a0b769 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -1,9 +1,8 @@ //===--- CGException.cpp - Emit LLVM Code for C++ exceptions ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -22,7 +21,6 @@ #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/TargetBuiltins.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/SaveAndRestore.h" @@ -30,29 +28,29 @@ using namespace clang; using namespace CodeGen; -static llvm::Constant *getFreeExceptionFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getFreeExceptionFn(CodeGenModule &CGM) { // void __cxa_free_exception(void *thrown_exception); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_free_exception"); } -static llvm::Constant *getUnexpectedFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getUnexpectedFn(CodeGenModule &CGM) { // void __cxa_call_unexpected(void *thrown_exception); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_call_unexpected"); } -llvm::Constant *CodeGenModule::getTerminateFn() { +llvm::FunctionCallee CodeGenModule::getTerminateFn() { // void __terminate(); llvm::FunctionType *FTy = - llvm::FunctionType::get(VoidTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(VoidTy, /*isVarArg=*/false); StringRef name; @@ -74,10 +72,10 @@ llvm::Constant *CodeGenModule::getTerminateFn() { return CreateRuntimeFunction(FTy, name); } -static llvm::Constant *getCatchallRethrowFn(CodeGenModule &CGM, - StringRef Name) { +static llvm::FunctionCallee getCatchallRethrowFn(CodeGenModule &CGM, + StringRef Name) { llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, Name); } @@ -240,8 +238,8 @@ const EHPersonality &EHPersonality::get(CodeGenFunction &CGF) { return get(CGF.CGM, dyn_cast_or_null<FunctionDecl>(FD)); } -static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, - const EHPersonality &Personality) { +static llvm::FunctionCallee getPersonalityFn(CodeGenModule &CGM, + const EHPersonality &Personality) { return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, true), Personality.PersonalityFn, llvm::AttributeList(), /*Local=*/true); @@ -249,12 +247,13 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { - llvm::Constant *Fn = getPersonalityFn(CGM, Personality); + 
llvm::FunctionCallee Fn = getPersonalityFn(CGM, Personality); llvm::PointerType* Int8PtrTy = llvm::PointerType::get( llvm::Type::getInt8Ty(CGM.getLLVMContext()), CGM.getDataLayout().getProgramAddressSpace()); - return llvm::ConstantExpr::getBitCast(Fn, Int8PtrTy); + return llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(Fn.getCallee()), + Int8PtrTy); } /// Check whether a landingpad instruction only uses C++ features. @@ -345,12 +344,13 @@ void CodeGenModule::SimplifyPersonality() { // Create the C++ personality function and kill off the old // function. - llvm::Constant *CXXFn = getPersonalityFn(*this, CXX); + llvm::FunctionCallee CXXFn = getPersonalityFn(*this, CXX); // This can happen if the user is screwing with us. - if (Fn->getType() != CXXFn->getType()) return; + if (Fn->getType() != CXXFn.getCallee()->getType()) + return; - Fn->replaceAllUsesWith(CXXFn); + Fn->replaceAllUsesWith(CXXFn.getCallee()); Fn->eraseFromParent(); } @@ -977,15 +977,15 @@ static void emitWasmCatchPadBlock(CodeGenFunction &CGF, // Create calls to wasm.get.exception and wasm.get.ehselector intrinsics. // Before they are lowered appropriately later, they provide values for the // exception and selector. - llvm::Value *GetExnFn = + llvm::Function *GetExnFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception); - llvm::Value *GetSelectorFn = + llvm::Function *GetSelectorFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_ehselector); llvm::CallInst *Exn = CGF.Builder.CreateCall(GetExnFn, CPI); CGF.Builder.CreateStore(Exn, CGF.getExceptionSlot()); llvm::CallInst *Selector = CGF.Builder.CreateCall(GetSelectorFn, CPI); - llvm::Value *TypeIDFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); + llvm::Function *TypeIDFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); // If there's only a single catch-all, branch directly to its handler. if (CatchScope.getNumHandlers() == 1 && @@ -1069,7 +1069,7 @@ static void emitCatchDispatchBlock(CodeGenFunction &CGF, CGF.EmitBlockAfterUses(dispatchBlock); // Select the right handler. - llvm::Value *llvm_eh_typeid_for = + llvm::Function *llvm_eh_typeid_for = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); // Load the selector value. 
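[Editorial note on the CGException.cpp changes] Throughout this file (and the other files in the import), EH runtime helpers are now returned as llvm::FunctionCallee instead of llvm::Constant*, so each call site carries the callee's FunctionType explicitly rather than recovering it from the pointer's pointee type. A hedged, self-contained illustration of the pattern follows; declareFreeException and emitFreeException are made-up names rather than clang helpers, though the declared runtime function mirrors getFreeExceptionFn above.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

// Declare void __cxa_free_exception(void*) and return it as a FunctionCallee,
// i.e. the function type paired with the callee value.
static llvm::FunctionCallee declareFreeException(llvm::Module &M) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::FunctionType *FTy = llvm::FunctionType::get(
      llvm::Type::getVoidTy(Ctx), {llvm::Type::getInt8PtrTy(Ctx)},
      /*isVarArg=*/false);
  return M.getOrInsertFunction("__cxa_free_exception", FTy);
}

// Emit a call through the FunctionCallee. CreateCall no longer needs to
// inspect the callee pointer's element type, which keeps this form working
// once opaque pointer types arrive.
static void emitFreeException(llvm::IRBuilder<> &B, llvm::Module &M,
                              llvm::Value *Exn) {
  llvm::FunctionCallee Free = declareFreeException(M);
  B.CreateCall(Free, {Exn});
}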
@@ -1259,7 +1259,9 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { } assert(RethrowBlock != WasmCatchStartBlock && RethrowBlock->empty()); Builder.SetInsertPoint(RethrowBlock); - CGM.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true); + llvm::Function *RethrowInCatchFn = + CGM.getIntrinsic(llvm::Intrinsic::wasm_rethrow_in_catch); + EmitNoreturnRuntimeCallOrInvoke(RethrowInCatchFn, {}); } EmitBlock(ContBB); @@ -1269,9 +1271,10 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { namespace { struct CallEndCatchForFinally final : EHScopeStack::Cleanup { llvm::Value *ForEHVar; - llvm::Value *EndCatchFn; - CallEndCatchForFinally(llvm::Value *ForEHVar, llvm::Value *EndCatchFn) - : ForEHVar(ForEHVar), EndCatchFn(EndCatchFn) {} + llvm::FunctionCallee EndCatchFn; + CallEndCatchForFinally(llvm::Value *ForEHVar, + llvm::FunctionCallee EndCatchFn) + : ForEHVar(ForEHVar), EndCatchFn(EndCatchFn) {} void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::BasicBlock *EndCatchBB = CGF.createBasicBlock("finally.endcatch"); @@ -1290,15 +1293,15 @@ namespace { struct PerformFinally final : EHScopeStack::Cleanup { const Stmt *Body; llvm::Value *ForEHVar; - llvm::Value *EndCatchFn; - llvm::Value *RethrowFn; + llvm::FunctionCallee EndCatchFn; + llvm::FunctionCallee RethrowFn; llvm::Value *SavedExnVar; PerformFinally(const Stmt *Body, llvm::Value *ForEHVar, - llvm::Value *EndCatchFn, - llvm::Value *RethrowFn, llvm::Value *SavedExnVar) - : Body(Body), ForEHVar(ForEHVar), EndCatchFn(EndCatchFn), - RethrowFn(RethrowFn), SavedExnVar(SavedExnVar) {} + llvm::FunctionCallee EndCatchFn, + llvm::FunctionCallee RethrowFn, llvm::Value *SavedExnVar) + : Body(Body), ForEHVar(ForEHVar), EndCatchFn(EndCatchFn), + RethrowFn(RethrowFn), SavedExnVar(SavedExnVar) {} void Emit(CodeGenFunction &CGF, Flags flags) override { // Enter a cleanup to call the end-catch function if one was provided. @@ -1360,12 +1363,11 @@ namespace { /// Enters a finally block for an implementation using zero-cost /// exceptions. This is mostly general, but hard-codes some /// language/ABI-specific behavior in the catch-all sections. -void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, - const Stmt *body, - llvm::Constant *beginCatchFn, - llvm::Constant *endCatchFn, - llvm::Constant *rethrowFn) { - assert((beginCatchFn != nullptr) == (endCatchFn != nullptr) && +void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, const Stmt *body, + llvm::FunctionCallee beginCatchFn, + llvm::FunctionCallee endCatchFn, + llvm::FunctionCallee rethrowFn) { + assert((!!beginCatchFn) == (!!endCatchFn) && "begin/end catch functions not paired"); assert(rethrowFn && "rethrow function is required"); @@ -1377,9 +1379,7 @@ void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, // In the latter case we need to pass it the exception object. // But we can't use the exception slot because the @finally might // have a landing pad (which would overwrite the exception slot). - llvm::FunctionType *rethrowFnTy = - cast<llvm::FunctionType>( - cast<llvm::PointerType>(rethrowFn->getType())->getElementType()); + llvm::FunctionType *rethrowFnTy = rethrowFn.getFunctionType(); SavedExnVar = nullptr; if (rethrowFnTy->getNumParams()) SavedExnVar = CGF.CreateTempAlloca(CGF.Int8PtrTy, "finally.exn"); @@ -1545,7 +1545,7 @@ llvm::BasicBlock *CodeGenFunction::getTerminateFunclet() { // __clang_call_terminate function. 
if (getLangOpts().CPlusPlus && EHPersonality::get(*this).isWasmPersonality()) { - llvm::Value *GetExnFn = + llvm::Function *GetExnFn = CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception); Exn = Builder.CreateCall(GetExnFn, CurrentFuncletPad); } @@ -1632,7 +1632,7 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup { if (CGF.IsOutlinedSEHHelper) { FP = &CGF.CurFn->arg_begin()[1]; } else { - llvm::Value *LocalAddrFn = + llvm::Function *LocalAddrFn = CGM.getIntrinsic(llvm::Intrinsic::localaddress); FP = CGF.Builder.CreateCall(LocalAddrFn); } diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 34a921e2dc00..5a4b1188b711 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -1,9 +1,8 @@ //===--- CGExpr.cpp - Emit LLVM Code from Expressions ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -26,6 +25,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/NSAPI.h" +#include "clang/Basic/Builtins.h" #include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" @@ -331,7 +331,7 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, switch (M->getStorageDuration()) { case SD_Static: case SD_Thread: { - llvm::Constant *CleanupFn; + llvm::FunctionCallee CleanupFn; llvm::Constant *CleanupArg; if (E->getType()->isArrayType()) { CleanupFn = CodeGenFunction(CGF.CGM).generateDestroyHelper( @@ -340,8 +340,8 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, dyn_cast_or_null<VarDecl>(M->getExtendingDecl())); CleanupArg = llvm::Constant::getNullValue(CGF.Int8PtrTy); } else { - CleanupFn = CGF.CGM.getAddrOfCXXStructor(ReferenceTemporaryDtor, - StructorType::Complete); + CleanupFn = CGF.CGM.getAddrAndTypeOfCXXStructor( + GlobalDecl(ReferenceTemporaryDtor, Dtor_Complete)); CleanupArg = cast<llvm::Constant>(ReferenceTemporary.getPointer()); } CGF.CGM.getCXXABI().registerGlobalDtor( @@ -653,7 +653,8 @@ bool CodeGenFunction::sanitizePerformTypeCheck() const { void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *Ptr, QualType Ty, CharUnits Alignment, - SanitizerSet SkippedChecks) { + SanitizerSet SkippedChecks, + llvm::Value *ArraySize) { if (!sanitizePerformTypeCheck()) return; @@ -711,21 +712,28 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, if (SanOpts.has(SanitizerKind::ObjectSize) && !SkippedChecks.has(SanitizerKind::ObjectSize) && !Ty->isIncompleteType()) { - uint64_t Size = getContext().getTypeSizeInChars(Ty).getQuantity(); - - // The glvalue must refer to a large enough storage region. - // FIXME: If Address Sanitizer is enabled, insert dynamic instrumentation - // to check this. 
- // FIXME: Get object address space - llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy }; - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys); - llvm::Value *Min = Builder.getFalse(); - llvm::Value *NullIsUnknown = Builder.getFalse(); - llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy); - llvm::Value *LargeEnough = Builder.CreateICmpUGE( - Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown}), - llvm::ConstantInt::get(IntPtrTy, Size)); - Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize)); + uint64_t TySize = getContext().getTypeSizeInChars(Ty).getQuantity(); + llvm::Value *Size = llvm::ConstantInt::get(IntPtrTy, TySize); + if (ArraySize) + Size = Builder.CreateMul(Size, ArraySize); + + // Degenerate case: new X[0] does not need an objectsize check. + llvm::Constant *ConstantSize = dyn_cast<llvm::Constant>(Size); + if (!ConstantSize || !ConstantSize->isNullValue()) { + // The glvalue must refer to a large enough storage region. + // FIXME: If Address Sanitizer is enabled, insert dynamic instrumentation + // to check this. + // FIXME: Get object address space + llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy }; + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys); + llvm::Value *Min = Builder.getFalse(); + llvm::Value *NullIsUnknown = Builder.getFalse(); + llvm::Value *Dynamic = Builder.getFalse(); + llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy); + llvm::Value *LargeEnough = Builder.CreateICmpUGE( + Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown, Dynamic}), Size); + Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize)); + } } uint64_t AlignVal = 0; @@ -1288,7 +1296,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::CXXUuidofExprClass: return EmitCXXUuidofLValue(cast<CXXUuidofExpr>(E)); case Expr::LambdaExprClass: - return EmitLambdaLValue(cast<LambdaExpr>(E)); + return EmitAggExprToLValue(E); case Expr::ExprWithCleanupsClass: { const auto *cleanups = cast<ExprWithCleanups>(E); @@ -1308,11 +1316,15 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { return LV; } - case Expr::CXXDefaultArgExprClass: - return EmitLValue(cast<CXXDefaultArgExpr>(E)->getExpr()); + case Expr::CXXDefaultArgExprClass: { + auto *DAE = cast<CXXDefaultArgExpr>(E); + CXXDefaultArgExprScope Scope(*this, DAE); + return EmitLValue(DAE->getExpr()); + } case Expr::CXXDefaultInitExprClass: { - CXXDefaultInitExprScope Scope(*this); - return EmitLValue(cast<CXXDefaultInitExpr>(E)->getExpr()); + auto *DIE = cast<CXXDefaultInitExpr>(E); + CXXDefaultInitExprScope Scope(*this, DIE); + return EmitLValue(DIE->getExpr()); } case Expr::CXXTypeidExprClass: return EmitCXXTypeidLValue(cast<CXXTypeidExpr>(E)); @@ -1387,7 +1399,7 @@ static bool isConstantEmittableObjectType(QualType type) { /// Can we constant-emit a load of a reference to a variable of the /// given type? This is different from predicates like -/// Decl::isUsableInConstantExpressions because we do want it to apply +/// Decl::mightBeUsableInConstantExpressions because we do want it to apply /// in situations that don't necessarily satisfy the language's rules /// for this (e.g. C++'s ODR-use rules). For example, we want to able /// to do this with const float variables even if those variables @@ -1411,10 +1423,11 @@ static ConstantEmissionKind checkVarTypeForConstantEmission(QualType type) { } /// Try to emit a reference to the given value without producing it as -/// an l-value. 
This is actually more than an optimization: we can't -/// produce an l-value for variables that we never actually captured -/// in a block or lambda, which means const int variables or constexpr -/// literals or similar. +/// an l-value. This is just an optimization, but it avoids us needing +/// to emit global copies of variables if they're named without triggering +/// a formal use in a context where we can't emit a direct reference to them, +/// for instance if a block or lambda or a member of a local class uses a +/// const int variable or constexpr variable from an enclosing function. CodeGenFunction::ConstantEmission CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) { ValueDecl *value = refExpr->getDecl(); @@ -1485,7 +1498,7 @@ static DeclRefExpr *tryToConvertMemberExprToDeclRefExpr(CodeGenFunction &CGF, return DeclRefExpr::Create( CGF.getContext(), NestedNameSpecifierLoc(), SourceLocation(), VD, /*RefersToEnclosingVariableOrCapture=*/false, ME->getExprLoc(), - ME->getType(), ME->getValueKind()); + ME->getType(), ME->getValueKind(), nullptr, nullptr, ME->isNonOdrUse()); } return nullptr; } @@ -1879,7 +1892,6 @@ Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) { Address VectorBasePtrPlusIx = Builder.CreateConstInBoundsGEP(CastToPointerElement, ix, - getContext().getTypeSizeInChars(EQT), "vector.elt"); return VectorBasePtrPlusIx; @@ -1899,7 +1911,7 @@ RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) { Ty = CGM.getTypes().getDataLayout().getIntPtrType(OrigTy); llvm::Type *Types[] = { Ty }; - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); llvm::Value *Call = Builder.CreateCall( F, llvm::MetadataAsValue::get(Ty->getContext(), RegName)); if (OrigTy->isPointerTy()) @@ -2019,7 +2031,7 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, // Cast the source to the storage type and shift it into place. SrcVal = Builder.CreateIntCast(SrcVal, Ptr.getElementType(), - /*IsSigned=*/false); + /*isSigned=*/false); llvm::Value *MaskedVal = SrcVal; // See if there are other bits in the bitfield's storage we'll need to load @@ -2160,7 +2172,7 @@ void CodeGenFunction::EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst) { Ty = CGM.getTypes().getDataLayout().getIntPtrType(OrigTy); llvm::Type *Types[] = { Ty }; - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *Value = Src.getScalarVal(); if (OrigTy->isPointerTy()) Value = Builder.CreatePtrToInt(Value, Ty); @@ -2284,15 +2296,22 @@ static LValue EmitThreadPrivateVarDeclLValue( return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } -static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF, - const VarDecl *VD, QualType T) { +static Address emitDeclTargetVarDeclLValue(CodeGenFunction &CGF, + const VarDecl *VD, QualType T) { llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_To) + // Return an invalid address if variable is MT_To and unified + // memory is not enabled. For all other cases: MT_Link and + // MT_To with unified memory, return a valid address. 
+ if (!Res || (*Res == OMPDeclareTargetDeclAttr::MT_To && + !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) return Address::invalid(); - assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && "Expected link clause"); + assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) && + "Expected link clause OR to clause with unified memory enabled."); QualType PtrTy = CGF.getContext().getPointerType(VD->getType()); - Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>()); } @@ -2348,7 +2367,7 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, // Check if the variable is marked as declare target with link clause in // device codegen. if (CGF.getLangOpts().OpenMPIsDevice) { - Address Addr = emitDeclTargetLinkVarDeclLValue(CGF, VD, T); + Address Addr = emitDeclTargetVarDeclLValue(CGF, VD, T); if (Addr.isValid()) return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } @@ -2440,45 +2459,101 @@ static LValue EmitGlobalNamedRegister(const VarDecl *VD, CodeGenModule &CGM) { return LValue::MakeGlobalReg(Address(Ptr, Alignment), VD->getType()); } +/// Determine whether we can emit a reference to \p VD from the current +/// context, despite not necessarily having seen an odr-use of the variable in +/// this context. +static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF, + const DeclRefExpr *E, + const VarDecl *VD, + bool IsConstant) { + // For a variable declared in an enclosing scope, do not emit a spurious + // reference even if we have a capture, as that will emit an unwarranted + // reference to our capture state, and will likely generate worse code than + // emitting a local copy. + if (E->refersToEnclosingVariableOrCapture()) + return false; + + // For a local declaration declared in this function, we can always reference + // it even if we don't have an odr-use. + if (VD->hasLocalStorage()) { + return VD->getDeclContext() == + dyn_cast_or_null<DeclContext>(CGF.CurCodeDecl); + } + + // For a global declaration, we can emit a reference to it if we know + // for sure that we are able to emit a definition of it. + VD = VD->getDefinition(CGF.getContext()); + if (!VD) + return false; + + // Don't emit a spurious reference if it might be to a variable that only + // exists on a different device / target. + // FIXME: This is unnecessarily broad. Check whether this would actually be a + // cross-target reference. + if (CGF.getLangOpts().OpenMP || CGF.getLangOpts().CUDA || + CGF.getLangOpts().OpenCL) { + return false; + } + + // We can emit a spurious reference only if the linkage implies that we'll + // be emitting a non-interposable symbol that will be retained until link + // time. 
+ switch (CGF.CGM.getLLVMLinkageVarDefinition(VD, IsConstant)) { + case llvm::GlobalValue::ExternalLinkage: + case llvm::GlobalValue::LinkOnceODRLinkage: + case llvm::GlobalValue::WeakODRLinkage: + case llvm::GlobalValue::InternalLinkage: + case llvm::GlobalValue::PrivateLinkage: + return true; + default: + return false; + } +} + LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { const NamedDecl *ND = E->getDecl(); QualType T = E->getType(); + assert(E->isNonOdrUse() != NOUR_Unevaluated && + "should not emit an unevaluated operand"); + if (const auto *VD = dyn_cast<VarDecl>(ND)) { // Global Named registers access via intrinsics only if (VD->getStorageClass() == SC_Register && VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl()) return EmitGlobalNamedRegister(VD, CGM); - // A DeclRefExpr for a reference initialized by a constant expression can - // appear without being odr-used. Directly emit the constant initializer. - const Expr *Init = VD->getAnyInitializer(VD); - const auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl); - if (Init && !isa<ParmVarDecl>(VD) && VD->getType()->isReferenceType() && - VD->isUsableInConstantExpressions(getContext()) && - VD->checkInitIsICE() && - // Do not emit if it is private OpenMP variable. - !(E->refersToEnclosingVariableOrCapture() && - ((CapturedStmtInfo && - (LocalDeclMap.count(VD->getCanonicalDecl()) || - CapturedStmtInfo->lookup(VD->getCanonicalDecl()))) || - LambdaCaptureFields.lookup(VD->getCanonicalDecl()) || - (BD && BD->capturesVariable(VD))))) { - llvm::Constant *Val = - ConstantEmitter(*this).emitAbstract(E->getLocation(), - *VD->evaluateValue(), - VD->getType()); - assert(Val && "failed to emit reference constant expression"); - // FIXME: Eventually we will want to emit vector element references. - - // Should we be using the alignment of the constant pointer we emitted? - CharUnits Alignment = getNaturalTypeAlignment(E->getType(), - /* BaseInfo= */ nullptr, - /* TBAAInfo= */ nullptr, - /* forPointeeType= */ true); - return MakeAddrLValue(Address(Val, Alignment), T, AlignmentSource::Decl); + // If this DeclRefExpr does not constitute an odr-use of the variable, + // we're not permitted to emit a reference to it in general, and it might + // not be captured if capture would be necessary for a use. Emit the + // constant value directly instead. + if (E->isNonOdrUse() == NOUR_Constant && + (VD->getType()->isReferenceType() || + !canEmitSpuriousReferenceToVariable(*this, E, VD, true))) { + VD->getAnyInitializer(VD); + llvm::Constant *Val = ConstantEmitter(*this).emitAbstract( + E->getLocation(), *VD->evaluateValue(), VD->getType()); + assert(Val && "failed to emit constant expression"); + + Address Addr = Address::invalid(); + if (!VD->getType()->isReferenceType()) { + // Spill the constant value to a global. + Addr = CGM.createUnnamedGlobalFrom(*VD, Val, + getContext().getDeclAlign(VD)); + } else { + // Should we be using the alignment of the constant pointer we emitted? + CharUnits Alignment = + getNaturalTypeAlignment(E->getType(), + /* BaseInfo= */ nullptr, + /* TBAAInfo= */ nullptr, + /* forPointeeType= */ true); + Addr = Address(Val, Alignment); + } + return MakeAddrLValue(Addr, T, AlignmentSource::Decl); } + // FIXME: Handle other kinds of non-odr-use DeclRefExprs. + // Check for captured variables. 
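Editor's note: a non-authoritative illustration of the NOUR_Constant path handled above. A constexpr local named inside a lambda without being captured is not odr-used, so codegen can fold its value (or, per the new code, spill it to an unnamed global) instead of referencing capture state.

    #include <cstdio>

    int main() {
      constexpr int N = 42;         // never odr-used below
      auto f = [] { return N; };    // no capture needed: reading N is not an odr-use
      std::printf("%d\n", f());     // the 42 is emitted directly into the closure's code
      return 0;
    }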
if (E->refersToEnclosingVariableOrCapture()) { VD = VD->getCanonicalDecl(); @@ -2510,7 +2585,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // FIXME: We should be able to assert this for FunctionDecls as well! // FIXME: We should be able to assert this for all DeclRefExprs, not just // those with a valid source location. - assert((ND->isUsed(false) || !isa<VarDecl>(ND) || + assert((ND->isUsed(false) || !isa<VarDecl>(ND) || E->isNonOdrUse() || !E->getLocation().isValid()) && "Should not use decl without marking it used!"); @@ -2536,7 +2611,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // some reason; most likely, because it's in an outer function. } else if (VD->isStaticLocal()) { addr = Address(CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)), + *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)), getContext().getDeclAlign(VD)); // No other cases for now. @@ -2851,16 +2926,13 @@ enum class CheckRecoverableKind { } static CheckRecoverableKind getRecoverableKind(SanitizerMask Kind) { - assert(llvm::countPopulation(Kind) == 1); - switch (Kind) { - case SanitizerKind::Vptr: + assert(Kind.countPopulation() == 1); + if (Kind == SanitizerKind::Function || Kind == SanitizerKind::Vptr) return CheckRecoverableKind::AlwaysRecoverable; - case SanitizerKind::Return: - case SanitizerKind::Unreachable: + else if (Kind == SanitizerKind::Return || Kind == SanitizerKind::Unreachable) return CheckRecoverableKind::Unrecoverable; - default: + else return CheckRecoverableKind::Recoverable; - } } namespace { @@ -2910,7 +2982,7 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, } B.addAttribute(llvm::Attribute::UWTable); - llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction( FnType, FnName, llvm::AttributeList::get(CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, B), @@ -3051,7 +3123,7 @@ void CodeGenFunction::EmitCfiSlowPathCheck( bool WithDiag = !CGM.getCodeGenOpts().SanitizeTrap.has(Kind); llvm::CallInst *CheckCall; - llvm::Constant *SlowPathFn; + llvm::FunctionCallee SlowPathFn; if (WithDiag) { llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); auto *InfoPtr = @@ -3073,7 +3145,8 @@ void CodeGenFunction::EmitCfiSlowPathCheck( CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr}); } - CGM.setDSOLocal(cast<llvm::GlobalValue>(SlowPathFn->stripPointerCasts())); + CGM.setDSOLocal( + cast<llvm::GlobalValue>(SlowPathFn.getCallee()->stripPointerCasts())); CheckCall->setDoesNotThrow(); EmitBlock(Cont); @@ -3252,7 +3325,7 @@ Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, if (!E->getType()->isVariableArrayType()) { assert(isa<llvm::ArrayType>(Addr.getElementType()) && "Expected pointer to array"); - Addr = Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), "arraydecay"); + Addr = Builder.CreateConstArrayGEP(Addr, 0, "arraydecay"); } // The result of this decay conversion points to an array element within the @@ -3346,8 +3419,20 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, CharUnits eltAlign = getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize); - llvm::Value *eltPtr = emitArraySubscriptGEP( - CGF, addr.getPointer(), indices, inbounds, signedIndices, loc, name); + llvm::Value *eltPtr; + auto LastIndex = dyn_cast<llvm::ConstantInt>(indices.back()); + if (!CGF.IsInPreservedAIRegion || !LastIndex) { + eltPtr = emitArraySubscriptGEP( + CGF, addr.getPointer(), indices, 
inbounds, signedIndices, + loc, name); + } else { + // Remember the original array subscript for bpf target + unsigned idx = LastIndex->getZExtValue(); + eltPtr = CGF.Builder.CreatePreserveArrayAccessIndex(addr.getPointer(), + indices.size() - 1, + idx); + } + return Address(eltPtr, eltAlign); } @@ -3529,8 +3614,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, if (!BaseTy->isVariableArrayType()) { assert(isa<llvm::ArrayType>(Addr.getElementType()) && "Expected pointer to array"); - Addr = CGF.Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), - "arraydecay"); + Addr = CGF.Builder.CreateConstArrayGEP(Addr, 0, "arraydecay"); } return CGF.Builder.CreateElementBitCast(Addr, @@ -3665,7 +3749,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, Idx = Builder.CreateNSWMul(Idx, NumElements); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(), !getLangOpts().isSignedOverflowDefined(), - /*SignedIndices=*/false, E->getExprLoc()); + /*signedIndices=*/false, E->getExprLoc()); } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { // If this is A[i] where A is an array, the frontend will have decayed the // base to be a ArrayToPointerDecay implicit cast. While correct, it is @@ -3685,7 +3769,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, EltPtr = emitArraySubscriptGEP( *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), - /*SignedIndices=*/false, E->getExprLoc()); + /*signedIndices=*/false, E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); TBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, ResultExprTy); } else { @@ -3694,7 +3778,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, IsLowerBound); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), - /*SignedIndices=*/false, E->getExprLoc()); + /*signedIndices=*/false, E->getExprLoc()); } return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo, TBAAInfo); @@ -3808,31 +3892,63 @@ LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) { return EmitLValueForField(LambdaLV, Field); } +/// Get the field index in the debug info. The debug info structure/union +/// will ignore the unnamed bitfields. +unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec, + unsigned FieldIndex) { + unsigned I = 0, Skipped = 0; + + for (auto F : Rec->getDefinition()->fields()) { + if (I == FieldIndex) + break; + if (F->isUnnamedBitfield()) + Skipped++; + I++; + } + + return FieldIndex - Skipped; +} + +/// Get the address of a zero-sized field within a record. The resulting +/// address doesn't necessarily have the right type. +static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base, + const FieldDecl *Field) { + CharUnits Offset = CGF.getContext().toCharUnitsFromBits( + CGF.getContext().getFieldOffset(Field)); + if (Offset.isZero()) + return Base; + Base = CGF.Builder.CreateElementBitCast(Base, CGF.Int8Ty); + return CGF.Builder.CreateConstInBoundsByteGEP(Base, Offset); +} + /// Drill down to the storage of a field without walking into /// reference types. /// /// The resulting address doesn't necessarily have the right type. 
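Editor's note: a small, made-up example of why getDebugInfoFIndex above subtracts skipped members. Unnamed bit-fields contribute to the record layout but produce no member in the debug info, so a field's debug-info index can be smaller than its AST field index.

    struct S {
      int a : 3;   // AST field 0, debug-info field 0
      int   : 5;   // unnamed bit-field: layout padding only, no DWARF member
      int b : 4;   // AST field 2, debug-info field 1
    };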
static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base, const FieldDecl *field) { + if (field->isZeroSize(CGF.getContext())) + return emitAddrOfZeroSizeField(CGF, base, field); + const RecordDecl *rec = field->getParent(); unsigned idx = CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); - CharUnits offset; - // Adjust the alignment down to the given offset. - // As a special case, if the LLVM field index is 0, we know that this - // is zero. - assert((idx != 0 || CGF.getContext().getASTRecordLayout(rec) - .getFieldOffset(field->getFieldIndex()) == 0) && - "LLVM field at index zero had non-zero offset?"); - if (idx != 0) { - auto &recLayout = CGF.getContext().getASTRecordLayout(rec); - auto offsetInBits = recLayout.getFieldOffset(field->getFieldIndex()); - offset = CGF.getContext().toCharUnitsFromBits(offsetInBits); - } + return CGF.Builder.CreateStructGEP(base, idx, field->getName()); +} + +static Address emitPreserveStructAccess(CodeGenFunction &CGF, Address base, + const FieldDecl *field) { + const RecordDecl *rec = field->getParent(); + llvm::DIType *DbgInfo = CGF.getDebugInfo()->getOrCreateRecordType( + CGF.getContext().getRecordType(rec), rec->getLocation()); + + unsigned idx = + CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); - return CGF.Builder.CreateStructGEP(base, idx, offset, field->getName()); + return CGF.Builder.CreatePreserveStructAccessIndex( + base, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo); } static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { @@ -3866,8 +3982,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, unsigned Idx = RL.getLLVMFieldNo(field); if (Idx != 0) // For structs, we GEP to the field that the record layout suggests. - Addr = Builder.CreateStructGEP(Addr, Idx, Info.StorageOffset, - field->getName()); + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); // Get the access type. llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize); @@ -3943,9 +4058,24 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, // a barrier every time CXXRecord field with vptr is referenced. addr = Address(Builder.CreateLaunderInvariantGroup(addr.getPointer()), addr.getAlignment()); + + if (IsInPreservedAIRegion) { + // Remember the original union field index + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( + getContext().getRecordType(rec), rec->getLocation()); + addr = Address( + Builder.CreatePreserveUnionAccessIndex( + addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo), + addr.getAlignment()); + } } else { - // For structs, we GEP to the field that the record layout suggests. - addr = emitAddrOfFieldStorage(*this, addr, field); + + if (!IsInPreservedAIRegion) + // For structs, we GEP to the field that the record layout suggests. + addr = emitAddrOfFieldStorage(*this, addr, field); + else + // Remember the original struct field index + addr = emitPreserveStructAccess(*this, addr, field); // If this is a reference field, load the reference right now. 
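Editor's note: a hedged sketch of the source construct that sets IsInPreservedAIRegion and routes accesses through the preserve-access-index paths above. It assumes __builtin_preserve_access_index is available (it targets BPF CO-RE-style relocations); the pt_regs layout here is invented.

    struct pt_regs { long di; long si; };

    long read_second_arg(struct pt_regs *ctx) {
      // Inside the builtin, struct/array/union accesses are emitted via
      // llvm.preserve.*.access.index so the original field index survives
      // into the object file for later relocation.
      return __builtin_preserve_access_index(ctx->si);
    }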
if (FieldType->isReferenceType()) { @@ -4137,6 +4267,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { switch (E->getCastKind()) { case CK_ToVoid: case CK_BitCast: + case CK_LValueToRValueBitCast: case CK_ArrayToPointerDecay: case CK_FunctionToPointerDecay: case CK_NullToMemberPointer: @@ -4175,6 +4306,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: return EmitUnsupportedLValue(E, "unexpected cast lvalue"); case CK_Dependent: @@ -4548,13 +4681,6 @@ CodeGenFunction::EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E) { return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); } -LValue -CodeGenFunction::EmitLambdaLValue(const LambdaExpr *E) { - AggValueSlot Slot = CreateAggTemp(E->getType(), "temp.lvalue"); - EmitLambdaExpr(E, Slot); - return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); -} - LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { RValue RV = EmitObjCMessageExpr(E); @@ -4630,17 +4756,6 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee const Decl *TargetDecl = OrigCallee.getAbstractInfo().getCalleeDecl().getDecl(); - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) - // We can only guarantee that a function is called from the correct - // context/function based on the appropriate target attributes, - // so only check in the case where we have both always_inline and target - // since otherwise we could be making a conditional call after a check for - // the proper cpu features (and it won't cause code generation issues due to - // function based code generation). - if (TargetDecl->hasAttr<AlwaysInlineAttr>() && - TargetDecl->hasAttr<TargetAttr>()) - checkTargetFeatures(E, FD); - CalleeType = getContext().getCanonicalType(CalleeType); auto PointeeType = cast<PointerType>(CalleeType)->getPointeeType(); @@ -4688,7 +4803,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee llvm::Constant *StaticData[] = {EmitCheckSourceLocation(E->getBeginLoc()), EmitCheckTypeDescriptor(CalleeType)}; EmitCheck(std::make_pair(CalleeRTTIMatch, SanitizerKind::Function), - SanitizerHandler::FunctionTypeMismatch, StaticData, CalleePtr); + SanitizerHandler::FunctionTypeMismatch, StaticData, + {CalleePtr, CalleeRTTI, FTRTTIConst}); Builder.CreateBr(Cont); EmitBlock(Cont); @@ -4768,7 +4884,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee E->getDirectCallee(), /*ParamsToSkip*/ 0, Order); const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall( - Args, FnType, /*isChainCall=*/Chain); + Args, FnType, /*ChainCall=*/Chain); // C99 6.5.2.2p6: // If the expression that denotes the called function has a type @@ -4799,7 +4915,19 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee Callee.setFunctionPointer(CalleePtr); } - return EmitCall(FnInfo, Callee, ReturnValue, Args, nullptr, E->getExprLoc()); + llvm::CallBase *CallOrInvoke = nullptr; + RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &CallOrInvoke, + E->getExprLoc()); + + // Generate function declaration DISuprogram in order to be used + // in debug info about call sites. 
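Editor's note: alongside the extended FunctionTypeMismatch data above, a minimal example (names invented) of the situation -fsanitize=function diagnoses: an indirect call through a function pointer whose prototype does not match the callee's actual type.

    void callee(int) {}

    int main() {
      // The cast itself is fine; calling through the wrong type is undefined
      // behavior, which the FunctionTypeMismatch handler reports, now carrying
      // the callee pointer and both RTTI descriptors.
      void (*fp)() = reinterpret_cast<void (*)()>(&callee);
      fp();
      return 0;
    }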
+ if (CGDebugInfo *DI = getDebugInfo()) { + if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) + DI->EmitFuncDeclForCallSite(CallOrInvoke, QualType(FnType, 0), + CalleeDecl); + } + + return Call; } LValue CodeGenFunction:: diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index db49b3f28a59..695facd50b67 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -1,9 +1,8 @@ //===--- CGExprAgg.cpp - Emit LLVM Code from Aggregate Expressions --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -166,10 +165,11 @@ public: void VisitImplicitValueInitExpr(ImplicitValueInitExpr *E); void VisitNoInitExpr(NoInitExpr *E) { } // Do nothing. void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { + CodeGenFunction::CXXDefaultArgExprScope Scope(CGF, DAE); Visit(DAE->getExpr()); } void VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { - CodeGenFunction::CXXDefaultInitExprScope Scope(CGF); + CodeGenFunction::CXXDefaultInitExprScope Scope(CGF, DIE); Visit(DIE->getExpr()); } void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E); @@ -711,6 +711,25 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { break; } + case CK_LValueToRValueBitCast: { + if (Dest.isIgnored()) { + CGF.EmitAnyExpr(E->getSubExpr(), AggValueSlot::ignored(), + /*ignoreResult=*/true); + break; + } + + LValue SourceLV = CGF.EmitLValue(E->getSubExpr()); + Address SourceAddress = + Builder.CreateElementBitCast(SourceLV.getAddress(), CGF.Int8Ty); + Address DestAddress = + Builder.CreateElementBitCast(Dest.getAddress(), CGF.Int8Ty); + llvm::Value *SizeVal = llvm::ConstantInt::get( + CGF.SizeTy, + CGF.getContext().getTypeSizeInChars(E->getType()).getQuantity()); + Builder.CreateMemCpy(DestAddress, SourceAddress, SizeVal); + break; + } + case CK_DerivedToBase: case CK_BaseToDerived: case CK_UncheckedDerivedToBase: { @@ -760,8 +779,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { // Build a GEP to refer to the subobject. 
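Editor's note: the CK_LValueToRValueBitCast handling above is the aggregate lowering behind __builtin_bit_cast (the builtin underlying std::bit_cast). A small sketch, assuming 16-bit and 32-bit fixed-width integers, of the source form it services.

    #include <cstdint>

    struct Pair { std::uint16_t lo, hi; };   // same size as uint32_t here

    std::uint32_t as_bits(Pair p) {
      // Lowered as a byte-wise copy of the object representation, matching the
      // memcpy-based emission in the hunk above.
      return __builtin_bit_cast(std::uint32_t, p);
    }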
Address valueAddr = - CGF.Builder.CreateStructGEP(valueDest.getAddress(), 0, - CharUnits()); + CGF.Builder.CreateStructGEP(valueDest.getAddress(), 0); valueDest = AggValueSlot::forAddr(valueAddr, valueDest.getQualifiers(), valueDest.isExternallyDestructed(), @@ -781,11 +799,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { CGF.CreateAggTemp(atomicType, "atomic-to-nonatomic.temp"); CGF.EmitAggExpr(E->getSubExpr(), atomicSlot); - Address valueAddr = - Builder.CreateStructGEP(atomicSlot.getAddress(), 0, CharUnits()); + Address valueAddr = Builder.CreateStructGEP(atomicSlot.getAddress(), 0); RValue rvalue = RValue::getAggregate(valueAddr, atomicSlot.isVolatile()); return EmitFinalDestCopy(valueType, rvalue); } + case CK_AddressSpaceConversion: + return Visit(E->getSubExpr()); case CK_LValueToRValue: // If we're loading from a volatile type, force the destination @@ -797,6 +816,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { LLVM_FALLTHROUGH; + case CK_NoOp: case CK_UserDefinedConversion: case CK_ConstructorConversion: @@ -852,10 +872,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLOpaqueType: - case CK_AddressSpaceConversion: + case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: llvm_unreachable("cast kind invalid for aggregate types"); } } @@ -1264,7 +1286,52 @@ void AggExprEmitter::VisitCXXInheritedCtorInitExpr( void AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { AggValueSlot Slot = EnsureSlot(E->getType()); - CGF.EmitLambdaExpr(E, Slot); + LValue SlotLV = CGF.MakeAddrLValue(Slot.getAddress(), E->getType()); + + // We'll need to enter cleanup scopes in case any of the element + // initializers throws an exception. + SmallVector<EHScopeStack::stable_iterator, 16> Cleanups; + llvm::Instruction *CleanupDominator = nullptr; + + CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin(); + for (LambdaExpr::const_capture_init_iterator i = E->capture_init_begin(), + e = E->capture_init_end(); + i != e; ++i, ++CurField) { + // Emit initialization + LValue LV = CGF.EmitLValueForFieldInitialization(SlotLV, *CurField); + if (CurField->hasCapturedVLAType()) { + CGF.EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV); + continue; + } + + EmitInitializationToLValue(*i, LV); + + // Push a destructor if necessary. + if (QualType::DestructionKind DtorKind = + CurField->getType().isDestructedType()) { + assert(LV.isSimple()); + if (CGF.needsEHCleanup(DtorKind)) { + if (!CleanupDominator) + CleanupDominator = CGF.Builder.CreateAlignedLoad( + CGF.Int8Ty, + llvm::Constant::getNullValue(CGF.Int8PtrTy), + CharUnits::One()); // placeholder + + CGF.pushDestroy(EHCleanup, LV.getAddress(), CurField->getType(), + CGF.getDestroyer(DtorKind), false); + Cleanups.push_back(CGF.EHStack.stable_begin()); + } + } + } + + // Deactivate all the partial cleanups in reverse order, which + // generally means popping them. + for (unsigned i = Cleanups.size(); i != 0; --i) + CGF.DeactivateCleanupBlock(Cleanups[i-1], CleanupDominator); + + // Destroy the placeholder if we made one. + if (CleanupDominator) + CleanupDominator->eraseFromParent(); } void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { @@ -1304,7 +1371,8 @@ static bool isSimpleZero(const Expr *E, CodeGenFunction &CGF) { // (int*)0 - Null pointer expressions. 
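Editor's note: the inlined lambda emission above walks capture_init_begin() to capture_init_end() in order and pushes an EH cleanup after each capture that needs destruction. A minimal, self-contained example of why: if a later capture initializer throws, captures already constructed in the closure must be torn down.

    #include <stdexcept>
    #include <string>

    struct Boom {
      Boom() { throw std::runtime_error("boom"); }
    };

    int main() {
      std::string s = "captured by copy";
      try {
        auto l = [s, b = Boom()] {};  // if Boom() throws after s has been copied
        (void)l;                      // into the closure, that copy is destroyed
      } catch (const std::runtime_error &) {
        // handled: the pushed cleanup ran for the partially built closure
      }
      return 0;
    }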
if (const CastExpr *ICE = dyn_cast<CastExpr>(E)) return ICE->getCastKind() == CK_NullToPointer && - CGF.getTypes().isPointerZeroInitializable(E->getType()); + CGF.getTypes().isPointerZeroInitializable(E->getType()) && + !E->HasSideEffects(CGF.getContext()); // '\0' if (const CharacterLiteral *CL = dyn_cast<CharacterLiteral>(E)) return CL->getValue() == 0; @@ -1445,7 +1513,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, - CGF.overlapForBaseInit(CXXRD, BaseRD, Base.isVirtual())); + CGF.getOverlapForBaseInit(CXXRD, BaseRD, Base.isVirtual())); CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot); if (QualType::DestructionKind dtorKind = @@ -1797,15 +1865,32 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { return LV; } -AggValueSlot::Overlap_t CodeGenFunction::overlapForBaseInit( +AggValueSlot::Overlap_t +CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) { + if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType()) + return AggValueSlot::DoesNotOverlap; + + // If the field lies entirely within the enclosing class's nvsize, its tail + // padding cannot overlap any already-initialized object. (The only subobjects + // with greater addresses that might already be initialized are vbases.) + const RecordDecl *ClassRD = FD->getParent(); + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(ClassRD); + if (Layout.getFieldOffset(FD->getFieldIndex()) + + getContext().getTypeSize(FD->getType()) <= + (uint64_t)getContext().toBits(Layout.getNonVirtualSize())) + return AggValueSlot::DoesNotOverlap; + + // The tail padding may contain values we need to preserve. + return AggValueSlot::MayOverlap; +} + +AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit( const CXXRecordDecl *RD, const CXXRecordDecl *BaseRD, bool IsVirtual) { - // Virtual bases are initialized first, in address order, so there's never - // any overlap during their initialization. - // - // FIXME: Under P0840, this is no longer true: the tail padding of a vbase - // of a field could be reused by a vbase of a containing class. + // If the most-derived object is a field declared with [[no_unique_address]], + // the tail padding of any virtual base could be reused for other subobjects + // of that field's class. if (IsVirtual) - return AggValueSlot::DoesNotOverlap; + return AggValueSlot::MayOverlap; // If the base class is laid out entirely within the nvsize of the derived // class, its tail padding cannot yet be initialized, so we can issue diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 884ce96859c5..5476d13b7c46 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -1,9 +1,8 @@ //===--- CGExprCXX.cpp - Emit LLVM Code for C++ expressions ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -11,15 +10,15 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CodeGenFunction.h" #include "ConstantEmitter.h" +#include "TargetInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" using namespace clang; @@ -42,13 +41,11 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, isa<CXXOperatorCallExpr>(CE)); assert(MD->isInstance() && "Trying to emit a member or operator call expr on a static method!"); - ASTContext &C = CGF.getContext(); // Push the this ptr. const CXXRecordDecl *RD = CGF.CGM.getCXXABI().getThisArgumentTypeForMethod(MD); - Args.add(RValue::get(This), - RD ? C.getPointerType(C.getTypeDeclType(RD)) : C.VoidPtrTy); + Args.add(RValue::get(This), CGF.getTypes().DeriveThisType(RD, MD)); // If there is an implicit parameter (e.g. VTT), emit it. if (ImplicitParam) { @@ -56,7 +53,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, } const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); - RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD); + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size()); unsigned PrefixSize = Args.size() - 1; // And the rest of the call args. @@ -94,14 +91,28 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( } RValue CodeGenFunction::EmitCXXDestructorCall( - const CXXDestructorDecl *DD, const CGCallee &Callee, llvm::Value *This, - llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE, - StructorType Type) { + GlobalDecl Dtor, const CGCallee &Callee, llvm::Value *This, QualType ThisTy, + llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE) { + const CXXMethodDecl *DtorDecl = cast<CXXMethodDecl>(Dtor.getDecl()); + + assert(!ThisTy.isNull()); + assert(ThisTy->getAsCXXRecordDecl() == DtorDecl->getParent() && + "Pointer/Object mixup"); + + LangAS SrcAS = ThisTy.getAddressSpace(); + LangAS DstAS = DtorDecl->getMethodQualifiers().getAddressSpace(); + if (SrcAS != DstAS) { + QualType DstTy = DtorDecl->getThisType(); + llvm::Type *NewType = CGM.getTypes().ConvertType(DstTy); + This = getTargetHooks().performAddrSpaceCast(*this, This, SrcAS, DstAS, + NewType); + } + CallArgList Args; - commonEmitCXXMemberOrOperatorCall(*this, DD, This, ImplicitParam, + commonEmitCXXMemberOrOperatorCall(*this, DtorDecl, This, ImplicitParam, ImplicitParamTy, CE, Args, nullptr); - return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(DD, Type), - Callee, ReturnValueSlot(), Args); + return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee, + ReturnValueSlot(), Args); } RValue CodeGenFunction::EmitCXXPseudoDestructorExpr( @@ -253,13 +264,25 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( This = EmitLValue(Base); } + if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) { + // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's + // constructing a new complete object of type Ctor. 
+ assert(!RtlArgs); + assert(ReturnValue.isNull() && "Constructor shouldn't have return value"); + CallArgList Args; + commonEmitCXXMemberOrOperatorCall( + *this, Ctor, This.getPointer(), /*ImplicitParam=*/nullptr, + /*ImplicitParamTy=*/QualType(), CE, Args, nullptr); + + EmitCXXConstructorCall(Ctor, Ctor_Complete, /*ForVirtualBase=*/false, + /*Delegating=*/false, This.getAddress(), Args, + AggValueSlot::DoesNotOverlap, CE->getExprLoc(), + /*NewPointerIsChecked=*/false); + return RValue::get(nullptr); + } if (MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion())) { if (isa<CXXDestructorDecl>(MD)) return RValue::get(nullptr); - if (isa<CXXConstructorDecl>(MD) && - cast<CXXConstructorDecl>(MD)->isDefaultConstructor()) - return RValue::get(nullptr); - if (!MD->getParent()->mayInsertExtraPadding()) { if (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) { // We don't like to generate the trivial copy/move assignment operator @@ -272,20 +295,6 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( EmitAggregateAssign(This, RHS, CE->getType()); return RValue::get(This.getPointer()); } - - if (isa<CXXConstructorDecl>(MD) && - cast<CXXConstructorDecl>(MD)->isCopyOrMoveConstructor()) { - // Trivial move and copy ctor are the same. - assert(CE->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); - const Expr *Arg = *CE->arg_begin(); - LValue RHS = EmitLValue(Arg); - LValue Dest = MakeAddrLValue(This.getAddress(), Arg->getType()); - // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's - // constructing a new complete object of type Ctor. - EmitAggregateCopy(Dest, RHS, Arg->getType(), - AggValueSlot::DoesNotOverlap); - return RValue::get(This.getPointer()); - } llvm_unreachable("unknown trivial member function"); } } @@ -296,10 +305,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( const CGFunctionInfo *FInfo = nullptr; if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl)) FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( - Dtor, StructorType::Complete); - else if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(CalleeDecl)) - FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( - Ctor, StructorType::Complete); + GlobalDecl(Dtor, Dtor_Complete)); else FInfo = &CGM.getTypes().arrangeCXXMethodDeclaration(CalleeDecl); @@ -322,14 +328,9 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (IsImplicitObjectCXXThis || isa<DeclRefExpr>(IOA)) SkippedChecks.set(SanitizerKind::Null, true); } - EmitTypeCheck( - isa<CXXConstructorDecl>(CalleeDecl) ? CodeGenFunction::TCK_ConstructorCall - : CodeGenFunction::TCK_MemberCall, - CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent()), - /*Alignment=*/CharUnits::Zero(), SkippedChecks); - - // FIXME: Uses of 'MD' past this point need to be audited. We may need to use - // 'CalleeDecl' instead. + EmitTypeCheck(CodeGenFunction::TCK_MemberCall, CallLoc, This.getPointer(), + C.getRecordType(CalleeDecl->getParent()), + /*Alignment=*/CharUnits::Zero(), SkippedChecks); // C++ [class.virtual]p12: // Explicit qualification with the scope operator (5.1) suppresses the @@ -339,7 +340,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( // because then we know what the type is. 
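Editor's note: a hedged, source-level example of the Microsoft extension the new constructor path above handles (accepted by clang under -fms-extensions; the type and values are invented): explicitly invoking a constructor on existing storage with the p->Ctor::Ctor(...) syntax, now emitted as a real constructor call rather than as a trivial copy.

    struct Widget {
      int v;
      Widget(int v) : v(v) {}
    };

    void reinit(Widget *p) {
      // MSVC extension: re-run the constructor on *p in place.
      p->Widget::Widget(42);
    }

Standard code would use placement new for this; the syntax is accepted only for MSVC compatibility.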
bool UseVirtualCall = CanUseVirtualCall && !DevirtualizedMethod; - if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(MD)) { + if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl)) { assert(CE->arg_begin() == CE->arg_end() && "Destructor shouldn't have explicit parameters"); assert(ReturnValue.isNull() && "Destructor shouldn't have return value"); @@ -348,33 +349,31 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( *this, Dtor, Dtor_Complete, This.getAddress(), cast<CXXMemberCallExpr>(CE)); } else { + GlobalDecl GD(Dtor, Dtor_Complete); CGCallee Callee; - if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier) - Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty); + if (getLangOpts().AppleKext && Dtor->isVirtual() && HasQualifier) + Callee = BuildAppleKextVirtualCall(Dtor, Qualifier, Ty); else if (!DevirtualizedMethod) - Callee = CGCallee::forDirect( - CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete, FInfo, Ty), - GlobalDecl(Dtor, Dtor_Complete)); + Callee = + CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD, FInfo, Ty), GD); else { - const CXXDestructorDecl *DDtor = - cast<CXXDestructorDecl>(DevirtualizedMethod); - Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty), - GlobalDecl(DDtor, Dtor_Complete)); + Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(GD, Ty), GD); } - EmitCXXMemberOrOperatorCall( - CalleeDecl, Callee, ReturnValue, This.getPointer(), - /*ImplicitParam=*/nullptr, QualType(), CE, nullptr); + + QualType ThisTy = + IsArrow ? Base->getType()->getPointeeType() : Base->getType(); + EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, + /*ImplicitParam=*/nullptr, + /*ImplicitParamTy=*/QualType(), nullptr); } return RValue::get(nullptr); } + // FIXME: Uses of 'MD' past this point need to be audited. We may need to use + // 'CalleeDecl' instead. + CGCallee Callee; - if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) { - Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty), - GlobalDecl(Ctor, Ctor_Complete)); - } else if (UseVirtualCall) { + if (UseVirtualCall) { Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty); } else { if (SanOpts.has(SanitizerKind::CFINVCall) && @@ -454,8 +453,7 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // Push the this ptr. Args.add(RValue::get(ThisPtrForCall), ThisType); - RequiredArgs required = - RequiredArgs::forPrototypePlus(FPT, 1, /*FD=*/nullptr); + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, 1); // And the rest of the call args EmitCallArgs(Args, FPT, E->arguments()); @@ -633,12 +631,10 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, case CXXConstructExpr::CK_NonVirtualBase: Type = Ctor_Base; - } + } - // Call the constructor. - EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating, - Dest.getAddress(), E, Dest.mayOverlap(), - Dest.isSanitizerChecked()); + // Call the constructor. + EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating, Dest, E); } } @@ -702,9 +698,9 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // We multiply the size of all dimensions for NumElements. // e.g for 'int[2][3]', ElemType is 'int' and NumElements is 6. 
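Editor's note: the EmitCXXNewAllocSize changes that follow guard the n * sizeof(T) multiplication for new T[n] against overflow. A conceptual sketch using __builtin_mul_overflow, which mirrors the umul.with.overflow intrinsic the codegen uses; the real lowering forces the size to -1 so the allocation fails, rather than returning null as done here.

    #include <cstddef>
    #include <new>

    void *checked_array_alloc(std::size_t n, std::size_t elem_size) {
      std::size_t bytes;
      if (__builtin_mul_overflow(n, elem_size, &bytes))
        return nullptr;               // overflow detected: refuse to allocate
      return ::operator new(bytes);
    }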
numElements = - ConstantEmitter(CGF).tryEmitAbstract(e->getArraySize(), e->getType()); + ConstantEmitter(CGF).tryEmitAbstract(*e->getArraySize(), e->getType()); if (!numElements) - numElements = CGF.EmitScalarExpr(e->getArraySize()); + numElements = CGF.EmitScalarExpr(*e->getArraySize()); assert(isa<llvm::IntegerType>(numElements->getType())); // The number of elements can be have an arbitrary integer type; @@ -714,7 +710,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // important way: if the count is negative, it's an error even if // the cookie size would bring the total size >= 0. bool isSigned - = e->getArraySize()->getType()->isSignedIntegerOrEnumerationType(); + = (*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType(); llvm::IntegerType *numElementsType = cast<llvm::IntegerType>(numElements->getType()); unsigned numElementsWidth = numElementsType->getBitWidth(); @@ -866,7 +862,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // can be ignored because the result shouldn't be used if // allocation fails. if (typeSizeMultiplier != 1) { - llvm::Value *umul_with_overflow + llvm::Function *umul_with_overflow = CGF.CGM.getIntrinsic(llvm::Intrinsic::umul_with_overflow, CGF.SizeTy); llvm::Value *tsmV = @@ -906,7 +902,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, if (cookieSize != 0) { sizeWithoutCookie = size; - llvm::Value *uadd_with_overflow + llvm::Function *uadd_with_overflow = CGF.CGM.getIntrinsic(llvm::Intrinsic::uadd_with_overflow, CGF.SizeTy); llvm::Value *cookieSizeV = llvm::ConstantInt::get(CGF.SizeTy, cookieSize); @@ -1293,12 +1289,12 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, const FunctionDecl *CalleeDecl, const FunctionProtoType *CalleeType, const CallArgList &Args) { - llvm::Instruction *CallOrInvoke; + llvm::CallBase *CallOrInvoke; llvm::Constant *CalleePtr = CGF.CGM.GetAddrOfFunction(CalleeDecl); CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl)); RValue RV = CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall( - Args, CalleeType, /*chainCall=*/false), + Args, CalleeType, /*ChainCall=*/false), Callee, ReturnValueSlot(), Args, &CallOrInvoke); /// C++1y [expr.new]p10: @@ -1309,15 +1305,8 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, llvm::Function *Fn = dyn_cast<llvm::Function>(CalleePtr); if (CalleeDecl->isReplaceableGlobalAllocationFunction() && Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) { - // FIXME: Add addAttribute to CallSite. - if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(CallOrInvoke)) - CI->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::Builtin); - else if (llvm::InvokeInst *II = dyn_cast<llvm::InvokeInst>(CallOrInvoke)) - II->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::Builtin); - else - llvm_unreachable("unexpected kind of call instruction"); + CallOrInvoke->addAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::Builtin); } return RV; @@ -1715,10 +1704,16 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { result.getAlignment()); // Emit sanitizer checks for pointer value now, so that in the case of an - // array it was checked only once and not at each constructor call. + // array it was checked only once and not at each constructor call. We may + // have already checked that the pointer is non-null. + // FIXME: If we have an array cookie and a potentially-throwing allocator, + // we'll null check the wrong pointer here. 
+ SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::Null, nullCheck); EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, - E->getAllocatedTypeSourceInfo()->getTypeLoc().getBeginLoc(), - result.getPointer(), allocType); + E->getAllocatedTypeSourceInfo()->getTypeLoc().getBeginLoc(), + result.getPointer(), allocType, result.getAlignment(), + SkippedChecks, numElements); EmitNewInitializer(*this, E, allocType, elementTy, result, numElements, allocSizeWithoutCookie); @@ -1905,7 +1900,7 @@ static void EmitObjectDelete(CodeGenFunction &CGF, CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, /*Delegating=*/false, - Ptr); + Ptr, ElementType); else if (auto Lifetime = ElementType.getObjCLifetime()) { switch (Lifetime) { case Qualifiers::OCL_None: @@ -2253,21 +2248,3 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, return Value; } - -void CodeGenFunction::EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Slot) { - LValue SlotLV = MakeAddrLValue(Slot.getAddress(), E->getType()); - - CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin(); - for (LambdaExpr::const_capture_init_iterator i = E->capture_init_begin(), - e = E->capture_init_end(); - i != e; ++i, ++CurField) { - // Emit initialization - LValue LV = EmitLValueForFieldInitialization(SlotLV, *CurField); - if (CurField->hasCapturedVLAType()) { - auto VAT = CurField->getCapturedVLAType(); - EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV); - } else { - EmitInitializerForField(*CurField, LV, *i); - } - } -} diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index 2db693b44c90..6a5fb45ba259 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -1,9 +1,8 @@ //===--- CGExprComplex.cpp - Emit LLVM Code for Complex Exprs -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -214,10 +213,11 @@ public: return Visit(E->getSubExpr()); } ComplexPairTy VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { + CodeGenFunction::CXXDefaultArgExprScope Scope(CGF, DAE); return Visit(DAE->getExpr()); } ComplexPairTy VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { - CodeGenFunction::CXXDefaultInitExprScope Scope(CGF); + CodeGenFunction::CXXDefaultInitExprScope Scope(CGF, DIE); return Visit(DIE->getExpr()); } ComplexPairTy VisitExprWithCleanups(ExprWithCleanups *E) { @@ -328,15 +328,12 @@ public: Address CodeGenFunction::emitAddrOfRealComponent(Address addr, QualType complexType) { - CharUnits offset = CharUnits::Zero(); - return Builder.CreateStructGEP(addr, 0, offset, addr.getName() + ".realp"); + return Builder.CreateStructGEP(addr, 0, addr.getName() + ".realp"); } Address CodeGenFunction::emitAddrOfImagComponent(Address addr, QualType complexType) { - QualType eltType = complexType->castAs<ComplexType>()->getElementType(); - CharUnits offset = getContext().getTypeSizeInChars(eltType); - return Builder.CreateStructGEP(addr, 1, offset, addr.getName() + ".imagp"); + return Builder.CreateStructGEP(addr, 1, addr.getName() + ".imagp"); } /// EmitLoadOfLValue - Given an RValue reference for a complex, emit code to @@ -467,6 +464,15 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, return EmitLoadOfLValue(CGF.MakeAddrLValue(V, DestTy), Op->getExprLoc()); } + case CK_LValueToRValueBitCast: { + LValue SourceLVal = CGF.EmitLValue(Op); + Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(), + CGF.ConvertTypeForMem(DestTy)); + LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); + DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); + return EmitLoadOfLValue(DestLV, Op->getExprLoc()); + } + case CK_BitCast: case CK_BaseToDerived: case CK_DerivedToBase: @@ -513,6 +519,8 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: llvm_unreachable("invalid cast kind for complex value"); case CK_FloatingRealToComplex: @@ -628,12 +636,13 @@ ComplexPairTy ComplexExprEmitter::EmitComplexBinOpLibCall(StringRef LibCallName, Args, cast<FunctionType>(FQTy.getTypePtr()), false); llvm::FunctionType *FTy = CGF.CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Constant *Func = CGF.CGM.CreateBuiltinFunction(FTy, LibCallName); + llvm::FunctionCallee Func = CGF.CGM.CreateRuntimeFunction( + FTy, LibCallName, llvm::AttributeList(), true); CGCallee Callee = CGCallee::forDirect(Func, FQTy->getAs<FunctionProtoType>()); - llvm::Instruction *Call; + llvm::CallBase *Call; RValue Res = CGF.EmitCall(FuncInfo, Callee, ReturnValueSlot(), Args, &Call); - cast<llvm::CallInst>(Call)->setCallingConv(CGF.CGM.getRuntimeCC()); + Call->setCallingConv(CGF.CGM.getRuntimeCC()); return Res.getComplexVal(); } diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index c9475840aeeb..31cf2aef1ba0 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -1,9 +1,8 @@ //===--- CGExprConstant.cpp - Emit LLVM Code from Constant Expressions ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -23,6 +22,8 @@ #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -31,345 +32,637 @@ using namespace clang; using namespace CodeGen; //===----------------------------------------------------------------------===// -// ConstStructBuilder +// ConstantAggregateBuilder //===----------------------------------------------------------------------===// namespace { class ConstExprEmitter; -class ConstStructBuilder { - CodeGenModule &CGM; - ConstantEmitter &Emitter; - - bool Packed; - CharUnits NextFieldOffsetInChars; - CharUnits LLVMStructAlignment; - SmallVector<llvm::Constant *, 32> Elements; -public: - static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, - ConstExprEmitter *ExprEmitter, - llvm::Constant *Base, - InitListExpr *Updater, - QualType ValTy); - static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, - InitListExpr *ILE, QualType StructTy); - static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, - const APValue &Value, QualType ValTy); - -private: - ConstStructBuilder(ConstantEmitter &emitter) - : CGM(emitter.CGM), Emitter(emitter), Packed(false), - NextFieldOffsetInChars(CharUnits::Zero()), - LLVMStructAlignment(CharUnits::One()) { } - - void AppendField(const FieldDecl *Field, uint64_t FieldOffset, - llvm::Constant *InitExpr); - - void AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst); - - void AppendBitField(const FieldDecl *Field, uint64_t FieldOffset, - llvm::ConstantInt *InitExpr); - - void AppendPadding(CharUnits PadSize); - void AppendTailPadding(CharUnits RecordSize); - - void ConvertStructToPacked(); +struct ConstantAggregateBuilderUtils { + CodeGenModule &CGM; - bool Build(InitListExpr *ILE); - bool Build(ConstExprEmitter *Emitter, llvm::Constant *Base, - InitListExpr *Updater); - bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, - const CXXRecordDecl *VTableClass, CharUnits BaseOffset); - llvm::Constant *Finalize(QualType Ty); + ConstantAggregateBuilderUtils(CodeGenModule &CGM) : CGM(CGM) {} CharUnits getAlignment(const llvm::Constant *C) const { - if (Packed) return CharUnits::One(); return CharUnits::fromQuantity( CGM.getDataLayout().getABITypeAlignment(C->getType())); } - CharUnits getSizeInChars(const llvm::Constant *C) const { - return CharUnits::fromQuantity( - CGM.getDataLayout().getTypeAllocSize(C->getType())); + CharUnits getSize(llvm::Type *Ty) const { + return CharUnits::fromQuantity(CGM.getDataLayout().getTypeAllocSize(Ty)); } -}; - -void ConstStructBuilder:: -AppendField(const FieldDecl *Field, uint64_t FieldOffset, - llvm::Constant *InitCst) { - const ASTContext &Context = CGM.getContext(); - - CharUnits FieldOffsetInChars = Context.toCharUnitsFromBits(FieldOffset); - - AppendBytes(FieldOffsetInChars, InitCst); -} - -void ConstStructBuilder:: -AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst) { - - assert(NextFieldOffsetInChars <= FieldOffsetInChars - && "Field offset mismatch!"); - - CharUnits FieldAlignment = getAlignment(InitCst); - - // Round up the field offset to the alignment of 
the field type. - CharUnits AlignedNextFieldOffsetInChars = - NextFieldOffsetInChars.alignTo(FieldAlignment); - if (AlignedNextFieldOffsetInChars < FieldOffsetInChars) { - // We need to append padding. - AppendPadding(FieldOffsetInChars - NextFieldOffsetInChars); - - assert(NextFieldOffsetInChars == FieldOffsetInChars && - "Did not add enough padding!"); + CharUnits getSize(const llvm::Constant *C) const { + return getSize(C->getType()); + } - AlignedNextFieldOffsetInChars = - NextFieldOffsetInChars.alignTo(FieldAlignment); + llvm::Constant *getPadding(CharUnits PadSize) const { + llvm::Type *Ty = CGM.Int8Ty; + if (PadSize > CharUnits::One()) + Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity()); + return llvm::UndefValue::get(Ty); } - if (AlignedNextFieldOffsetInChars > FieldOffsetInChars) { - assert(!Packed && "Alignment is wrong even with a packed struct!"); + llvm::Constant *getZeroes(CharUnits ZeroSize) const { + llvm::Type *Ty = llvm::ArrayType::get(CGM.Int8Ty, ZeroSize.getQuantity()); + return llvm::ConstantAggregateZero::get(Ty); + } +}; - // Convert the struct to a packed struct. - ConvertStructToPacked(); +/// Incremental builder for an llvm::Constant* holding a struct or array +/// constant. +class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils { + /// The elements of the constant. These two arrays must have the same size; + /// Offsets[i] describes the offset of Elems[i] within the constant. The + /// elements are kept in increasing offset order, and we ensure that there + /// is no overlap: Offsets[i+1] >= Offsets[i] + getSize(Elemes[i]). + /// + /// This may contain explicit padding elements (in order to create a + /// natural layout), but need not. Gaps between elements are implicitly + /// considered to be filled with undef. + llvm::SmallVector<llvm::Constant*, 32> Elems; + llvm::SmallVector<CharUnits, 32> Offsets; + + /// The size of the constant (the maximum end offset of any added element). + /// May be larger than the end of Elems.back() if we split the last element + /// and removed some trailing undefs. + CharUnits Size = CharUnits::Zero(); + + /// This is true only if laying out Elems in order as the elements of a + /// non-packed LLVM struct will give the correct layout. + bool NaturalLayout = true; + + bool split(size_t Index, CharUnits Hint); + Optional<size_t> splitAt(CharUnits Pos); + + static llvm::Constant *buildFrom(CodeGenModule &CGM, + ArrayRef<llvm::Constant *> Elems, + ArrayRef<CharUnits> Offsets, + CharUnits StartOffset, CharUnits Size, + bool NaturalLayout, llvm::Type *DesiredTy, + bool AllowOversized); - // After we pack the struct, we may need to insert padding. - if (NextFieldOffsetInChars < FieldOffsetInChars) { - // We need to append padding. - AppendPadding(FieldOffsetInChars - NextFieldOffsetInChars); +public: + ConstantAggregateBuilder(CodeGenModule &CGM) + : ConstantAggregateBuilderUtils(CGM) {} + + /// Update or overwrite the value starting at \p Offset with \c C. + /// + /// \param AllowOverwrite If \c true, this constant might overwrite (part of) + /// a constant that has already been added. This flag is only used to + /// detect bugs. + bool add(llvm::Constant *C, CharUnits Offset, bool AllowOverwrite); + + /// Update or overwrite the bits starting at \p OffsetInBits with \p Bits. + bool addBits(llvm::APInt Bits, uint64_t OffsetInBits, bool AllowOverwrite); + + /// Attempt to condense the value starting at \p Offset to a constant of type + /// \p DesiredTy. 
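Editor's note: a concrete, made-up instance of what the new ConstantAggregateBuilder has to cope with: constant initializers whose members sit at offsets that LLVM's natural struct layout would not produce, so offsets and padding must be tracked explicitly, as in the Elems/Offsets arrays above.

    struct __attribute__((packed)) Packed {
      char tag;     // offset 0
      int value;    // offset 1: not naturally aligned for i32
    };

    const Packed global = {1, 0x12345678};  // needs a packed LLVM struct constant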
+ void condense(CharUnits Offset, llvm::Type *DesiredTy); + + /// Produce a constant representing the entire accumulated value, ideally of + /// the specified type. If \p AllowOversized, the constant might be larger + /// than implied by \p DesiredTy (eg, if there is a flexible array member). + /// Otherwise, the constant will be of exactly the same size as \p DesiredTy + /// even if we can't represent it as that type. + llvm::Constant *build(llvm::Type *DesiredTy, bool AllowOversized) const { + return buildFrom(CGM, Elems, Offsets, CharUnits::Zero(), Size, + NaturalLayout, DesiredTy, AllowOversized); + } +}; - assert(NextFieldOffsetInChars == FieldOffsetInChars && - "Did not add enough padding!"); +template<typename Container, typename Range = std::initializer_list< + typename Container::value_type>> +static void replace(Container &C, size_t BeginOff, size_t EndOff, Range Vals) { + assert(BeginOff <= EndOff && "invalid replacement range"); + llvm::replace(C, C.begin() + BeginOff, C.begin() + EndOff, Vals); +} + +bool ConstantAggregateBuilder::add(llvm::Constant *C, CharUnits Offset, + bool AllowOverwrite) { + // Common case: appending to a layout. + if (Offset >= Size) { + CharUnits Align = getAlignment(C); + CharUnits AlignedSize = Size.alignTo(Align); + if (AlignedSize > Offset || Offset.alignTo(Align) != Offset) + NaturalLayout = false; + else if (AlignedSize < Offset) { + Elems.push_back(getPadding(Offset - Size)); + Offsets.push_back(Size); } - AlignedNextFieldOffsetInChars = NextFieldOffsetInChars; + Elems.push_back(C); + Offsets.push_back(Offset); + Size = Offset + getSize(C); + return true; } - // Add the field. - Elements.push_back(InitCst); - NextFieldOffsetInChars = AlignedNextFieldOffsetInChars + - getSizeInChars(InitCst); + // Uncommon case: constant overlaps what we've already created. + llvm::Optional<size_t> FirstElemToReplace = splitAt(Offset); + if (!FirstElemToReplace) + return false; - if (Packed) - assert(LLVMStructAlignment == CharUnits::One() && - "Packed struct not byte-aligned!"); - else - LLVMStructAlignment = std::max(LLVMStructAlignment, FieldAlignment); + CharUnits CSize = getSize(C); + llvm::Optional<size_t> LastElemToReplace = splitAt(Offset + CSize); + if (!LastElemToReplace) + return false; + + assert((FirstElemToReplace == LastElemToReplace || AllowOverwrite) && + "unexpectedly overwriting field"); + + replace(Elems, *FirstElemToReplace, *LastElemToReplace, {C}); + replace(Offsets, *FirstElemToReplace, *LastElemToReplace, {Offset}); + Size = std::max(Size, Offset + CSize); + NaturalLayout = false; + return true; } -void ConstStructBuilder::AppendBitField(const FieldDecl *Field, - uint64_t FieldOffset, - llvm::ConstantInt *CI) { +bool ConstantAggregateBuilder::addBits(llvm::APInt Bits, uint64_t OffsetInBits, + bool AllowOverwrite) { const ASTContext &Context = CGM.getContext(); - const uint64_t CharWidth = Context.getCharWidth(); - uint64_t NextFieldOffsetInBits = Context.toBits(NextFieldOffsetInChars); - if (FieldOffset > NextFieldOffsetInBits) { - // We need to add padding. - CharUnits PadSize = Context.toCharUnitsFromBits( - llvm::alignTo(FieldOffset - NextFieldOffsetInBits, - Context.getTargetInfo().getCharAlign())); + const uint64_t CharWidth = CGM.getContext().getCharWidth(); + + // Offset of where we want the first bit to go within the bits of the + // current char. + unsigned OffsetWithinChar = OffsetInBits % CharWidth; + + // We split bit-fields up into individual bytes. Walk over the bytes and + // update them. 
+ for (CharUnits OffsetInChars = + Context.toCharUnitsFromBits(OffsetInBits - OffsetWithinChar); + /**/; ++OffsetInChars) { + // Number of bits we want to fill in this char. + unsigned WantedBits = + std::min((uint64_t)Bits.getBitWidth(), CharWidth - OffsetWithinChar); + + // Get a char containing the bits we want in the right places. The other + // bits have unspecified values. + llvm::APInt BitsThisChar = Bits; + if (BitsThisChar.getBitWidth() < CharWidth) + BitsThisChar = BitsThisChar.zext(CharWidth); + if (CGM.getDataLayout().isBigEndian()) { + // Figure out how much to shift by. We may need to left-shift if we have + // less than one byte of Bits left. + int Shift = Bits.getBitWidth() - CharWidth + OffsetWithinChar; + if (Shift > 0) + BitsThisChar.lshrInPlace(Shift); + else if (Shift < 0) + BitsThisChar = BitsThisChar.shl(-Shift); + } else { + BitsThisChar = BitsThisChar.shl(OffsetWithinChar); + } + if (BitsThisChar.getBitWidth() > CharWidth) + BitsThisChar = BitsThisChar.trunc(CharWidth); - AppendPadding(PadSize); - } + if (WantedBits == CharWidth) { + // Got a full byte: just add it directly. + add(llvm::ConstantInt::get(CGM.getLLVMContext(), BitsThisChar), + OffsetInChars, AllowOverwrite); + } else { + // Partial byte: update the existing integer if there is one. If we + // can't split out a 1-CharUnit range to update, then we can't add + // these bits and fail the entire constant emission. + llvm::Optional<size_t> FirstElemToUpdate = splitAt(OffsetInChars); + if (!FirstElemToUpdate) + return false; + llvm::Optional<size_t> LastElemToUpdate = + splitAt(OffsetInChars + CharUnits::One()); + if (!LastElemToUpdate) + return false; + assert(*LastElemToUpdate - *FirstElemToUpdate < 2 && + "should have at most one element covering one byte"); + + // Figure out which bits we want and discard the rest. + llvm::APInt UpdateMask(CharWidth, 0); + if (CGM.getDataLayout().isBigEndian()) + UpdateMask.setBits(CharWidth - OffsetWithinChar - WantedBits, + CharWidth - OffsetWithinChar); + else + UpdateMask.setBits(OffsetWithinChar, OffsetWithinChar + WantedBits); + BitsThisChar &= UpdateMask; + + if (*FirstElemToUpdate == *LastElemToUpdate || + Elems[*FirstElemToUpdate]->isNullValue() || + isa<llvm::UndefValue>(Elems[*FirstElemToUpdate])) { + // All existing bits are either zero or undef. + add(llvm::ConstantInt::get(CGM.getLLVMContext(), BitsThisChar), + OffsetInChars, /*AllowOverwrite*/ true); + } else { + llvm::Constant *&ToUpdate = Elems[*FirstElemToUpdate]; + // In order to perform a partial update, we need the existing bitwise + // value, which we can only extract for a constant int. + auto *CI = dyn_cast<llvm::ConstantInt>(ToUpdate); + if (!CI) + return false; + // Because this is a 1-CharUnit range, the constant occupying it must + // be exactly one CharUnit wide. + assert(CI->getBitWidth() == CharWidth && "splitAt failed"); + assert((!(CI->getValue() & UpdateMask) || AllowOverwrite) && + "unexpectedly overwriting bitfield"); + BitsThisChar |= (CI->getValue() & ~UpdateMask); + ToUpdate = llvm::ConstantInt::get(CGM.getLLVMContext(), BitsThisChar); + } + } - uint64_t FieldSize = Field->getBitWidthValue(Context); + // Stop if we've added all the bits. + if (WantedBits == Bits.getBitWidth()) + break; - llvm::APInt FieldValue = CI->getValue(); + // Remove the consumed bits from Bits. 
+    if (!CGM.getDataLayout().isBigEndian())
+      Bits.lshrInPlace(WantedBits);
+    Bits = Bits.trunc(Bits.getBitWidth() - WantedBits);
-  // Promote the size of FieldValue if necessary
-  // FIXME: This should never occur, but currently it can because initializer
-  // constants are cast to bool, and because clang is not enforcing bitfield
-  // width limits.
-  if (FieldSize > FieldValue.getBitWidth())
-    FieldValue = FieldValue.zext(FieldSize);
+    // The remaining bits go at the start of the following bytes.
+    OffsetWithinChar = 0;
+  }
-  // Truncate the size of FieldValue to the bit field size.
-  if (FieldSize < FieldValue.getBitWidth())
-    FieldValue = FieldValue.trunc(FieldSize);
+  return true;
+}
-  NextFieldOffsetInBits = Context.toBits(NextFieldOffsetInChars);
-  if (FieldOffset < NextFieldOffsetInBits) {
-    // Either part of the field or the entire field can go into the previous
-    // byte.
-    assert(!Elements.empty() && "Elements can't be empty!");
+/// Returns a position within Elems and Offsets such that all elements
+/// before the returned index end before Pos and all elements at or after
+/// the returned index begin at or after Pos. Splits elements as necessary
+/// to ensure this. Returns None if we find something we can't split.
+Optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits Pos) {
+  if (Pos >= Size)
+    return Offsets.size();
+
+  while (true) {
+    auto FirstAfterPos = llvm::upper_bound(Offsets, Pos);
+    if (FirstAfterPos == Offsets.begin())
+      return 0;
+
+    // If we already have an element starting at Pos, we're done.
+    size_t LastAtOrBeforePosIndex = FirstAfterPos - Offsets.begin() - 1;
+    if (Offsets[LastAtOrBeforePosIndex] == Pos)
+      return LastAtOrBeforePosIndex;
+
+    // We found an element starting before Pos. Check for overlap.
+    if (Offsets[LastAtOrBeforePosIndex] +
+        getSize(Elems[LastAtOrBeforePosIndex]) <= Pos)
+      return LastAtOrBeforePosIndex + 1;
+
+    // Try to decompose it into smaller constants.
+    if (!split(LastAtOrBeforePosIndex, Pos))
+      return None;
+  }
+}
+
+/// Split the constant at index Index, if possible. Return true if we did.
+/// Hint indicates the location at which we'd like to split, but may be
+/// ignored.
+bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) {
+  NaturalLayout = false;
+  llvm::Constant *C = Elems[Index];
+  CharUnits Offset = Offsets[Index];
+
+  if (auto *CA = dyn_cast<llvm::ConstantAggregate>(C)) {
+    replace(Elems, Index, Index + 1,
+            llvm::map_range(llvm::seq(0u, CA->getNumOperands()),
+                            [&](unsigned Op) { return CA->getOperand(Op); }));
+    if (auto *Seq = dyn_cast<llvm::SequentialType>(CA->getType())) {
+      // Array or vector.
+      CharUnits ElemSize = getSize(Seq->getElementType());
+      replace(
+          Offsets, Index, Index + 1,
+          llvm::map_range(llvm::seq(0u, CA->getNumOperands()),
+                          [&](unsigned Op) { return Offset + Op * ElemSize; }));
+    } else {
+      // Must be a struct.
+      auto *ST = cast<llvm::StructType>(CA->getType());
+      const llvm::StructLayout *Layout =
+          CGM.getDataLayout().getStructLayout(ST);
+      replace(Offsets, Index, Index + 1,
+              llvm::map_range(
+                  llvm::seq(0u, CA->getNumOperands()), [&](unsigned Op) {
+                    return Offset + CharUnits::fromQuantity(
+                                        Layout->getElementOffset(Op));
+                  }));
+    }
+    return true;
+  }
-    unsigned BitsInPreviousByte = NextFieldOffsetInBits - FieldOffset;
+  if (auto *CDS = dyn_cast<llvm::ConstantDataSequential>(C)) {
+    // FIXME: If possible, split into two ConstantDataSequentials at Hint.
+ CharUnits ElemSize = getSize(CDS->getElementType()); + replace(Elems, Index, Index + 1, + llvm::map_range(llvm::seq(0u, CDS->getNumElements()), + [&](unsigned Elem) { + return CDS->getElementAsConstant(Elem); + })); + replace(Offsets, Index, Index + 1, + llvm::map_range( + llvm::seq(0u, CDS->getNumElements()), + [&](unsigned Elem) { return Offset + Elem * ElemSize; })); + return true; + } - bool FitsCompletelyInPreviousByte = - BitsInPreviousByte >= FieldValue.getBitWidth(); + if (isa<llvm::ConstantAggregateZero>(C)) { + CharUnits ElemSize = getSize(C); + assert(Hint > Offset && Hint < Offset + ElemSize && "nothing to split"); + replace(Elems, Index, Index + 1, + {getZeroes(Hint - Offset), getZeroes(Offset + ElemSize - Hint)}); + replace(Offsets, Index, Index + 1, {Offset, Hint}); + return true; + } - llvm::APInt Tmp = FieldValue; + if (isa<llvm::UndefValue>(C)) { + replace(Elems, Index, Index + 1, {}); + replace(Offsets, Index, Index + 1, {}); + return true; + } - if (!FitsCompletelyInPreviousByte) { - unsigned NewFieldWidth = FieldSize - BitsInPreviousByte; + // FIXME: We could split a ConstantInt if the need ever arose. + // We don't need to do this to handle bit-fields because we always eagerly + // split them into 1-byte chunks. - if (CGM.getDataLayout().isBigEndian()) { - Tmp.lshrInPlace(NewFieldWidth); - Tmp = Tmp.trunc(BitsInPreviousByte); + return false; +} - // We want the remaining high bits. - FieldValue = FieldValue.trunc(NewFieldWidth); - } else { - Tmp = Tmp.trunc(BitsInPreviousByte); +static llvm::Constant * +EmitArrayConstant(CodeGenModule &CGM, llvm::ArrayType *DesiredType, + llvm::Type *CommonElementType, unsigned ArrayBound, + SmallVectorImpl<llvm::Constant *> &Elements, + llvm::Constant *Filler); + +llvm::Constant *ConstantAggregateBuilder::buildFrom( + CodeGenModule &CGM, ArrayRef<llvm::Constant *> Elems, + ArrayRef<CharUnits> Offsets, CharUnits StartOffset, CharUnits Size, + bool NaturalLayout, llvm::Type *DesiredTy, bool AllowOversized) { + ConstantAggregateBuilderUtils Utils(CGM); + + if (Elems.empty()) + return llvm::UndefValue::get(DesiredTy); + + auto Offset = [&](size_t I) { return Offsets[I] - StartOffset; }; + + // If we want an array type, see if all the elements are the same type and + // appropriately spaced. + if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(DesiredTy)) { + assert(!AllowOversized && "oversized array emission not supported"); + + bool CanEmitArray = true; + llvm::Type *CommonType = Elems[0]->getType(); + llvm::Constant *Filler = llvm::Constant::getNullValue(CommonType); + CharUnits ElemSize = Utils.getSize(ATy->getElementType()); + SmallVector<llvm::Constant*, 32> ArrayElements; + for (size_t I = 0; I != Elems.size(); ++I) { + // Skip zeroes; we'll use a zero value as our array filler. + if (Elems[I]->isNullValue()) + continue; - // We want the remaining low bits. - FieldValue.lshrInPlace(BitsInPreviousByte); - FieldValue = FieldValue.trunc(NewFieldWidth); + // All remaining elements must be the same type. 
+ if (Elems[I]->getType() != CommonType || + Offset(I) % ElemSize != 0) { + CanEmitArray = false; + break; } + ArrayElements.resize(Offset(I) / ElemSize + 1, Filler); + ArrayElements.back() = Elems[I]; } - Tmp = Tmp.zext(CharWidth); - if (CGM.getDataLayout().isBigEndian()) { - if (FitsCompletelyInPreviousByte) - Tmp = Tmp.shl(BitsInPreviousByte - FieldValue.getBitWidth()); - } else { - Tmp = Tmp.shl(CharWidth - BitsInPreviousByte); + if (CanEmitArray) { + return EmitArrayConstant(CGM, ATy, CommonType, ATy->getNumElements(), + ArrayElements, Filler); } - // 'or' in the bits that go into the previous byte. - llvm::Value *LastElt = Elements.back(); - if (llvm::ConstantInt *Val = dyn_cast<llvm::ConstantInt>(LastElt)) - Tmp |= Val->getValue(); - else { - assert(isa<llvm::UndefValue>(LastElt)); - // If there is an undef field that we're adding to, it can either be a - // scalar undef (in which case, we just replace it with our field) or it - // is an array. If it is an array, we have to pull one byte off the - // array so that the other undef bytes stay around. - if (!isa<llvm::IntegerType>(LastElt->getType())) { - // The undef padding will be a multibyte array, create a new smaller - // padding and then an hole for our i8 to get plopped into. - assert(isa<llvm::ArrayType>(LastElt->getType()) && - "Expected array padding of undefs"); - llvm::ArrayType *AT = cast<llvm::ArrayType>(LastElt->getType()); - assert(AT->getElementType()->isIntegerTy(CharWidth) && - AT->getNumElements() != 0 && - "Expected non-empty array padding of undefs"); - - // Remove the padding array. - NextFieldOffsetInChars -= CharUnits::fromQuantity(AT->getNumElements()); - Elements.pop_back(); - - // Add the padding back in two chunks. - AppendPadding(CharUnits::fromQuantity(AT->getNumElements()-1)); - AppendPadding(CharUnits::One()); - assert(isa<llvm::UndefValue>(Elements.back()) && - Elements.back()->getType()->isIntegerTy(CharWidth) && - "Padding addition didn't work right"); - } + // Can't emit as an array, carry on to emit as a struct. + } + + CharUnits DesiredSize = Utils.getSize(DesiredTy); + CharUnits Align = CharUnits::One(); + for (llvm::Constant *C : Elems) + Align = std::max(Align, Utils.getAlignment(C)); + CharUnits AlignedSize = Size.alignTo(Align); + + bool Packed = false; + ArrayRef<llvm::Constant*> UnpackedElems = Elems; + llvm::SmallVector<llvm::Constant*, 32> UnpackedElemStorage; + if ((DesiredSize < AlignedSize && !AllowOversized) || + DesiredSize.alignTo(Align) != DesiredSize) { + // The natural layout would be the wrong size; force use of a packed layout. + NaturalLayout = false; + Packed = true; + } else if (DesiredSize > AlignedSize) { + // The constant would be too small. Add padding to fix it. + UnpackedElemStorage.assign(Elems.begin(), Elems.end()); + UnpackedElemStorage.push_back(Utils.getPadding(DesiredSize - Size)); + UnpackedElems = UnpackedElemStorage; + } + + // If we don't have a natural layout, insert padding as necessary. + // As we go, double-check to see if we can actually just emit Elems + // as a non-packed struct and do so opportunistically if possible. 
+ llvm::SmallVector<llvm::Constant*, 32> PackedElems; + if (!NaturalLayout) { + CharUnits SizeSoFar = CharUnits::Zero(); + for (size_t I = 0; I != Elems.size(); ++I) { + CharUnits Align = Utils.getAlignment(Elems[I]); + CharUnits NaturalOffset = SizeSoFar.alignTo(Align); + CharUnits DesiredOffset = Offset(I); + assert(DesiredOffset >= SizeSoFar && "elements out of order"); + + if (DesiredOffset != NaturalOffset) + Packed = true; + if (DesiredOffset != SizeSoFar) + PackedElems.push_back(Utils.getPadding(DesiredOffset - SizeSoFar)); + PackedElems.push_back(Elems[I]); + SizeSoFar = DesiredOffset + Utils.getSize(Elems[I]); + } + // If we're using the packed layout, pad it out to the desired size if + // necessary. + if (Packed) { + assert((SizeSoFar <= DesiredSize || AllowOversized) && + "requested size is too small for contents"); + if (SizeSoFar < DesiredSize) + PackedElems.push_back(Utils.getPadding(DesiredSize - SizeSoFar)); } + } - Elements.back() = llvm::ConstantInt::get(CGM.getLLVMContext(), Tmp); + llvm::StructType *STy = llvm::ConstantStruct::getTypeForElements( + CGM.getLLVMContext(), Packed ? PackedElems : UnpackedElems, Packed); - if (FitsCompletelyInPreviousByte) - return; + // Pick the type to use. If the type is layout identical to the desired + // type then use it, otherwise use whatever the builder produced for us. + if (llvm::StructType *DesiredSTy = dyn_cast<llvm::StructType>(DesiredTy)) { + if (DesiredSTy->isLayoutIdentical(STy)) + STy = DesiredSTy; } - while (FieldValue.getBitWidth() > CharWidth) { - llvm::APInt Tmp; + return llvm::ConstantStruct::get(STy, Packed ? PackedElems : UnpackedElems); +} - if (CGM.getDataLayout().isBigEndian()) { - // We want the high bits. - Tmp = - FieldValue.lshr(FieldValue.getBitWidth() - CharWidth).trunc(CharWidth); - } else { - // We want the low bits. - Tmp = FieldValue.trunc(CharWidth); +void ConstantAggregateBuilder::condense(CharUnits Offset, + llvm::Type *DesiredTy) { + CharUnits Size = getSize(DesiredTy); - FieldValue.lshrInPlace(CharWidth); - } + llvm::Optional<size_t> FirstElemToReplace = splitAt(Offset); + if (!FirstElemToReplace) + return; + size_t First = *FirstElemToReplace; - Elements.push_back(llvm::ConstantInt::get(CGM.getLLVMContext(), Tmp)); - ++NextFieldOffsetInChars; + llvm::Optional<size_t> LastElemToReplace = splitAt(Offset + Size); + if (!LastElemToReplace) + return; + size_t Last = *LastElemToReplace; - FieldValue = FieldValue.trunc(FieldValue.getBitWidth() - CharWidth); + size_t Length = Last - First; + if (Length == 0) + return; + + if (Length == 1 && Offsets[First] == Offset && + getSize(Elems[First]) == Size) { + // Re-wrap single element structs if necessary. Otherwise, leave any single + // element constant of the right size alone even if it has the wrong type. 
+ auto *STy = dyn_cast<llvm::StructType>(DesiredTy); + if (STy && STy->getNumElements() == 1 && + STy->getElementType(0) == Elems[First]->getType()) + Elems[First] = llvm::ConstantStruct::get(STy, Elems[First]); + return; } - assert(FieldValue.getBitWidth() > 0 && - "Should have at least one bit left!"); - assert(FieldValue.getBitWidth() <= CharWidth && - "Should not have more than a byte left!"); + llvm::Constant *Replacement = buildFrom( + CGM, makeArrayRef(Elems).slice(First, Length), + makeArrayRef(Offsets).slice(First, Length), Offset, getSize(DesiredTy), + /*known to have natural layout=*/false, DesiredTy, false); + replace(Elems, First, Last, {Replacement}); + replace(Offsets, First, Last, {Offset}); +} - if (FieldValue.getBitWidth() < CharWidth) { - if (CGM.getDataLayout().isBigEndian()) { - unsigned BitWidth = FieldValue.getBitWidth(); +//===----------------------------------------------------------------------===// +// ConstStructBuilder +//===----------------------------------------------------------------------===// - FieldValue = FieldValue.zext(CharWidth) << (CharWidth - BitWidth); - } else - FieldValue = FieldValue.zext(CharWidth); - } +class ConstStructBuilder { + CodeGenModule &CGM; + ConstantEmitter &Emitter; + ConstantAggregateBuilder &Builder; + CharUnits StartOffset; - // Append the last element. - Elements.push_back(llvm::ConstantInt::get(CGM.getLLVMContext(), - FieldValue)); - ++NextFieldOffsetInChars; -} +public: + static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, + InitListExpr *ILE, QualType StructTy); + static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, + const APValue &Value, QualType ValTy); + static bool UpdateStruct(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Const, CharUnits Offset, + InitListExpr *Updater); -void ConstStructBuilder::AppendPadding(CharUnits PadSize) { - if (PadSize.isZero()) - return; +private: + ConstStructBuilder(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Builder, CharUnits StartOffset) + : CGM(Emitter.CGM), Emitter(Emitter), Builder(Builder), + StartOffset(StartOffset) {} - llvm::Type *Ty = CGM.Int8Ty; - if (PadSize > CharUnits::One()) - Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity()); + bool AppendField(const FieldDecl *Field, uint64_t FieldOffset, + llvm::Constant *InitExpr, bool AllowOverwrite = false); - llvm::Constant *C = llvm::UndefValue::get(Ty); - Elements.push_back(C); - assert(getAlignment(C) == CharUnits::One() && - "Padding must have 1 byte alignment!"); + bool AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst, + bool AllowOverwrite = false); - NextFieldOffsetInChars += getSizeInChars(C); -} + bool AppendBitField(const FieldDecl *Field, uint64_t FieldOffset, + llvm::ConstantInt *InitExpr, bool AllowOverwrite = false); -void ConstStructBuilder::AppendTailPadding(CharUnits RecordSize) { - assert(NextFieldOffsetInChars <= RecordSize && - "Size mismatch!"); + bool Build(InitListExpr *ILE, bool AllowOverwrite); + bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, + const CXXRecordDecl *VTableClass, CharUnits BaseOffset); + llvm::Constant *Finalize(QualType Ty); +}; - AppendPadding(RecordSize - NextFieldOffsetInChars); -} +bool ConstStructBuilder::AppendField( + const FieldDecl *Field, uint64_t FieldOffset, llvm::Constant *InitCst, + bool AllowOverwrite) { + const ASTContext &Context = CGM.getContext(); -void ConstStructBuilder::ConvertStructToPacked() { - SmallVector<llvm::Constant *, 16> PackedElements; - CharUnits ElementOffsetInChars = 
CharUnits::Zero(); + CharUnits FieldOffsetInChars = Context.toCharUnitsFromBits(FieldOffset); - for (unsigned i = 0, e = Elements.size(); i != e; ++i) { - llvm::Constant *C = Elements[i]; + return AppendBytes(FieldOffsetInChars, InitCst, AllowOverwrite); +} - CharUnits ElementAlign = CharUnits::fromQuantity( - CGM.getDataLayout().getABITypeAlignment(C->getType())); - CharUnits AlignedElementOffsetInChars = - ElementOffsetInChars.alignTo(ElementAlign); +bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars, + llvm::Constant *InitCst, + bool AllowOverwrite) { + return Builder.add(InitCst, StartOffset + FieldOffsetInChars, AllowOverwrite); +} - if (AlignedElementOffsetInChars > ElementOffsetInChars) { - // We need some padding. - CharUnits NumChars = - AlignedElementOffsetInChars - ElementOffsetInChars; +bool ConstStructBuilder::AppendBitField( + const FieldDecl *Field, uint64_t FieldOffset, llvm::ConstantInt *CI, + bool AllowOverwrite) { + uint64_t FieldSize = Field->getBitWidthValue(CGM.getContext()); + llvm::APInt FieldValue = CI->getValue(); - llvm::Type *Ty = CGM.Int8Ty; - if (NumChars > CharUnits::One()) - Ty = llvm::ArrayType::get(Ty, NumChars.getQuantity()); + // Promote the size of FieldValue if necessary + // FIXME: This should never occur, but currently it can because initializer + // constants are cast to bool, and because clang is not enforcing bitfield + // width limits. + if (FieldSize > FieldValue.getBitWidth()) + FieldValue = FieldValue.zext(FieldSize); - llvm::Constant *Padding = llvm::UndefValue::get(Ty); - PackedElements.push_back(Padding); - ElementOffsetInChars += getSizeInChars(Padding); - } + // Truncate the size of FieldValue to the bit field size. + if (FieldSize < FieldValue.getBitWidth()) + FieldValue = FieldValue.trunc(FieldSize); - PackedElements.push_back(C); - ElementOffsetInChars += getSizeInChars(C); + return Builder.addBits(FieldValue, + CGM.getContext().toBits(StartOffset) + FieldOffset, + AllowOverwrite); +} + +static bool EmitDesignatedInitUpdater(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Const, + CharUnits Offset, QualType Type, + InitListExpr *Updater) { + if (Type->isRecordType()) + return ConstStructBuilder::UpdateStruct(Emitter, Const, Offset, Updater); + + auto CAT = Emitter.CGM.getContext().getAsConstantArrayType(Type); + if (!CAT) + return false; + QualType ElemType = CAT->getElementType(); + CharUnits ElemSize = Emitter.CGM.getContext().getTypeSizeInChars(ElemType); + llvm::Type *ElemTy = Emitter.CGM.getTypes().ConvertTypeForMem(ElemType); + + llvm::Constant *FillC = nullptr; + if (Expr *Filler = Updater->getArrayFiller()) { + if (!isa<NoInitExpr>(Filler)) { + FillC = Emitter.tryEmitAbstractForMemory(Filler, ElemType); + if (!FillC) + return false; + } } - assert(ElementOffsetInChars == NextFieldOffsetInChars && - "Packing the struct changed its size!"); + unsigned NumElementsToUpdate = + FillC ? CAT->getSize().getZExtValue() : Updater->getNumInits(); + for (unsigned I = 0; I != NumElementsToUpdate; ++I, Offset += ElemSize) { + Expr *Init = nullptr; + if (I < Updater->getNumInits()) + Init = Updater->getInit(I); + + if (!Init && FillC) { + if (!Const.add(FillC, Offset, true)) + return false; + } else if (!Init || isa<NoInitExpr>(Init)) { + continue; + } else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init)) { + if (!EmitDesignatedInitUpdater(Emitter, Const, Offset, ElemType, + ChildILE)) + return false; + // Attempt to reduce the array element to a single constant if necessary. 
+ Const.condense(Offset, ElemTy); + } else { + llvm::Constant *Val = Emitter.tryEmitPrivateForMemory(Init, ElemType); + if (!Const.add(Val, Offset, true)) + return false; + } + } - Elements.swap(PackedElements); - LLVMStructAlignment = CharUnits::One(); - Packed = true; + return true; } -bool ConstStructBuilder::Build(InitListExpr *ILE) { +bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) { RecordDecl *RD = ILE->getType()->getAs<RecordType>()->getDecl(); const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); - unsigned FieldNo = 0; + unsigned FieldNo = -1; unsigned ElementNo = 0; // Bail out if we have base classes. We could support these, but they only @@ -379,35 +672,66 @@ bool ConstStructBuilder::Build(InitListExpr *ILE) { if (CXXRD->getNumBases()) return false; - for (RecordDecl::field_iterator Field = RD->field_begin(), - FieldEnd = RD->field_end(); Field != FieldEnd; ++Field, ++FieldNo) { + for (FieldDecl *Field : RD->fields()) { + ++FieldNo; + // If this is a union, skip all the fields that aren't being initialized. - if (RD->isUnion() && ILE->getInitializedFieldInUnion() != *Field) + if (RD->isUnion() && + !declaresSameEntity(ILE->getInitializedFieldInUnion(), Field)) continue; - // Don't emit anonymous bitfields, they just affect layout. - if (Field->isUnnamedBitfield()) + // Don't emit anonymous bitfields or zero-sized fields. + if (Field->isUnnamedBitfield() || Field->isZeroSize(CGM.getContext())) continue; // Get the initializer. A struct can include fields without initializers, // we just use explicit null values for them. - llvm::Constant *EltInit; + Expr *Init = nullptr; if (ElementNo < ILE->getNumInits()) - EltInit = Emitter.tryEmitPrivateForMemory(ILE->getInit(ElementNo++), - Field->getType()); - else - EltInit = Emitter.emitNullForMemory(Field->getType()); + Init = ILE->getInit(ElementNo++); + if (Init && isa<NoInitExpr>(Init)) + continue; + // When emitting a DesignatedInitUpdateExpr, a nested InitListExpr + // represents additional overwriting of our current constant value, and not + // a new constant to emit independently. + if (AllowOverwrite && + (Field->getType()->isArrayType() || Field->getType()->isRecordType())) { + if (auto *SubILE = dyn_cast<InitListExpr>(Init)) { + CharUnits Offset = CGM.getContext().toCharUnitsFromBits( + Layout.getFieldOffset(FieldNo)); + if (!EmitDesignatedInitUpdater(Emitter, Builder, StartOffset + Offset, + Field->getType(), SubILE)) + return false; + // If we split apart the field's value, try to collapse it down to a + // single value now. + Builder.condense(StartOffset + Offset, + CGM.getTypes().ConvertTypeForMem(Field->getType())); + continue; + } + } + + llvm::Constant *EltInit = + Init ? Emitter.tryEmitPrivateForMemory(Init, Field->getType()) + : Emitter.emitNullForMemory(Field->getType()); if (!EltInit) return false; if (!Field->isBitField()) { // Handle non-bitfield members. - AppendField(*Field, Layout.getFieldOffset(FieldNo), EltInit); + if (!AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit, + AllowOverwrite)) + return false; + // After emitting a non-empty field with [[no_unique_address]], we may + // need to overwrite its tail padding. + if (Field->hasAttr<NoUniqueAddressAttr>()) + AllowOverwrite = true; } else { // Otherwise we have a bitfield. 
if (auto *CI = dyn_cast<llvm::ConstantInt>(EltInit)) { - AppendBitField(*Field, Layout.getFieldOffset(FieldNo), CI); + if (!AppendBitField(Field, Layout.getFieldOffset(FieldNo), CI, + AllowOverwrite)) + return false; } else { // We are trying to initialize a bitfield with a non-trivial constant, // this must require run-time code. @@ -445,7 +769,8 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, llvm::Constant *VTableAddressPoint = CGM.getCXXABI().getVTableAddressPointForConstExpr( BaseSubobject(CD, Offset), VTableClass); - AppendBytes(Offset, VTableAddressPoint); + if (!AppendBytes(Offset, VTableAddressPoint)) + return false; } // Accumulate and sort bases, in order to visit them in address order, which @@ -460,7 +785,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, CharUnits BaseOffset = Layout.getBaseClassOffset(BD); Bases.push_back(BaseInfo(BD, BaseOffset, BaseNo)); } - std::stable_sort(Bases.begin(), Bases.end()); + llvm::stable_sort(Bases); for (unsigned I = 0, N = Bases.size(); I != N; ++I) { BaseInfo &Base = Bases[I]; @@ -474,14 +799,15 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, unsigned FieldNo = 0; uint64_t OffsetBits = CGM.getContext().toBits(Offset); + bool AllowOverwrite = false; for (RecordDecl::field_iterator Field = RD->field_begin(), FieldEnd = RD->field_end(); Field != FieldEnd; ++Field, ++FieldNo) { // If this is a union, skip all the fields that aren't being initialized. - if (RD->isUnion() && Val.getUnionField() != *Field) + if (RD->isUnion() && !declaresSameEntity(Val.getUnionField(), *Field)) continue; - // Don't emit anonymous bitfields, they just affect layout. - if (Field->isUnnamedBitfield()) + // Don't emit anonymous bitfields or zero-sized fields. + if (Field->isUnnamedBitfield() || Field->isZeroSize(CGM.getContext())) continue; // Emit the value of the initializer. @@ -494,93 +820,37 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, if (!Field->isBitField()) { // Handle non-bitfield members. - AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, EltInit); + if (!AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, + EltInit, AllowOverwrite)) + return false; + // After emitting a non-empty field with [[no_unique_address]], we may + // need to overwrite its tail padding. + if (Field->hasAttr<NoUniqueAddressAttr>()) + AllowOverwrite = true; } else { // Otherwise we have a bitfield. - AppendBitField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, - cast<llvm::ConstantInt>(EltInit)); + if (!AppendBitField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, + cast<llvm::ConstantInt>(EltInit), AllowOverwrite)) + return false; } } return true; } -llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { - RecordDecl *RD = Ty->getAs<RecordType>()->getDecl(); - const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); - - CharUnits LayoutSizeInChars = Layout.getSize(); - - if (NextFieldOffsetInChars > LayoutSizeInChars) { - // If the struct is bigger than the size of the record type, - // we must have a flexible array member at the end. - assert(RD->hasFlexibleArrayMember() && - "Must have flexible array member if struct is bigger than type!"); - - // No tail padding is necessary. - } else { - // Append tail padding if necessary. 
- CharUnits LLVMSizeInChars = - NextFieldOffsetInChars.alignTo(LLVMStructAlignment); - - if (LLVMSizeInChars != LayoutSizeInChars) - AppendTailPadding(LayoutSizeInChars); - - LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment); - - // Check if we need to convert the struct to a packed struct. - if (NextFieldOffsetInChars <= LayoutSizeInChars && - LLVMSizeInChars > LayoutSizeInChars) { - assert(!Packed && "Size mismatch!"); - - ConvertStructToPacked(); - assert(NextFieldOffsetInChars <= LayoutSizeInChars && - "Converting to packed did not help!"); - } - - LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment); - - assert(LayoutSizeInChars == LLVMSizeInChars && - "Tail padding mismatch!"); - } - - // Pick the type to use. If the type is layout identical to the ConvertType - // type then use it, otherwise use whatever the builder produced for us. - llvm::StructType *STy = - llvm::ConstantStruct::getTypeForElements(CGM.getLLVMContext(), - Elements, Packed); - llvm::Type *ValTy = CGM.getTypes().ConvertType(Ty); - if (llvm::StructType *ValSTy = dyn_cast<llvm::StructType>(ValTy)) { - if (ValSTy->isLayoutIdentical(STy)) - STy = ValSTy; - } - - llvm::Constant *Result = llvm::ConstantStruct::get(STy, Elements); - - assert(NextFieldOffsetInChars.alignTo(getAlignment(Result)) == - getSizeInChars(Result) && - "Size mismatch!"); - - return Result; -} - -llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, - ConstExprEmitter *ExprEmitter, - llvm::Constant *Base, - InitListExpr *Updater, - QualType ValTy) { - ConstStructBuilder Builder(Emitter); - if (!Builder.Build(ExprEmitter, Base, Updater)) - return nullptr; - return Builder.Finalize(ValTy); +llvm::Constant *ConstStructBuilder::Finalize(QualType Type) { + RecordDecl *RD = Type->getAs<RecordType>()->getDecl(); + llvm::Type *ValTy = CGM.getTypes().ConvertType(Type); + return Builder.build(ValTy, RD->hasFlexibleArrayMember()); } llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, InitListExpr *ILE, QualType ValTy) { - ConstStructBuilder Builder(Emitter); + ConstantAggregateBuilder Const(Emitter.CGM); + ConstStructBuilder Builder(Emitter, Const, CharUnits::Zero()); - if (!Builder.Build(ILE)) + if (!Builder.Build(ILE, /*AllowOverwrite*/false)) return nullptr; return Builder.Finalize(ValTy); @@ -589,7 +859,8 @@ llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, const APValue &Val, QualType ValTy) { - ConstStructBuilder Builder(Emitter); + ConstantAggregateBuilder Const(Emitter.CGM); + ConstStructBuilder Builder(Emitter, Const, CharUnits::Zero()); const RecordDecl *RD = ValTy->castAs<RecordType>()->getDecl(); const CXXRecordDecl *CD = dyn_cast<CXXRecordDecl>(RD); @@ -599,6 +870,12 @@ llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, return Builder.Finalize(ValTy); } +bool ConstStructBuilder::UpdateStruct(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Const, + CharUnits Offset, InitListExpr *Updater) { + return ConstStructBuilder(Emitter, Const, Offset) + .Build(Updater, /*AllowOverwrite*/ true); +} //===----------------------------------------------------------------------===// // ConstExprEmitter @@ -636,7 +913,7 @@ static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM, } static llvm::Constant * -EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType, +EmitArrayConstant(CodeGenModule &CGM, llvm::ArrayType *DesiredType, 
llvm::Type *CommonElementType, unsigned ArrayBound, SmallVectorImpl<llvm::Constant *> &Elements, llvm::Constant *Filler) { @@ -649,10 +926,8 @@ EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType, --NonzeroLength; } - if (NonzeroLength == 0) { - return llvm::ConstantAggregateZero::get( - CGM.getTypes().ConvertType(QualType(DestType, 0))); - } + if (NonzeroLength == 0) + return llvm::ConstantAggregateZero::get(DesiredType); // Add a zeroinitializer array filler if we have lots of trailing zeroes. unsigned TrailingZeroes = ArrayBound - NonzeroLength; @@ -673,9 +948,7 @@ EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType, } auto *FillerType = - CommonElementType - ? CommonElementType - : CGM.getTypes().ConvertType(DestType->getElementType()); + CommonElementType ? CommonElementType : DesiredType->getElementType(); FillerType = llvm::ArrayType::get(FillerType, TrailingZeroes); Elements.back() = llvm::ConstantAggregateZero::get(FillerType); CommonElementType = nullptr; @@ -701,10 +974,12 @@ EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType, return llvm::ConstantStruct::get(SType, Elements); } -/// This class only needs to handle two cases: -/// 1) Literals (this is used by APValue emission to emit literals). -/// 2) Arrays, structs and unions (outside C++11 mode, we don't currently -/// constant fold these types). +// This class only needs to handle arrays, structs and unions. Outside C++11 +// mode, we don't currently constant fold those types. All other types are +// handled by constant folding. +// +// Constant folding is currently missing support for a few features supported +// here: CK_ToUnion, CK_ReinterpretMemberPointer, and DesignatedInitUpdateExpr. class ConstExprEmitter : public StmtVisitor<ConstExprEmitter, llvm::Constant*, QualType> { CodeGenModule &CGM; @@ -840,6 +1115,7 @@ public: case CK_ToVoid: case CK_Dynamic: case CK_LValueBitCast: + case CK_LValueToRValueBitCast: case CK_NullToMemberPointer: case CK_UserDefinedConversion: case CK_CPointerToObjCPointerCast: @@ -875,16 +1151,14 @@ public: case CK_FloatingCast: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: case CK_ZeroToOCLOpaqueType: return nullptr; } llvm_unreachable("Invalid CastKind"); } - llvm::Constant *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE, QualType T) { - return Visit(DAE->getExpr(), T); - } - llvm::Constant *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE, QualType T) { // No need for a DefaultInitExprScope: we don't handle 'this' in a // constant expression. 
@@ -942,7 +1216,9 @@ public: Elts.push_back(C); } - return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts, + llvm::ArrayType *Desired = + cast<llvm::ArrayType>(CGM.getTypes().ConvertType(ILE->getType())); + return EmitArrayConstant(CGM, Desired, CommonElementType, NumElements, Elts, fillC); } @@ -968,80 +1244,24 @@ public: return nullptr; } - llvm::Constant *EmitDesignatedInitUpdater(llvm::Constant *Base, - InitListExpr *Updater, - QualType destType) { - if (auto destAT = CGM.getContext().getAsArrayType(destType)) { - llvm::ArrayType *AType = cast<llvm::ArrayType>(ConvertType(destType)); - llvm::Type *ElemType = AType->getElementType(); - - unsigned NumInitElements = Updater->getNumInits(); - unsigned NumElements = AType->getNumElements(); - - std::vector<llvm::Constant *> Elts; - Elts.reserve(NumElements); - - QualType destElemType = destAT->getElementType(); - - if (auto DataArray = dyn_cast<llvm::ConstantDataArray>(Base)) - for (unsigned i = 0; i != NumElements; ++i) - Elts.push_back(DataArray->getElementAsConstant(i)); - else if (auto Array = dyn_cast<llvm::ConstantArray>(Base)) - for (unsigned i = 0; i != NumElements; ++i) - Elts.push_back(Array->getOperand(i)); - else - return nullptr; // FIXME: other array types not implemented - - llvm::Constant *fillC = nullptr; - if (Expr *filler = Updater->getArrayFiller()) - if (!isa<NoInitExpr>(filler)) - fillC = Emitter.tryEmitAbstractForMemory(filler, destElemType); - bool RewriteType = (fillC && fillC->getType() != ElemType); - - for (unsigned i = 0; i != NumElements; ++i) { - Expr *Init = nullptr; - if (i < NumInitElements) - Init = Updater->getInit(i); - - if (!Init && fillC) - Elts[i] = fillC; - else if (!Init || isa<NoInitExpr>(Init)) - ; // Do nothing. - else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init)) - Elts[i] = EmitDesignatedInitUpdater(Elts[i], ChildILE, destElemType); - else - Elts[i] = Emitter.tryEmitPrivateForMemory(Init, destElemType); - - if (!Elts[i]) - return nullptr; - RewriteType |= (Elts[i]->getType() != ElemType); - } - - if (RewriteType) { - std::vector<llvm::Type *> Types; - Types.reserve(NumElements); - for (unsigned i = 0; i != NumElements; ++i) - Types.push_back(Elts[i]->getType()); - llvm::StructType *SType = llvm::StructType::get(AType->getContext(), - Types, true); - return llvm::ConstantStruct::get(SType, Elts); - } - - return llvm::ConstantArray::get(AType, Elts); - } - - if (destType->isRecordType()) - return ConstStructBuilder::BuildStruct(Emitter, this, Base, Updater, - destType); - - return nullptr; - } - llvm::Constant *VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E, QualType destType) { auto C = Visit(E->getBase(), destType); - if (!C) return nullptr; - return EmitDesignatedInitUpdater(C, E->getUpdater(), destType); + if (!C) + return nullptr; + + ConstantAggregateBuilder Const(CGM); + Const.add(C, CharUnits::Zero(), false); + + if (!EmitDesignatedInitUpdater(Emitter, Const, CharUnits::Zero(), destType, + E->getUpdater())) + return nullptr; + + llvm::Type *ValTy = CGM.getTypes().ConvertType(destType); + bool HasFlexibleArray = false; + if (auto *RT = destType->getAs<RecordType>()) + HasFlexibleArray = RT->getDecl()->hasFlexibleArrayMember(); + return Const.build(ValTy, HasFlexibleArray); } llvm::Constant *VisitCXXConstructExpr(CXXConstructExpr *E, QualType Ty) { @@ -1077,6 +1297,7 @@ public: } llvm::Constant *VisitStringLiteral(StringLiteral *E, QualType T) { + // This is a string literal initializing an array in an initializer. 
return CGM.GetConstantArrayFromStringLiteral(E); } @@ -1106,76 +1327,6 @@ public: } // end anonymous namespace. -bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter, - llvm::Constant *Base, - InitListExpr *Updater) { - assert(Base && "base expression should not be empty"); - - QualType ExprType = Updater->getType(); - RecordDecl *RD = ExprType->getAs<RecordType>()->getDecl(); - const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); - const llvm::StructLayout *BaseLayout = CGM.getDataLayout().getStructLayout( - cast<llvm::StructType>(Base->getType())); - unsigned FieldNo = -1; - unsigned ElementNo = 0; - - // Bail out if we have base classes. We could support these, but they only - // arise in C++1z where we will have already constant folded most interesting - // cases. FIXME: There are still a few more cases we can handle this way. - if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - if (CXXRD->getNumBases()) - return false; - - for (FieldDecl *Field : RD->fields()) { - ++FieldNo; - - if (RD->isUnion() && Updater->getInitializedFieldInUnion() != Field) - continue; - - // Skip anonymous bitfields. - if (Field->isUnnamedBitfield()) - continue; - - llvm::Constant *EltInit = Base->getAggregateElement(ElementNo); - - // Bail out if the type of the ConstantStruct does not have the same layout - // as the type of the InitListExpr. - if (CGM.getTypes().ConvertType(Field->getType()) != EltInit->getType() || - Layout.getFieldOffset(ElementNo) != - BaseLayout->getElementOffsetInBits(ElementNo)) - return false; - - // Get the initializer. If we encounter an empty field or a NoInitExpr, - // we use values from the base expression. - Expr *Init = nullptr; - if (ElementNo < Updater->getNumInits()) - Init = Updater->getInit(ElementNo); - - if (!Init || isa<NoInitExpr>(Init)) - ; // Do nothing. - else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init)) - EltInit = ExprEmitter->EmitDesignatedInitUpdater(EltInit, ChildILE, - Field->getType()); - else - EltInit = Emitter.tryEmitPrivateForMemory(Init, Field->getType()); - - ++ElementNo; - - if (!EltInit) - return false; - - if (!Field->isBitField()) - AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit); - else if (llvm::ConstantInt *CI = dyn_cast<llvm::ConstantInt>(EltInit)) - AppendBitField(Field, Layout.getFieldOffset(FieldNo), CI); - else - // Initializing a bitfield with a non-trivial constant? - return false; - } - - return true; -} - llvm::Constant *ConstantEmitter::validateAndPopAbstract(llvm::Constant *C, AbstractState saved) { Abstract = saved.OldValue; @@ -1609,6 +1760,7 @@ private: ConstantLValue VisitConstantExpr(const ConstantExpr *E); ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); ConstantLValue VisitStringLiteral(const StringLiteral *E); + ConstantLValue VisitObjCBoxedExpr(const ObjCBoxedExpr *E); ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E); ConstantLValue VisitObjCStringLiteral(const ObjCStringLiteral *E); ConstantLValue VisitPredefinedExpr(const PredefinedExpr *E); @@ -1650,17 +1802,7 @@ private: llvm::Constant *ConstantLValueEmitter::tryEmit() { const APValue::LValueBase &base = Value.getLValueBase(); - // Certain special array initializers are represented in APValue - // as l-values referring to the base expression which generates the - // array. This happens with e.g. string literals. These should - // probably just get their own representation kind in APValue. 
- if (DestType->isArrayType()) { - assert(!hasNonZeroOffset() && "offset on array initializer"); - auto expr = const_cast<Expr*>(base.get<const Expr*>()); - return ConstExprEmitter(Emitter).Visit(expr, DestType); - } - - // Otherwise, the destination type should be a pointer or reference + // The destination type should be a pointer or reference // type, but it might also be a cast thereof. // // FIXME: the chain of casts required should be reflected in the APValue. @@ -1700,34 +1842,21 @@ llvm::Constant *ConstantLValueEmitter::tryEmit() { /// bitcast to pointer type. llvm::Constant * ConstantLValueEmitter::tryEmitAbsolute(llvm::Type *destTy) { - auto offset = getOffset(); - // If we're producing a pointer, this is easy. - if (auto destPtrTy = cast<llvm::PointerType>(destTy)) { - if (Value.isNullPointer()) { - // FIXME: integer offsets from non-zero null pointers. - return CGM.getNullPointer(destPtrTy, DestType); - } - - // Convert the integer to a pointer-sized integer before converting it - // to a pointer. - // FIXME: signedness depends on the original integer type. - auto intptrTy = CGM.getDataLayout().getIntPtrType(destPtrTy); - llvm::Constant *C = offset; - C = llvm::ConstantExpr::getIntegerCast(getOffset(), intptrTy, - /*isSigned*/ false); - C = llvm::ConstantExpr::getIntToPtr(C, destPtrTy); - return C; + auto destPtrTy = cast<llvm::PointerType>(destTy); + if (Value.isNullPointer()) { + // FIXME: integer offsets from non-zero null pointers. + return CGM.getNullPointer(destPtrTy, DestType); } - // Otherwise, we're basically returning an integer constant. - - // FIXME: this does the wrong thing with ptrtoint of a null pointer, - // but since we don't know the original pointer type, there's not much - // we can do about it. - - auto C = getOffset(); - C = llvm::ConstantExpr::getIntegerCast(C, destTy, /*isSigned*/ false); + // Convert the integer to a pointer-sized integer before converting it + // to a pointer. + // FIXME: signedness depends on the original integer type. + auto intptrTy = CGM.getDataLayout().getIntPtrType(destPtrTy); + llvm::Constant *C; + C = llvm::ConstantExpr::getIntegerCast(getOffset(), intptrTy, + /*isSigned*/ false); + C = llvm::ConstantExpr::getIntToPtr(C, destPtrTy); return C; } @@ -1749,7 +1878,7 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { if (VD->isLocalVarDecl()) { return CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)); + *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)); } } } @@ -1757,6 +1886,17 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { return nullptr; } + // Handle typeid(T). + if (TypeInfoLValue TI = base.dyn_cast<TypeInfoLValue>()) { + llvm::Type *StdTypeInfoPtrTy = + CGM.getTypes().ConvertType(base.getTypeInfoType())->getPointerTo(); + llvm::Constant *TypeInfo = + CGM.GetAddrOfRTTIDescriptor(QualType(TI.getType(), 0)); + if (TypeInfo->getType() != StdTypeInfoPtrTy) + TypeInfo = llvm::ConstantExpr::getBitCast(TypeInfo, StdTypeInfoPtrTy); + return TypeInfo; + } + // Otherwise, it must be an expression. 
return Visit(base.get<const Expr*>()); } @@ -1781,25 +1921,29 @@ ConstantLValueEmitter::VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return CGM.GetAddrOfConstantStringFromObjCEncode(E); } +static ConstantLValue emitConstantObjCStringLiteral(const StringLiteral *S, + QualType T, + CodeGenModule &CGM) { + auto C = CGM.getObjCRuntime().GenerateConstantString(S); + return C.getElementBitCast(CGM.getTypes().ConvertTypeForMem(T)); +} + ConstantLValue ConstantLValueEmitter::VisitObjCStringLiteral(const ObjCStringLiteral *E) { - auto C = CGM.getObjCRuntime().GenerateConstantString(E->getString()); - return C.getElementBitCast(CGM.getTypes().ConvertTypeForMem(E->getType())); + return emitConstantObjCStringLiteral(E->getString(), E->getType(), CGM); } ConstantLValue -ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { - if (auto CGF = Emitter.CGF) { - LValue Res = CGF->EmitPredefinedLValue(E); - return cast<ConstantAddress>(Res.getAddress()); - } - - auto kind = E->getIdentKind(); - if (kind == PredefinedExpr::PrettyFunction) { - return CGM.GetAddrOfConstantCString("top level", ".tmp"); - } +ConstantLValueEmitter::VisitObjCBoxedExpr(const ObjCBoxedExpr *E) { + assert(E->isExpressibleAsConstantInitializer() && + "this boxed expression can't be emitted as a compile-time constant"); + auto *SL = cast<StringLiteral>(E->getSubExpr()->IgnoreParenCasts()); + return emitConstantObjCStringLiteral(SL, E->getType(), CGM); +} - return CGM.GetAddrOfConstantCString("", ".tmp"); +ConstantLValue +ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { + return CGM.GetAddrOfConstantStringFromLiteral(E->getFunctionName()); } ConstantLValue @@ -1867,12 +2011,17 @@ ConstantLValueEmitter::VisitMaterializeTemporaryExpr( llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, QualType DestType) { switch (Value.getKind()) { - case APValue::Uninitialized: - llvm_unreachable("Constant expressions should be initialized."); + case APValue::None: + case APValue::Indeterminate: + // Out-of-lifetime and indeterminate values can be modeled as 'undef'. + return llvm::UndefValue::get(CGM.getTypes().ConvertType(DestType)); case APValue::LValue: return ConstantLValueEmitter(*this, Value, DestType).tryEmit(); case APValue::Int: return llvm::ConstantInt::get(CGM.getLLVMContext(), Value.getInt()); + case APValue::FixedPoint: + return llvm::ConstantInt::get(CGM.getLLVMContext(), + Value.getFixedPoint().getValue()); case APValue::ComplexInt: { llvm::Constant *Complex[2]; @@ -1990,7 +2139,9 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, return llvm::ConstantAggregateZero::get(AType); } - return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts, + llvm::ArrayType *Desired = + cast<llvm::ArrayType>(CGM.getTypes().ConvertType(DestType)); + return EmitArrayConstant(CGM, Desired, CommonElementType, NumElements, Elts, Filler); } case APValue::MemberPointer: @@ -2077,7 +2228,7 @@ static llvm::Constant *EmitNullConstant(CodeGenModule &CGM, for (const auto *Field : record->fields()) { // Fill in non-bitfields. (Bitfields always use a zero pattern, which we // will fill in later.) 
- if (!Field->isBitField()) { + if (!Field->isBitField() && !Field->isZeroSize(CGM.getContext())) { unsigned fieldIndex = layout.getLLVMFieldNo(Field); elements[fieldIndex] = CGM.EmitNullConstant(Field->getType()); } diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 1c14d4c99a23..3d082de2a14f 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -1,9 +1,8 @@ //===--- CGExprScalar.cpp - Emit LLVM Code for Scalar Exprs ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -17,6 +16,7 @@ #include "CGObjCRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" @@ -125,6 +125,21 @@ struct BinOpInfo { return CFP->isZero(); return true; } + + /// Check if either operand is a fixed point type or integer type, with at + /// least one being a fixed point type. In any case, this + /// operation did not follow usual arithmetic conversion and both operands may + /// not be the same. + bool isFixedPointBinOp() const { + // We cannot simply check the result type since comparison operations return + // an int. + if (const auto *BinOp = dyn_cast<BinaryOperator>(E)) { + QualType LHSType = BinOp->getLHS()->getType(); + QualType RHSType = BinOp->getRHS()->getType(); + return LHSType->isFixedPointType() || RHSType->isFixedPointType(); + } + return false; + } }; static bool MustVisitNullValue(const Expr *E) { @@ -298,7 +313,7 @@ public: /// boolean (i1) truth value. This is equivalent to "Val != 0". Value *EmitConversionToBool(Value *Src, QualType DstTy); - /// Emit a check that a conversion to or from a floating-point type does not + /// Emit a check that a conversion from a floating-point type does not /// overflow. void EmitFloatConversionCheck(Value *OrigSrc, QualType OrigSrcType, Value *Src, QualType SrcType, QualType DstType, @@ -349,8 +364,14 @@ public: SourceLocation Loc, ScalarConversionOpts Opts = ScalarConversionOpts()); + /// Convert between either a fixed point and other fixed point or fixed point + /// and an integer. Value *EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc); + Value *EmitFixedPointConversion(Value *Src, FixedPointSemantics &SrcFixedSema, + FixedPointSemantics &DstFixedSema, + SourceLocation Loc, + bool DstIsInteger = false); /// Emit a conversion from the specified complex type to the specified /// destination type, where the destination type is an LLVM scalar type. 
@@ -620,12 +641,20 @@ public: Value *VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E) { return EmitLoadOfLValue(E); } + Value *VisitSourceLocExpr(SourceLocExpr *SLE) { + auto &Ctx = CGF.getContext(); + APValue Evaluated = + SLE->EvaluateInContext(Ctx, CGF.CurSourceLocExprScope.getDefaultExpr()); + return ConstantEmitter(CGF.CGM, &CGF) + .emitAbstract(SLE->getLocation(), Evaluated, SLE->getType()); + } Value *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { + CodeGenFunction::CXXDefaultArgExprScope Scope(CGF, DAE); return Visit(DAE->getExpr()); } Value *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { - CodeGenFunction::CXXDefaultInitExprScope Scope(CGF); + CodeGenFunction::CXXDefaultInitExprScope Scope(CGF, DIE); return Visit(DIE->getExpr()); } Value *VisitCXXThisExpr(CXXThisExpr *TE) { @@ -729,6 +758,9 @@ public: return Builder.CreateOr(Ops.LHS, Ops.RHS, "or"); } + // Helper functions for fixed point binary operations. + Value *EmitFixedPointBinOp(const BinOpInfo &Ops); + BinOpInfo EmitBinOps(const BinaryOperator *E); LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E, Value *(ScalarExprEmitter::*F)(const BinOpInfo &), @@ -832,128 +864,63 @@ Value *ScalarExprEmitter::EmitConversionToBool(Value *Src, QualType SrcType) { void ScalarExprEmitter::EmitFloatConversionCheck( Value *OrigSrc, QualType OrigSrcType, Value *Src, QualType SrcType, QualType DstType, llvm::Type *DstTy, SourceLocation Loc) { + assert(SrcType->isFloatingType() && "not a conversion from floating point"); + if (!isa<llvm::IntegerType>(DstTy)) + return; + CodeGenFunction::SanitizerScope SanScope(&CGF); using llvm::APFloat; using llvm::APSInt; - llvm::Type *SrcTy = Src->getType(); - llvm::Value *Check = nullptr; - if (llvm::IntegerType *IntTy = dyn_cast<llvm::IntegerType>(SrcTy)) { - // Integer to floating-point. This can fail for unsigned short -> __half - // or unsigned __int128 -> float. - assert(DstType->isFloatingType()); - bool SrcIsUnsigned = OrigSrcType->isUnsignedIntegerOrEnumerationType(); - - APFloat LargestFloat = - APFloat::getLargest(CGF.getContext().getFloatTypeSemantics(DstType)); - APSInt LargestInt(IntTy->getBitWidth(), SrcIsUnsigned); - - bool IsExact; - if (LargestFloat.convertToInteger(LargestInt, APFloat::rmTowardZero, - &IsExact) != APFloat::opOK) - // The range of representable values of this floating point type includes - // all values of this integer type. Don't need an overflow check. - return; - - llvm::Value *Max = llvm::ConstantInt::get(VMContext, LargestInt); - if (SrcIsUnsigned) - Check = Builder.CreateICmpULE(Src, Max); - else { - llvm::Value *Min = llvm::ConstantInt::get(VMContext, -LargestInt); - llvm::Value *GE = Builder.CreateICmpSGE(Src, Min); - llvm::Value *LE = Builder.CreateICmpSLE(Src, Max); - Check = Builder.CreateAnd(GE, LE); - } - } else { - const llvm::fltSemantics &SrcSema = - CGF.getContext().getFloatTypeSemantics(OrigSrcType); - if (isa<llvm::IntegerType>(DstTy)) { - // Floating-point to integer. This has undefined behavior if the source is - // +-Inf, NaN, or doesn't fit into the destination type (after truncation - // to an integer). - unsigned Width = CGF.getContext().getIntWidth(DstType); - bool Unsigned = DstType->isUnsignedIntegerOrEnumerationType(); - - APSInt Min = APSInt::getMinValue(Width, Unsigned); - APFloat MinSrc(SrcSema, APFloat::uninitialized); - if (MinSrc.convertFromAPInt(Min, !Unsigned, APFloat::rmTowardZero) & - APFloat::opOverflow) - // Don't need an overflow check for lower bound. Just check for - // -Inf/NaN. 
- MinSrc = APFloat::getInf(SrcSema, true); - else - // Find the largest value which is too small to represent (before - // truncation toward zero). - MinSrc.subtract(APFloat(SrcSema, 1), APFloat::rmTowardNegative); - - APSInt Max = APSInt::getMaxValue(Width, Unsigned); - APFloat MaxSrc(SrcSema, APFloat::uninitialized); - if (MaxSrc.convertFromAPInt(Max, !Unsigned, APFloat::rmTowardZero) & - APFloat::opOverflow) - // Don't need an overflow check for upper bound. Just check for - // +Inf/NaN. - MaxSrc = APFloat::getInf(SrcSema, false); - else - // Find the smallest value which is too large to represent (before - // truncation toward zero). - MaxSrc.add(APFloat(SrcSema, 1), APFloat::rmTowardPositive); - - // If we're converting from __half, convert the range to float to match - // the type of src. - if (OrigSrcType->isHalfType()) { - const llvm::fltSemantics &Sema = - CGF.getContext().getFloatTypeSemantics(SrcType); - bool IsInexact; - MinSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); - MaxSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); - } - - llvm::Value *GE = - Builder.CreateFCmpOGT(Src, llvm::ConstantFP::get(VMContext, MinSrc)); - llvm::Value *LE = - Builder.CreateFCmpOLT(Src, llvm::ConstantFP::get(VMContext, MaxSrc)); - Check = Builder.CreateAnd(GE, LE); - } else { - // FIXME: Maybe split this sanitizer out from float-cast-overflow. - // - // Floating-point to floating-point. This has undefined behavior if the - // source is not in the range of representable values of the destination - // type. The C and C++ standards are spectacularly unclear here. We - // diagnose finite out-of-range conversions, but allow infinities and NaNs - // to convert to the corresponding value in the smaller type. - // - // C11 Annex F gives all such conversions defined behavior for IEC 60559 - // conforming implementations. Unfortunately, LLVM's fptrunc instruction - // does not. - - // Converting from a lower rank to a higher rank can never have - // undefined behavior, since higher-rank types must have a superset - // of values of lower-rank types. - if (CGF.getContext().getFloatingTypeOrder(OrigSrcType, DstType) != 1) - return; - - assert(!OrigSrcType->isHalfType() && - "should not check conversion from __half, it has the lowest rank"); - - const llvm::fltSemantics &DstSema = - CGF.getContext().getFloatTypeSemantics(DstType); - APFloat MinBad = APFloat::getLargest(DstSema, false); - APFloat MaxBad = APFloat::getInf(DstSema, false); - - bool IsInexact; - MinBad.convert(SrcSema, APFloat::rmTowardZero, &IsInexact); - MaxBad.convert(SrcSema, APFloat::rmTowardZero, &IsInexact); - - Value *AbsSrc = CGF.EmitNounwindRuntimeCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::fabs, Src->getType()), Src); - llvm::Value *GE = - Builder.CreateFCmpOGT(AbsSrc, llvm::ConstantFP::get(VMContext, MinBad)); - llvm::Value *LE = - Builder.CreateFCmpOLT(AbsSrc, llvm::ConstantFP::get(VMContext, MaxBad)); - Check = Builder.CreateNot(Builder.CreateAnd(GE, LE)); - } - } + const llvm::fltSemantics &SrcSema = + CGF.getContext().getFloatTypeSemantics(OrigSrcType); + + // Floating-point to integer. This has undefined behavior if the source is + // +-Inf, NaN, or doesn't fit into the destination type (after truncation + // to an integer). 
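A worked instance of the bounds computed in the code that follows, assuming a double source and a 32-bit signed destination (fitsInInt32 is an illustrative model, not what the compiler emits):

#include <cassert>
#include <limits>

// MinSrc is the largest value too small to represent: (double)INT32_MIN - 1 ==
// -2147483649.0. MaxSrc is the smallest value too large: (double)INT32_MAX + 1 ==
// 2147483648.0. Both are exact doubles, and because the emitted compares are
// ordered (FCmpOGT / FCmpOLT), NaN and +/-Inf fail the check as well.
bool fitsInInt32(double Src) {
  return Src > -2147483649.0 && Src < 2147483648.0;
}

int main() {
  assert(fitsInInt32(-2147483648.5));  // truncates to INT32_MIN, allowed
  assert(!fitsInInt32(2147483648.0));  // one past INT32_MAX, rejected
  assert(!fitsInInt32(std::numeric_limits<double>::quiet_NaN()));
  return 0;
}
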
+ unsigned Width = CGF.getContext().getIntWidth(DstType); + bool Unsigned = DstType->isUnsignedIntegerOrEnumerationType(); + + APSInt Min = APSInt::getMinValue(Width, Unsigned); + APFloat MinSrc(SrcSema, APFloat::uninitialized); + if (MinSrc.convertFromAPInt(Min, !Unsigned, APFloat::rmTowardZero) & + APFloat::opOverflow) + // Don't need an overflow check for lower bound. Just check for + // -Inf/NaN. + MinSrc = APFloat::getInf(SrcSema, true); + else + // Find the largest value which is too small to represent (before + // truncation toward zero). + MinSrc.subtract(APFloat(SrcSema, 1), APFloat::rmTowardNegative); + + APSInt Max = APSInt::getMaxValue(Width, Unsigned); + APFloat MaxSrc(SrcSema, APFloat::uninitialized); + if (MaxSrc.convertFromAPInt(Max, !Unsigned, APFloat::rmTowardZero) & + APFloat::opOverflow) + // Don't need an overflow check for upper bound. Just check for + // +Inf/NaN. + MaxSrc = APFloat::getInf(SrcSema, false); + else + // Find the smallest value which is too large to represent (before + // truncation toward zero). + MaxSrc.add(APFloat(SrcSema, 1), APFloat::rmTowardPositive); + + // If we're converting from __half, convert the range to float to match + // the type of src. + if (OrigSrcType->isHalfType()) { + const llvm::fltSemantics &Sema = + CGF.getContext().getFloatTypeSemantics(SrcType); + bool IsInexact; + MinSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); + MaxSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); + } + + llvm::Value *GE = + Builder.CreateFCmpOGT(Src, llvm::ConstantFP::get(VMContext, MinSrc)); + llvm::Value *LE = + Builder.CreateFCmpOLT(Src, llvm::ConstantFP::get(VMContext, MaxSrc)); + Check = Builder.CreateAnd(GE, LE); llvm::Constant *StaticArgs[] = {CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(OrigSrcType), @@ -1205,17 +1172,25 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // TODO(leonardchan): When necessary, add another if statement checking for // conversions to fixed point types from other types. if (SrcType->isFixedPointType()) { - if (DstType->isFixedPointType()) { - return EmitFixedPointConversion(Src, SrcType, DstType, Loc); - } else if (DstType->isBooleanType()) { + if (DstType->isBooleanType()) + // It is important that we check this before checking if the dest type is + // an integer because booleans are technically integer types. // We do not need to check the padding bit on unsigned types if unsigned // padding is enabled because overflow into this bit is undefined // behavior. return Builder.CreateIsNotNull(Src, "tobool"); - } + if (DstType->isFixedPointType() || DstType->isIntegerType()) + return EmitFixedPointConversion(Src, SrcType, DstType, Loc); + + llvm_unreachable( + "Unhandled scalar conversion from a fixed point type to another type."); + } else if (DstType->isFixedPointType()) { + if (SrcType->isIntegerType()) + // This also includes converting booleans and enums to fixed point types. + return EmitFixedPointConversion(Src, SrcType, DstType, Loc); llvm_unreachable( - "Unhandled scalar conversion involving a fixed point type."); + "Unhandled scalar conversion to a fixed point type from another type."); } QualType NoncanonicalSrcType = SrcType; @@ -1351,9 +1326,12 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, llvm::Type *ResTy = DstTy; // An overflowing conversion has undefined behavior if either the source type - // or the destination type is a floating-point type. + // or the destination type is a floating-point type. 
However, we consider the + // range of representable values for all floating-point types to be + // [-inf,+inf], so no overflow can ever happen when the destination type is a + // floating-point type. if (CGF.SanOpts.has(SanitizerKind::FloatCastOverflow) && - (OrigSrcType->isFloatingType() || DstType->isFloatingType())) + OrigSrcType->isFloatingType()) EmitFloatConversionCheck(OrigSrc, OrigSrcType, Src, SrcType, DstType, DstTy, Loc); @@ -1423,17 +1401,21 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc) { - using llvm::APInt; - using llvm::ConstantInt; - using llvm::Value; - - assert(SrcTy->isFixedPointType()); - assert(DstTy->isFixedPointType()); - FixedPointSemantics SrcFPSema = CGF.getContext().getFixedPointSemantics(SrcTy); FixedPointSemantics DstFPSema = CGF.getContext().getFixedPointSemantics(DstTy); + return EmitFixedPointConversion(Src, SrcFPSema, DstFPSema, Loc, + DstTy->isIntegerType()); +} + +Value *ScalarExprEmitter::EmitFixedPointConversion( + Value *Src, FixedPointSemantics &SrcFPSema, FixedPointSemantics &DstFPSema, + SourceLocation Loc, bool DstIsInteger) { + using llvm::APInt; + using llvm::ConstantInt; + using llvm::Value; + unsigned SrcWidth = SrcFPSema.getWidth(); unsigned DstWidth = DstFPSema.getWidth(); unsigned SrcScale = SrcFPSema.getScale(); @@ -1446,13 +1428,26 @@ Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, Value *Result = Src; unsigned ResultWidth = SrcWidth; - if (!DstFPSema.isSaturated()) { - // Downscale. - if (DstScale < SrcScale) - Result = SrcIsSigned ? - Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") : - Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); + // Downscale. + if (DstScale < SrcScale) { + // When converting to integers, we round towards zero. For negative numbers, + // right shifting rounds towards negative infinity. In this case, we can + // just round up before shifting. + if (DstIsInteger && SrcIsSigned) { + Value *Zero = llvm::Constant::getNullValue(Result->getType()); + Value *IsNegative = Builder.CreateICmpSLT(Result, Zero); + Value *LowBits = ConstantInt::get( + CGF.getLLVMContext(), APInt::getLowBitsSet(ResultWidth, SrcScale)); + Value *Rounded = Builder.CreateAdd(Result, LowBits); + Result = Builder.CreateSelect(IsNegative, Rounded, Result); + } + Result = SrcIsSigned + ? Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") + : Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); + } + + if (!DstFPSema.isSaturated()) { // Resize. Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); @@ -1462,14 +1457,11 @@ Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, } else { // Adjust the number of fractional bits. if (DstScale > SrcScale) { - ResultWidth = SrcWidth + DstScale - SrcScale; + // Compare to DstWidth to prevent resizing twice. + ResultWidth = std::max(SrcWidth + DstScale - SrcScale, DstWidth); llvm::Type *UpscaledTy = Builder.getIntNTy(ResultWidth); Result = Builder.CreateIntCast(Result, UpscaledTy, SrcIsSigned, "resize"); Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale"); - } else if (DstScale < SrcScale) { - Result = SrcIsSigned ? - Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") : - Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); } // Handle saturation. 
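The rounding adjustment in the downscale path above can be checked with ordinary integers; a minimal sketch under the usual arithmetic-shift assumption (function and driver are illustrative only):

#include <cassert>
#include <cstdint>

// With 8 fractional bits, -1.5 is stored as -384. A bare arithmetic right shift
// rounds toward negative infinity (-384 >> 8 == -2); adding the low
// (1 << Scale) - 1 bits first, as the CreateSelect above does for negative
// inputs, makes the shift truncate toward zero.
int32_t fixedToIntTowardZero(int32_t Raw, unsigned Scale) {
  if (Raw < 0)
    Raw += (int32_t{1} << Scale) - 1;
  return Raw >> Scale; // assumes arithmetic right shift on signed values
}

int main() {
  assert(fixedToIntTowardZero(-384, 8) == -1); // -1.5 -> -1
  assert(fixedToIntTowardZero(-256, 8) == -1); // -1.0 -> -1 (exact, unchanged)
  assert(fixedToIntTowardZero(384, 8) == 1);   // 1.5 -> 1
  return 0;
}
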
@@ -1493,7 +1485,8 @@ Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, } // Resize the integer part to get the final destination size. - Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); + if (ResultWidth != DstWidth) + Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); } return Result; } @@ -1978,6 +1971,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return EmitLoadOfLValue(LV, CE->getExprLoc()); } + case CK_LValueToRValueBitCast: { + LValue SourceLVal = CGF.EmitLValue(E); + Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(), + CGF.ConvertTypeForMem(DestTy)); + LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); + DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); + return EmitLoadOfLValue(DestLV, CE->getExprLoc()); + } + case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: case CK_AnyPointerToBlockPointerCast: @@ -2017,6 +2019,12 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } } + // Update heapallocsite metadata when there is an explicit cast. + if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(Src)) + if (CI->getMetadata("heapallocsite") && isa<ExplicitCastExpr>(CE)) + CGF.getDebugInfo()-> + addHeapAllocSiteMetadata(CI, CE->getType(), CE->getExprLoc()); + return Builder.CreateBitCast(Src, DstTy); } case CK_AddressSpaceConversion: { @@ -2087,14 +2095,14 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_NullToPointer: if (MustVisitNullValue(E)) - (void) Visit(E); + CGF.EmitIgnoredExpr(E); return CGF.CGM.getNullPointer(cast<llvm::PointerType>(ConvertType(DestTy)), DestTy); case CK_NullToMemberPointer: { if (MustVisitNullValue(E)) - (void) Visit(E); + CGF.EmitIgnoredExpr(E); const MemberPointerType *MPT = CE->getType()->getAs<MemberPointerType>(); return CGF.CGM.getCXXABI().EmitNullMemberPointer(MPT); @@ -2200,6 +2208,21 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return EmitScalarConversion(Visit(E), E->getType(), DestTy, CE->getExprLoc()); + case CK_FixedPointToIntegral: + assert(E->getType()->isFixedPointType() && + "Expected src type to be fixed point type"); + assert(DestTy->isIntegerType() && "Expected dest type to be an integer"); + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + + case CK_IntegralToFixedPoint: + assert(E->getType()->isIntegerType() && + "Expected src type to be an integer"); + assert(DestTy->isFixedPointType() && + "Expected dest type to be fixed point type"); + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + case CK_IntegralCast: { ScalarConversionOpts Opts; if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE)) { @@ -2527,14 +2550,14 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } if (atomicPHI) { - llvm::BasicBlock *opBB = Builder.GetInsertBlock(); + llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn); auto Pair = CGF.EmitAtomicCompareExchange( LV, RValue::get(atomicPHI), RValue::get(value), E->getExprLoc()); llvm::Value *old = CGF.EmitToMemory(Pair.first.getScalarVal(), type); llvm::Value *success = Pair.second; - atomicPHI->addIncoming(old, opBB); - Builder.CreateCondBr(success, contBB, opBB); + atomicPHI->addIncoming(old, curBlock); + Builder.CreateCondBr(success, contBB, atomicPHI->getParent()); Builder.SetInsertPoint(contBB); return isPre ? 
value : input; } @@ -2881,14 +2904,14 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( Loc, ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { - llvm::BasicBlock *opBB = Builder.GetInsertBlock(); + llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn); auto Pair = CGF.EmitAtomicCompareExchange( LHSLV, RValue::get(atomicPHI), RValue::get(Result), E->getExprLoc()); llvm::Value *old = CGF.EmitToMemory(Pair.first.getScalarVal(), LHSTy); llvm::Value *success = Pair.second; - atomicPHI->addIncoming(old, opBB); - Builder.CreateCondBr(success, contBB, opBB); + atomicPHI->addIncoming(old, curBlock); + Builder.CreateCondBr(success, contBB, atomicPHI->getParent()); Builder.SetInsertPoint(contBB); return LHSLV; } @@ -2908,7 +2931,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( Value *ScalarExprEmitter::EmitCompoundAssign(const CompoundAssignOperator *E, Value *(ScalarExprEmitter::*Func)(const BinOpInfo &)) { bool Ignore = TestAndClearIgnoreResultAssign(); - Value *RHS; + Value *RHS = nullptr; LValue LHS = EmitCompoundAssignLValue(E, Func, RHS); // If the result is clearly ignored, return now. @@ -3090,7 +3113,8 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { llvm::Type *argTypes[] = { CGF.Int64Ty, CGF.Int64Ty, Int8Ty, Int8Ty }; llvm::FunctionType *handlerTy = llvm::FunctionType::get(CGF.Int64Ty, argTypes, true); - llvm::Value *handler = CGF.CGM.CreateRuntimeFunction(handlerTy, *handlerName); + llvm::FunctionCallee handler = + CGF.CGM.CreateRuntimeFunction(handlerTy, *handlerName); // Sign extend the args to 64-bit, so that we can use the same handler for // all types of overflow. @@ -3338,9 +3362,119 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { return propagateFMFlags(V, op); } + if (op.isFixedPointBinOp()) + return EmitFixedPointBinOp(op); + return Builder.CreateAdd(op.LHS, op.RHS, "add"); } +/// The resulting value must be calculated with exact precision, so the operands +/// may not be the same type. +Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) { + using llvm::APSInt; + using llvm::ConstantInt; + + const auto *BinOp = cast<BinaryOperator>(op.E); + + // The result is a fixed point type and at least one of the operands is fixed + // point while the other is either fixed point or an int. This resulting type + // should be determined by Sema::handleFixedPointConversions(). + QualType ResultTy = op.Ty; + QualType LHSTy = BinOp->getLHS()->getType(); + QualType RHSTy = BinOp->getRHS()->getType(); + ASTContext &Ctx = CGF.getContext(); + Value *LHS = op.LHS; + Value *RHS = op.RHS; + + auto LHSFixedSema = Ctx.getFixedPointSemantics(LHSTy); + auto RHSFixedSema = Ctx.getFixedPointSemantics(RHSTy); + auto ResultFixedSema = Ctx.getFixedPointSemantics(ResultTy); + auto CommonFixedSema = LHSFixedSema.getCommonSemantics(RHSFixedSema); + + // Convert the operands to the full precision type. + Value *FullLHS = EmitFixedPointConversion(LHS, LHSFixedSema, CommonFixedSema, + BinOp->getExprLoc()); + Value *FullRHS = EmitFixedPointConversion(RHS, RHSFixedSema, CommonFixedSema, + BinOp->getExprLoc()); + + // Perform the actual addition. + Value *Result; + switch (BinOp->getOpcode()) { + case BO_Add: { + if (ResultFixedSema.isSaturated()) { + llvm::Intrinsic::ID IID = ResultFixedSema.isSigned() + ? 
llvm::Intrinsic::sadd_sat + : llvm::Intrinsic::uadd_sat; + Result = Builder.CreateBinaryIntrinsic(IID, FullLHS, FullRHS); + } else { + Result = Builder.CreateAdd(FullLHS, FullRHS); + } + break; + } + case BO_Sub: { + if (ResultFixedSema.isSaturated()) { + llvm::Intrinsic::ID IID = ResultFixedSema.isSigned() + ? llvm::Intrinsic::ssub_sat + : llvm::Intrinsic::usub_sat; + Result = Builder.CreateBinaryIntrinsic(IID, FullLHS, FullRHS); + } else { + Result = Builder.CreateSub(FullLHS, FullRHS); + } + break; + } + case BO_LT: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSLT(FullLHS, FullRHS) + : Builder.CreateICmpULT(FullLHS, FullRHS); + case BO_GT: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSGT(FullLHS, FullRHS) + : Builder.CreateICmpUGT(FullLHS, FullRHS); + case BO_LE: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSLE(FullLHS, FullRHS) + : Builder.CreateICmpULE(FullLHS, FullRHS); + case BO_GE: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSGE(FullLHS, FullRHS) + : Builder.CreateICmpUGE(FullLHS, FullRHS); + case BO_EQ: + // For equality operations, we assume any padding bits on unsigned types are + // zero'd out. They could be overwritten through non-saturating operations + // that cause overflow, but this leads to undefined behavior. + return Builder.CreateICmpEQ(FullLHS, FullRHS); + case BO_NE: + return Builder.CreateICmpNE(FullLHS, FullRHS); + case BO_Mul: + case BO_Div: + case BO_Shl: + case BO_Shr: + case BO_Cmp: + case BO_LAnd: + case BO_LOr: + case BO_MulAssign: + case BO_DivAssign: + case BO_AddAssign: + case BO_SubAssign: + case BO_ShlAssign: + case BO_ShrAssign: + llvm_unreachable("Found unimplemented fixed point binary operation"); + case BO_PtrMemD: + case BO_PtrMemI: + case BO_Rem: + case BO_Xor: + case BO_And: + case BO_Or: + case BO_Assign: + case BO_RemAssign: + case BO_AndAssign: + case BO_XorAssign: + case BO_OrAssign: + case BO_Comma: + llvm_unreachable("Found unsupported binary operation for fixed point types."); + } + + // Convert to the result type. + return EmitFixedPointConversion(Result, CommonFixedSema, ResultFixedSema, + BinOp->getExprLoc()); +} + Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { // The LHS is always a pointer if either side is. if (!op.LHS->getType()->isPointerTy()) { @@ -3372,6 +3506,9 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { return propagateFMFlags(V, op); } + if (op.isFixedPointBinOp()) + return EmitFixedPointBinOp(op); + return Builder.CreateSub(op.LHS, op.RHS, "sub"); } @@ -3450,7 +3587,8 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { bool SanitizeBase = CGF.SanOpts.has(SanitizerKind::ShiftBase) && Ops.Ty->hasSignedIntegerRepresentation() && - !CGF.getLangOpts().isSignedOverflowDefined(); + !CGF.getLangOpts().isSignedOverflowDefined() && + !CGF.getLangOpts().CPlusPlus2a; bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent); // OpenCL 6.3j: shift values are effectively % word size of LHS. 
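For the saturating cases in EmitFixedPointBinOp above, llvm.sadd.sat / llvm.uadd.sat (and the ssub/usub variants) clamp to the edges of the common semantics instead of wrapping. A plain-integer model of the signed 32-bit case, illustrative only and not the emitted IR:

#include <cassert>
#include <cstdint>
#include <limits>

// Saturating signed add: compute in a wider type, then clamp to [INT32_MIN, INT32_MAX].
int32_t saddSat32(int32_t A, int32_t B) {
  int64_t Wide = static_cast<int64_t>(A) + B;
  if (Wide > std::numeric_limits<int32_t>::max())
    return std::numeric_limits<int32_t>::max();
  if (Wide < std::numeric_limits<int32_t>::min())
    return std::numeric_limits<int32_t>::min();
  return static_cast<int32_t>(Wide);
}

int main() {
  assert(saddSat32(INT32_MAX, 1) == INT32_MAX); // clamps instead of wrapping
  assert(saddSat32(INT32_MIN, -1) == INT32_MIN);
  assert(saddSat32(3, 4) == 7);
  return 0;
}
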
if (CGF.getLangOpts().OpenCL) @@ -3591,8 +3729,9 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, Result = CGF.CGM.getCXXABI().EmitMemberPointerComparison( CGF, LHS, RHS, MPT, E->getOpcode() == BO_NE); } else if (!LHSTy->isAnyComplexType() && !RHSTy->isAnyComplexType()) { - Value *LHS = Visit(E->getLHS()); - Value *RHS = Visit(E->getRHS()); + BinOpInfo BOInfo = EmitBinOps(E); + Value *LHS = BOInfo.LHS; + Value *RHS = BOInfo.RHS; // If AltiVec, the comparison results in a numeric type, so we use // intrinsics comparing vectors and giving 0 or 1 as a result @@ -3670,7 +3809,9 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, E->getExprLoc()); } - if (LHS->getType()->isFPOrFPVectorTy()) { + if (BOInfo.isFixedPointBinOp()) { + Result = EmitFixedPointBinOp(BOInfo); + } else if (LHS->getType()->isFPOrFPVectorTy()) { Result = Builder.CreateFCmp(FCmpOpc, LHS, RHS, "cmp"); } else if (LHSTy->hasSignedIntegerRepresentation()) { Result = Builder.CreateICmp(SICmpOpc, LHS, RHS, "cmp"); diff --git a/lib/CodeGen/CGGPUBuiltin.cpp b/lib/CodeGen/CGGPUBuiltin.cpp index b5375ffb8db7..d7e267630762 100644 --- a/lib/CodeGen/CGGPUBuiltin.cpp +++ b/lib/CodeGen/CGGPUBuiltin.cpp @@ -1,9 +1,8 @@ //===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp index fd0a9c773a2e..b2bc42bfa013 100644 --- a/lib/CodeGen/CGLoopInfo.cpp +++ b/lib/CodeGen/CGLoopInfo.cpp @@ -1,9 +1,8 @@ //===---- CGLoopInfo.cpp - LLVM CodeGen for loop metadata -*- C++ -*-------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -19,138 +18,396 @@ using namespace clang::CodeGen; using namespace llvm; -static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc, MDNode *&AccGroup) { +MDNode * +LoopInfo::createLoopPropertiesMetadata(ArrayRef<Metadata *> LoopProperties) { + LLVMContext &Ctx = Header->getContext(); + SmallVector<Metadata *, 4> NewLoopProperties; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + NewLoopProperties.push_back(TempNode.get()); + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && - Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && - Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && - Attrs.PipelineInitiationInterval == 0 && - Attrs.VectorizeEnable == LoopAttributes::Unspecified && - Attrs.UnrollEnable == LoopAttributes::Unspecified && - Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && - Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && - !EndLoc) - return nullptr; + MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties); + LoopID->replaceOperandWith(0, LoopID); + return LoopID; +} + +MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.PipelineDisabled) + Enabled = false; + else if (Attrs.PipelineInitiationInterval != 0) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.pipeline.disable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 1))})); + LoopProperties = NewLoopProperties; + } + return createLoopPropertiesMetadata(LoopProperties); + } SmallVector<Metadata *, 4> Args; - // Reserve operand 0 for loop id self reference. - auto TempNode = MDNode::getTemporary(Ctx, None); + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); - // If we have a valid start debug location for the loop, add it. - if (StartLoc) { - Args.push_back(StartLoc.getAsMDNode()); - - // If we also have a valid end debug location for the loop, add it. - if (EndLoc) - Args.push_back(EndLoc.getAsMDNode()); - } - - // Setting vectorize.width - if (Attrs.VectorizeWidth > 0) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.VectorizeWidth))}; + if (Attrs.PipelineInitiationInterval > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), Attrs.PipelineInitiationInterval))}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting interleave.count - if (Attrs.InterleaveCount > 0) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.interleave.count"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.InterleaveCount))}; - Args.push_back(MDNode::get(Ctx, Vals)); + // No follow-up: This is the last transformation. 
+ + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode * +LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.UnrollEnable == LoopAttributes::Disable) + Enabled = false; + else if (Attrs.UnrollEnable == LoopAttributes::Full) + Enabled = None; + else if (Attrs.UnrollEnable != LoopAttributes::Unspecified || + Attrs.UnrollCount != 0) + Enabled = true; + + if (Enabled != true) { + // createFullUnrollMetadata will already have added llvm.loop.unroll.disable + // if unrolling is disabled. + return createPipeliningMetadata(Attrs, LoopProperties, HasUserTransforms); } + SmallVector<Metadata *, 4> FollowupLoopProperties; + + // Apply all loop properties to the unrolled loop. + FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + + // Don't unroll an already unrolled loop. + FollowupLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"))); + + bool FollowupHasTransforms = false; + MDNode *Followup = createPipeliningMetadata(Attrs, FollowupLoopProperties, + FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + // Setting unroll.count if (Attrs.UnrollCount > 0) { Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.count"), ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.UnrollCount))}; + llvm::Type::getInt32Ty(Ctx), Attrs.UnrollCount))}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting unroll_and_jam.count - if (Attrs.UnrollAndJamCount > 0) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll_and_jam.count"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.UnrollAndJamCount))}; + // Setting unroll.full or unroll.disable + if (Attrs.UnrollEnable == LoopAttributes::Enable) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.enable")}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting vectorize.enable - if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.enable"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt1Ty(Ctx), (Attrs.VectorizeEnable == - LoopAttributes::Enable)))}; - Args.push_back(MDNode::get(Ctx, Vals)); - } + if (FollowupHasTransforms) + Args.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.unroll.followup_all"), Followup})); - // Setting unroll.full or unroll.disable - if (Attrs.UnrollEnable != LoopAttributes::Unspecified) { - std::string Name; - if (Attrs.UnrollEnable == LoopAttributes::Enable) - Name = "llvm.loop.unroll.enable"; - else if (Attrs.UnrollEnable == LoopAttributes::Full) - Name = "llvm.loop.unroll.full"; - else - Name = "llvm.loop.unroll.disable"; - Metadata *Vals[] = {MDString::get(Ctx, Name)}; - Args.push_back(MDNode::get(Ctx, Vals)); + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode * +LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.UnrollAndJamEnable == 
LoopAttributes::Disable) + Enabled = false; + else if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable || + Attrs.UnrollAndJamCount != 0) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back(MDNode::get( + Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable"))); + LoopProperties = NewLoopProperties; + } + return createPartialUnrollMetadata(Attrs, LoopProperties, + HasUserTransforms); } - // Setting unroll_and_jam.full or unroll_and_jam.disable - if (Attrs.UnrollAndJamEnable != LoopAttributes::Unspecified) { - std::string Name; - if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable) - Name = "llvm.loop.unroll_and_jam.enable"; - else if (Attrs.UnrollAndJamEnable == LoopAttributes::Full) - Name = "llvm.loop.unroll_and_jam.full"; - else - Name = "llvm.loop.unroll_and_jam.disable"; - Metadata *Vals[] = {MDString::get(Ctx, Name)}; + SmallVector<Metadata *, 4> FollowupLoopProperties; + FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + FollowupLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable"))); + + bool FollowupHasTransforms = false; + MDNode *Followup = createPartialUnrollMetadata(Attrs, FollowupLoopProperties, + FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + + // Setting unroll_and_jam.count + if (Attrs.UnrollAndJamCount > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.unroll_and_jam.count"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.UnrollAndJamCount))}; Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.DistributeEnable != LoopAttributes::Unspecified) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt1Ty(Ctx), (Attrs.DistributeEnable == - LoopAttributes::Enable)))}; + if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll_and_jam.enable")}; Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.IsParallel) { - AccGroup = MDNode::getDistinct(Ctx, {}); + if (FollowupHasTransforms) Args.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup})); + Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_outer"), + Followup})); + + if (UnrollAndJamInnerFollowup) + Args.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_inner"), + UnrollAndJamInnerFollowup})); + + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode * +LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.VectorizeEnable == LoopAttributes::Disable) + Enabled = false; + else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || + Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + 
NewLoopProperties.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 0))})); + LoopProperties = NewLoopProperties; + } + return createUnrollAndJamMetadata(Attrs, LoopProperties, HasUserTransforms); + } + + // Apply all loop properties to the vectorized loop. + SmallVector<Metadata *, 4> FollowupLoopProperties; + FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + + // Don't vectorize an already vectorized loop. + FollowupLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); + + bool FollowupHasTransforms = false; + MDNode *Followup = createUnrollAndJamMetadata(Attrs, FollowupLoopProperties, + FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + + // Setting vectorize.width + if (Attrs.VectorizeWidth > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.vectorize.width"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.VectorizeWidth))}; + Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.PipelineDisabled) { + // Setting interleave.count + if (Attrs.InterleaveCount > 0) { Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.pipeline.disable"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt1Ty(Ctx), (Attrs.PipelineDisabled == true)))}; + MDString::get(Ctx, "llvm.loop.interleave.count"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.InterleaveCount))}; Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.PipelineInitiationInterval > 0) { + // Setting vectorize.enable + if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) { Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"), + MDString::get(Ctx, "llvm.loop.vectorize.enable"), ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.PipelineInitiationInterval))}; + llvm::Type::getInt1Ty(Ctx), + (Attrs.VectorizeEnable == LoopAttributes::Enable)))}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Set the first operand to itself. 
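Each createXXXMetadata helper above and below finishes with the same construction: a temporary node reserves operand 0, a tuple is built, and operand 0 is then redirected to the tuple itself so the loop ID is self-referential, with follow-up transformations riding along as a nested LoopID for the transformed loop. A condensed sketch of that pattern against LLVM's Metadata API (the helper name is illustrative; this is not code from the patch):

#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"

using namespace llvm;

static MDNode *makeSelfReferentialLoopID(LLVMContext &Ctx, MDNode *Followup) {
  SmallVector<Metadata *, 4> Args;
  TempMDTuple TempNode = MDNode::getTemporary(Ctx, None);
  Args.push_back(TempNode.get()); // operand 0: placeholder for the self reference
  Args.push_back(MDNode::get(
      Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
            ConstantAsMetadata::get(
                ConstantInt::get(Type::getInt1Ty(Ctx), 1))}));
  if (Followup) // attach the LoopID meant for the transformed loop
    Args.push_back(MDNode::get(
        Ctx,
        {MDString::get(Ctx, "llvm.loop.vectorize.followup_all"), Followup}));
  MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
  LoopID->replaceOperandWith(0, LoopID); // set the first operand to itself
  return LoopID;
}
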
+ if (FollowupHasTransforms) + Args.push_back(MDNode::get( + Ctx, + {MDString::get(Ctx, "llvm.loop.vectorize.followup_all"), Followup})); + MDNode *LoopID = MDNode::get(Ctx, Args); LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; return LoopID; } +MDNode * +LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.DistributeEnable == LoopAttributes::Disable) + Enabled = false; + if (Attrs.DistributeEnable == LoopAttributes::Enable) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.distribute.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 0))})); + LoopProperties = NewLoopProperties; + } + return createLoopVectorizeMetadata(Attrs, LoopProperties, + HasUserTransforms); + } + + bool FollowupHasTransforms = false; + MDNode *Followup = + createLoopVectorizeMetadata(Attrs, LoopProperties, FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), + (Attrs.DistributeEnable == LoopAttributes::Enable)))}; + Args.push_back(MDNode::get(Ctx, Vals)); + + if (FollowupHasTransforms) + Args.push_back(MDNode::get( + Ctx, + {MDString::get(Ctx, "llvm.loop.distribute.followup_all"), Followup})); + + MDNode *LoopID = MDNode::get(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.UnrollEnable == LoopAttributes::Disable) + Enabled = false; + else if (Attrs.UnrollEnable == LoopAttributes::Full) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"))); + LoopProperties = NewLoopProperties; + } + return createLoopDistributeMetadata(Attrs, LoopProperties, + HasUserTransforms); + } + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + Args.push_back(MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))); + + // No follow-up: there is no loop after full unrolling. + // TODO: Warn if there are transformations after full unrolling. + + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode *LoopInfo::createMetadata( + const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> AdditionalLoopProperties, + bool &HasUserTransforms) { + SmallVector<Metadata *, 3> LoopProperties; + + // If we have a valid start debug location for the loop, add it. 
+ if (StartLoc) { + LoopProperties.push_back(StartLoc.getAsMDNode()); + + // If we also have a valid end debug location for the loop, add it. + if (EndLoc) + LoopProperties.push_back(EndLoc.getAsMDNode()); + } + + assert(!!AccGroup == Attrs.IsParallel && + "There must be an access group iff the loop is parallel"); + if (Attrs.IsParallel) { + LLVMContext &Ctx = Header->getContext(); + LoopProperties.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup})); + } + + LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(), + AdditionalLoopProperties.end()); + return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms); +} + LoopAttributes::LoopAttributes(bool IsParallel) : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), @@ -174,15 +431,114 @@ void LoopAttributes::clear() { } LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc) - : LoopID(nullptr), Header(Header), Attrs(Attrs) { - LoopID = - createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc, AccGroup); + const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, + LoopInfo *Parent) + : Header(Header), Attrs(Attrs), StartLoc(StartLoc), EndLoc(EndLoc), + Parent(Parent) { + + if (Attrs.IsParallel) { + // Create an access group for this loop. + LLVMContext &Ctx = Header->getContext(); + AccGroup = MDNode::getDistinct(Ctx, {}); + } + + if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && + Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && + Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && + Attrs.PipelineInitiationInterval == 0 && + Attrs.VectorizeEnable == LoopAttributes::Unspecified && + Attrs.UnrollEnable == LoopAttributes::Unspecified && + Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && + Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && + !EndLoc) + return; + + TempLoopID = MDNode::getTemporary(Header->getContext(), None); +} + +void LoopInfo::finish() { + // We did not annotate the loop body instructions because there are no + // attributes for this loop. + if (!TempLoopID) + return; + + MDNode *LoopID; + LoopAttributes CurLoopAttr = Attrs; + LLVMContext &Ctx = Header->getContext(); + + if (Parent && (Parent->Attrs.UnrollAndJamEnable || + Parent->Attrs.UnrollAndJamCount != 0)) { + // Parent unroll-and-jams this loop. + // Split the transformations in those that happens before the unroll-and-jam + // and those after. 
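In source terms, the split performed just below (BeforeJam / AfterJam) means that for a nest like the following, the inner loop's vectorize(enable) stays on the loop as it exists before the parent's unroll-and-jam, while its unroll_count(4) is carried in the metadata the parent attaches as llvm.loop.unroll_and_jam.followup_inner. Illustrative code only, assuming the usual pragma spellings:

void saxpy(float *A, const float *B, int N, int M) {
#pragma unroll_and_jam
  for (int i = 0; i < N; ++i) {
#pragma clang loop vectorize(enable) unroll_count(4)
    for (int j = 0; j < M; ++j)
      A[i * M + j] += B[j];
  }
}
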
+ + LoopAttributes BeforeJam, AfterJam; + + BeforeJam.IsParallel = AfterJam.IsParallel = Attrs.IsParallel; + + BeforeJam.VectorizeWidth = Attrs.VectorizeWidth; + BeforeJam.InterleaveCount = Attrs.InterleaveCount; + BeforeJam.VectorizeEnable = Attrs.VectorizeEnable; + BeforeJam.DistributeEnable = Attrs.DistributeEnable; + + switch (Attrs.UnrollEnable) { + case LoopAttributes::Unspecified: + case LoopAttributes::Disable: + BeforeJam.UnrollEnable = Attrs.UnrollEnable; + AfterJam.UnrollEnable = Attrs.UnrollEnable; + break; + case LoopAttributes::Full: + BeforeJam.UnrollEnable = LoopAttributes::Full; + break; + case LoopAttributes::Enable: + AfterJam.UnrollEnable = LoopAttributes::Enable; + break; + } + + AfterJam.UnrollCount = Attrs.UnrollCount; + AfterJam.PipelineDisabled = Attrs.PipelineDisabled; + AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval; + + // If this loop is subject of an unroll-and-jam by the parent loop, and has + // an unroll-and-jam annotation itself, we have to decide whether to first + // apply the parent's unroll-and-jam or this loop's unroll-and-jam. The + // UnrollAndJam pass processes loops from inner to outer, so we apply the + // inner first. + BeforeJam.UnrollAndJamCount = Attrs.UnrollAndJamCount; + BeforeJam.UnrollAndJamEnable = Attrs.UnrollAndJamEnable; + + // Set the inner followup metadata to process by the outer loop. Only + // consider the first inner loop. + if (!Parent->UnrollAndJamInnerFollowup) { + // Splitting the attributes into a BeforeJam and an AfterJam part will + // stop 'llvm.loop.isvectorized' (generated by vectorization in BeforeJam) + // to be forwarded to the AfterJam part. We detect the situation here and + // add it manually. + SmallVector<Metadata *, 1> BeforeLoopProperties; + if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified || + BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0) + BeforeLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); + + bool InnerFollowupHasTransform = false; + MDNode *InnerFollowup = createMetadata(AfterJam, BeforeLoopProperties, + InnerFollowupHasTransform); + if (InnerFollowupHasTransform) + Parent->UnrollAndJamInnerFollowup = InnerFollowup; + } + + CurLoopAttr = BeforeJam; + } + + bool HasUserTransforms = false; + LoopID = createMetadata(CurLoopAttr, {}, HasUserTransforms); + TempLoopID->replaceAllUsesWith(LoopID); } void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc) { - Active.push_back(LoopInfo(Header, StagedAttrs, StartLoc, EndLoc)); + Active.push_back(LoopInfo(Header, StagedAttrs, StartLoc, EndLoc, + Active.empty() ? nullptr : &Active.back())); // Clear the attributes so nested loops do not inherit them. StagedAttrs.clear(); } @@ -209,13 +565,13 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, // Translate opencl_unroll_hint attribute argument to // equivalent LoopHintAttr enums. // OpenCL v2.0 s6.11.5: - // 0 - full unroll (no argument). + // 0 - enable unroll (no argument). // 1 - disable unroll. // other positive integer n - unroll by n. 
if (OpenCLHint) { ValueInt = OpenCLHint->getUnrollHint(); if (ValueInt == 0) { - State = LoopHintAttr::Full; + State = LoopHintAttr::Enable; } else if (ValueInt != 1) { Option = LoopHintAttr::UnrollCount; State = LoopHintAttr::Numeric; @@ -365,6 +721,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, void LoopInfoStack::pop() { assert(!Active.empty() && "No active loops to pop"); + Active.back().finish(); Active.pop_back(); } diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h index 84ba03bfb00b..35d0e00527b9 100644 --- a/lib/CodeGen/CGLoopInfo.h +++ b/lib/CodeGen/CGLoopInfo.h @@ -1,9 +1,8 @@ //===---- CGLoopInfo.h - LLVM CodeGen for loop metadata -*- C++ -*---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -79,10 +78,11 @@ class LoopInfo { public: /// Construct a new LoopInfo for the loop with entry Header. LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc); + const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, + LoopInfo *Parent); /// Get the loop id metadata for this loop. - llvm::MDNode *getLoopID() const { return LoopID; } + llvm::MDNode *getLoopID() const { return TempLoopID.get(); } /// Get the header block of this loop. llvm::BasicBlock *getHeader() const { return Header; } @@ -93,15 +93,92 @@ public: /// Return this loop's access group or nullptr if it does not have one. llvm::MDNode *getAccessGroup() const { return AccGroup; } + /// Create the loop's metadata. Must be called after its nested loops have + /// been processed. + void finish(); + private: /// Loop ID metadata. - llvm::MDNode *LoopID; + llvm::TempMDTuple TempLoopID; /// Header block of this loop. llvm::BasicBlock *Header; /// The attributes for this loop. LoopAttributes Attrs; /// The access group for memory accesses parallel to this loop. llvm::MDNode *AccGroup = nullptr; + /// Start location of this loop. + llvm::DebugLoc StartLoc; + /// End location of this loop. + llvm::DebugLoc EndLoc; + /// The next outer loop, or nullptr if this is the outermost loop. + LoopInfo *Parent; + /// If this loop has unroll-and-jam metadata, this can be set by the inner + /// loop's LoopInfo to set the llvm.loop.unroll_and_jam.followup_inner + /// metadata. + llvm::MDNode *UnrollAndJamInnerFollowup = nullptr; + + /// Create a LoopID without any transformations. + llvm::MDNode * + createLoopPropertiesMetadata(llvm::ArrayRef<llvm::Metadata *> LoopProperties); + + /// Create a LoopID for transformations. + /// + /// The methods call each other in case multiple transformations are applied + /// to a loop. The transformation first to be applied will use LoopID of the + /// next transformation in its followup attribute. + /// + /// @param Attrs The loop's transformations. + /// @param LoopProperties Non-transformation properties such as debug + /// location, parallel accesses and disabled + /// transformations. These are added to the returned + /// LoopID. + /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes + /// at least one transformation. 
+ /// + /// @return A LoopID (metadata node) that can be used for the llvm.loop + /// annotation or followup-attribute. + /// @{ + llvm::MDNode * + createPipeliningMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createPartialUnrollMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createUnrollAndJamMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createLoopVectorizeMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createLoopDistributeMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createFullUnrollMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + /// @} + + /// Create a LoopID for this loop, including transformation-unspecific + /// metadata such as debug location. + /// + /// @param Attrs This loop's attributes and transformations. + /// @param LoopProperties Additional non-transformation properties to add + /// to the LoopID, such as transformation-specific + /// metadata that are not covered by @p Attrs. + /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes + /// at least one transformation. + /// + /// @return A LoopID (metadata node) that can be used for the llvm.loop + /// annotation. + llvm::MDNode *createMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); }; /// A stack of loop information corresponding to loop nesting levels. diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp index c6a96a912622..caf62d2ac93a 100644 --- a/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/lib/CodeGen/CGNonTrivialStruct.cpp @@ -1,9 +1,8 @@ //===--- CGNonTrivialStruct.cpp - Emit Special Functions for C Structs ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,6 +14,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/NonTrivialTypeVisitor.h" +#include "clang/CodeGen/CodeGenABITypes.h" #include "llvm/Support/ScopedPrinter.h" #include <array> @@ -84,23 +84,22 @@ struct CopyStructVisitor : StructVisitor<Derived>, template <class... Ts> void preVisit(QualType::PrimitiveCopyKind PCK, QualType FT, - const FieldDecl *FD, CharUnits CurStructOffsset, - Ts &&... Args) { + const FieldDecl *FD, CharUnits CurStructOffset, Ts &&... Args) { if (PCK) asDerived().flushTrivialFields(std::forward<Ts>(Args)...); } template <class... Ts> void visitWithKind(QualType::PrimitiveCopyKind PCK, QualType FT, - const FieldDecl *FD, CharUnits CurStructOffsset, + const FieldDecl *FD, CharUnits CurStructOffset, Ts &&... 
Args) { if (const auto *AT = asDerived().getContext().getAsArrayType(FT)) { asDerived().visitArray(PCK, AT, FT.isVolatileQualified(), FD, - CurStructOffsset, std::forward<Ts>(Args)...); + CurStructOffset, std::forward<Ts>(Args)...); return; } - Super::visitWithKind(PCK, FT, FD, CurStructOffsset, + Super::visitWithKind(PCK, FT, FD, CurStructOffset, std::forward<Ts>(Args)...); } @@ -140,8 +139,8 @@ struct CopyStructVisitor : StructVisitor<Derived>, // <alignment-info> ::= <dst-alignment> ["_" <src-alignment>] // <struct-field-info> ::= <field-info>+ // <field-info> ::= <struct-or-scalar-field-info> | <array-field-info> -// <struct-or-scalar-field-info> ::= <struct-field-info> | <strong-field-info> | -// <trivial-field-info> +// <struct-or-scalar-field-info> ::= "_S" <struct-field-info> | +// <strong-field-info> | <trivial-field-info> // <array-field-info> ::= "_AB" <array-offset> "s" <element-size> "n" // <num-elements> <innermost-element-info> "_AE" // <innermost-element-info> ::= <struct-or-scalar-field-info> @@ -176,6 +175,7 @@ template <class Derived> struct GenFuncNameBase { void visitStruct(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset) { CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD); + appendStr("_S"); asDerived().visitStructFields(QT, FieldOffset); } @@ -253,11 +253,11 @@ struct GenBinaryFuncName : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>, } void visitVolatileTrivial(QualType FT, const FieldDecl *FD, - CharUnits CurStackOffset) { + CharUnits CurStructOffset) { // Because volatile fields can be bit-fields and are individually copied, // their offset and width are in bits. uint64_t OffsetInBits = - this->Ctx.toBits(CurStackOffset) + this->getFieldOffsetInBits(FD); + this->Ctx.toBits(CurStructOffset) + this->getFieldOffsetInBits(FD); this->appendStr("_tv" + llvm::to_string(OffsetInBits) + "w" + llvm::to_string(getFieldSize(FD, FT, this->Ctx))); } @@ -286,8 +286,7 @@ struct GenDestructorFuncName : GenUnaryFuncName<GenDestructorFuncName>, using Super = DestructedTypeVisitor<GenDestructorFuncName>; GenDestructorFuncName(const char *Prefix, CharUnits DstAlignment, ASTContext &Ctx) - : GenUnaryFuncName<GenDestructorFuncName>(Prefix, DstAlignment, - Ctx) {} + : GenUnaryFuncName<GenDestructorFuncName>(Prefix, DstAlignment, Ctx) {} void visitWithKind(QualType::DestructionKind DK, QualType FT, const FieldDecl *FD, CharUnits CurStructOffset) { if (const auto *AT = getContext().getAsArrayType(FT)) { @@ -322,19 +321,19 @@ static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM, // functions. template <class Derived> struct GenFuncBase { template <size_t N> - void visitStruct(QualType FT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitStruct(QualType FT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, N> Addrs) { this->asDerived().callSpecialFunction( - FT, CurStackOffset + asDerived().getFieldOffset(FD), Addrs); + FT, CurStructOffset + asDerived().getFieldOffset(FD), Addrs); } template <class FieldKind, size_t N> void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile, - const FieldDecl *FD, CharUnits CurStackOffset, + const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, N> Addrs) { // Non-volatile trivial fields are copied when flushTrivialFields is called. 
if (!FK) - return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset, + return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset, Addrs); asDerived().flushTrivialFields(Addrs); @@ -345,7 +344,7 @@ template <class Derived> struct GenFuncBase { QualType BaseEltQT; std::array<Address, N> StartAddrs = Addrs; for (unsigned I = 0; I < N; ++I) - StartAddrs[I] = getAddrWithOffset(Addrs[I], CurStackOffset, FD); + StartAddrs[I] = getAddrWithOffset(Addrs[I], CurStructOffset, FD); Address DstAddr = StartAddrs[DstIdx]; llvm::Value *NumElts = CGF.emitArrayLength(AT, BaseEltQT, DstAddr); unsigned BaseEltSize = Ctx.getTypeSizeInChars(BaseEltQT).getQuantity(); @@ -414,8 +413,7 @@ template <class Derived> struct GenFuncBase { if (Offset.getQuantity() == 0) return Addr; Addr = CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrTy); - Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity(), - CharUnits::One()); + Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity()); return CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrPtrTy); } @@ -586,15 +584,15 @@ struct GenDestructor : StructVisitor<GenDestructor>, } void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 1> Addrs) { + CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->destroyARCStrongImprecise( - *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + *CGF, getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->destroyARCWeak( - *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + *CGF, getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } void callSpecialFunction(QualType FT, CharUnits Offset, @@ -627,35 +625,35 @@ struct GenDefaultInitialize } void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 1> Addrs) { + CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->EmitNullInitialization( - getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->EmitNullInitialization( - getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } template <class FieldKind, size_t... 
Is> void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile, - const FieldDecl *FD, CharUnits CurStackOffset, + const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 1> Addrs) { if (!FK) - return visitTrivial(QualType(AT, 0), FD, CurStackOffset, Addrs); + return visitTrivial(QualType(AT, 0), FD, CurStructOffset, Addrs); ASTContext &Ctx = getContext(); CharUnits Size = Ctx.getTypeSizeInChars(QualType(AT, 0)); QualType EltTy = Ctx.getBaseElementType(QualType(AT, 0)); if (Size < CharUnits::fromQuantity(16) || EltTy->getAs<RecordType>()) { - GenFuncBaseTy::visitArray(FK, AT, IsVolatile, FD, CurStackOffset, Addrs); + GenFuncBaseTy::visitArray(FK, AT, IsVolatile, FD, CurStructOffset, Addrs); return; } llvm::Constant *SizeVal = CGF->Builder.getInt64(Size.getQuantity()); - Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); Address Loc = CGF->Builder.CreateElementBitCast(DstAddr, CGF->Int8Ty); CGF->Builder.CreateMemSet(Loc, CGF->Builder.getInt8(0), SizeVal, IsVolatile); @@ -673,24 +671,26 @@ struct GenCopyConstructor : GenBinaryFunc<GenCopyConstructor, false> { : GenBinaryFunc<GenCopyConstructor, false>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); llvm::Value *SrcVal = CGF->EmitLoadOfScalar( Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation()); llvm::Value *Val = CGF->EmitARCRetain(QT, SrcVal); CGF->EmitStoreOfScalar(Val, CGF->MakeAddrLValue(Addrs[DstIdx], QT), true); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->EmitARCCopyWeak(Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructCopyConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); } @@ -701,9 +701,9 @@ struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> { : GenBinaryFunc<GenMoveConstructor, true>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); llvm::Value *SrcVal = CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); @@ 
-712,15 +712,17 @@ struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> { /* isInitialization */ true); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->EmitARCMoveWeak(Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructMoveConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); } @@ -731,24 +733,26 @@ struct GenCopyAssignment : GenBinaryFunc<GenCopyAssignment, false> { : GenBinaryFunc<GenCopyAssignment, false>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); llvm::Value *SrcVal = CGF->EmitLoadOfScalar( Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation()); CGF->EmitARCStoreStrong(CGF->MakeAddrLValue(Addrs[DstIdx], QT), SrcVal, false); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->emitARCCopyAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructCopyAssignmentOperator( CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); @@ -760,9 +764,9 @@ struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> { : GenBinaryFunc<GenMoveAssignment, true>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); llvm::Value *SrcVal = CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); @@ -774,15 +778,17 @@ struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> { CGF->EmitARCRelease(DstVal, ARCImpreciseLifetime); } - void 
visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->emitARCMoveAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructMoveAssignmentOperator( CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); @@ -817,6 +823,29 @@ static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, Gen.callFunc(FuncName, QT, Addrs, CGF); } +template <size_t N> std::array<Address, N> createNullAddressArray(); + +template <> std::array<Address, 1> createNullAddressArray() { + return std::array<Address, 1>({{Address(nullptr, CharUnits::Zero())}}); +} + +template <> std::array<Address, 2> createNullAddressArray() { + return std::array<Address, 2>({{Address(nullptr, CharUnits::Zero()), + Address(nullptr, CharUnits::Zero())}}); +} + +template <class G, size_t N> +static llvm::Function * +getSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, bool IsVolatile, + std::array<CharUnits, N> Alignments, CodeGenModule &CGM) { + QT = IsVolatile ? QT.withVolatile() : QT; + // The following call requires an array of addresses as arguments, but doesn't + // actually use them (it overwrites them with the addresses of the arguments + // of the created function). + return Gen.getFunction(FuncName, QT, createNullAddressArray<N>(), Alignments, + CGM); +} + // Functions to emit calls to the special functions of a non-trivial C struct. 
void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) { bool IsVolatile = Dst.isVolatile(); @@ -828,18 +857,16 @@ void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) { IsVolatile, *this, std::array<Address, 1>({{DstPtr}})); } -std::string -CodeGenFunction::getNonTrivialCopyConstructorStr(QualType QT, - CharUnits Alignment, - bool IsVolatile, - ASTContext &Ctx) { +std::string CodeGenFunction::getNonTrivialCopyConstructorStr( + QualType QT, CharUnits Alignment, bool IsVolatile, ASTContext &Ctx) { GenBinaryFuncName<false> GenName("", Alignment, Alignment, Ctx); return GenName.getName(QT, IsVolatile); } -std::string -CodeGenFunction::getNonTrivialDestructorStr(QualType QT, CharUnits Alignment, - bool IsVolatile, ASTContext &Ctx) { +std::string CodeGenFunction::getNonTrivialDestructorStr(QualType QT, + CharUnits Alignment, + bool IsVolatile, + ASTContext &Ctx) { GenDestructorFuncName GenName("", Alignment, Ctx); return GenName.getName(QT, IsVolatile); } @@ -904,3 +931,69 @@ void CodeGenFunction::callCStructMoveAssignmentOperator(LValue Dst, LValue Src callSpecialFunction(GenMoveAssignment(getContext()), FuncName, QT, IsVolatile, *this, std::array<Address, 2>({{DstPtr, SrcPtr}})); } + +llvm::Function *clang::CodeGen::getNonTrivialCStructDefaultConstructor( + CodeGenModule &CGM, CharUnits DstAlignment, bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenDefaultInitializeFuncName GenName(DstAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction(GenDefaultInitialize(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 1>({{DstAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructCopyConstructor( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<false> GenName("__copy_constructor_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenCopyConstructor(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructMoveConstructor( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<true> GenName("__move_constructor_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenMoveConstructor(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructCopyAssignmentOperator( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<false> GenName("__copy_assignment_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenCopyAssignment(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructMoveAssignmentOperator( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<true> GenName("__move_assignment_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = 
GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenMoveAssignment(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructDestructor( + CodeGenModule &CGM, CharUnits DstAlignment, bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenDestructorFuncName GenName("__destructor_", DstAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction(GenDestructor(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 1>({{DstAlignment}}), CGM); +} diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 9c66ff0e8fb2..1dd7ec52230e 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -1,9 +1,8 @@ //===---- CGObjC.cpp - Emit LLVM Code for Objective-C ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,6 +14,7 @@ #include "CGObjCRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" @@ -22,7 +22,6 @@ #include "clang/Basic/Diagnostic.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" using namespace clang; @@ -62,7 +61,12 @@ CodeGenFunction::EmitObjCBoxedExpr(const ObjCBoxedExpr *E) { // Get the method. const ObjCMethodDecl *BoxingMethod = E->getBoxingMethod(); const Expr *SubExpr = E->getSubExpr(); - assert(BoxingMethod && "BoxingMethod is null"); + + if (E->isExpressibleAsConstantInitializer()) { + ConstantEmitter ConstEmitter(CGM); + return ConstEmitter.tryEmitAbstract(E, E->getType()); + } + assert(BoxingMethod->isClassMethod() && "BoxingMethod must be a class method"); Selector Sel = BoxingMethod->getSelector(); @@ -160,9 +164,8 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, if (ALE) { // Emit the element and store it to the appropriate array slot. const Expr *Rhs = ALE->getElement(i); - LValue LV = MakeAddrLValue( - Builder.CreateConstArrayGEP(Objects, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + LValue LV = MakeAddrLValue(Builder.CreateConstArrayGEP(Objects, i), + ElementType, AlignmentSource::Decl); llvm::Value *value = EmitScalarExpr(Rhs); EmitStoreThroughLValue(RValue::get(value), LV, true); @@ -172,17 +175,15 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, } else { // Emit the key and store it to the appropriate array slot. const Expr *Key = DLE->getKeyValueElement(i).Key; - LValue KeyLV = MakeAddrLValue( - Builder.CreateConstArrayGEP(Keys, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + LValue KeyLV = MakeAddrLValue(Builder.CreateConstArrayGEP(Keys, i), + ElementType, AlignmentSource::Decl); llvm::Value *keyValue = EmitScalarExpr(Key); EmitStoreThroughLValue(RValue::get(keyValue), KeyLV, /*isInit=*/true); // Emit the value and store it to the appropriate array slot. 
const Expr *Value = DLE->getKeyValueElement(i).Value; - LValue ValueLV = MakeAddrLValue( - Builder.CreateConstArrayGEP(Objects, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + LValue ValueLV = MakeAddrLValue(Builder.CreateConstArrayGEP(Objects, i), + ElementType, AlignmentSource::Decl); llvm::Value *valueValue = EmitScalarExpr(Value); EmitStoreThroughLValue(RValue::get(valueValue), ValueLV, /*isInit=*/true); if (TrackNeededObjects) { @@ -382,10 +383,12 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, if (isClassMessage && Runtime.shouldUseRuntimeFunctionsForAlloc() && ResultType->isObjCObjectPointerType()) { - // [Foo alloc] -> objc_alloc(Foo) + // [Foo alloc] -> objc_alloc(Foo) or + // [self alloc] -> objc_alloc(self) if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "alloc") return CGF.EmitObjCAlloc(Receiver, CGF.ConvertType(ResultType)); - // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo) + // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo) or + // [self allocWithZone:nil] -> objc_allocWithZone(self) if (Sel.isKeywordSelector() && Sel.getNumArgs() == 1 && Args.size() == 1 && Args.front().getType()->isPointerType() && Sel.getNameForSlot(0) == "allocWithZone") { @@ -427,6 +430,57 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, return None; } +/// Instead of '[[MyClass alloc] init]', try to generate +/// 'objc_alloc_init(MyClass)'. This provides a code size improvement on the +/// caller side, as well as the optimized objc_alloc. +static Optional<llvm::Value *> +tryEmitSpecializedAllocInit(CodeGenFunction &CGF, const ObjCMessageExpr *OME) { + auto &Runtime = CGF.getLangOpts().ObjCRuntime; + if (!Runtime.shouldUseRuntimeFunctionForCombinedAllocInit()) + return None; + + // Match the exact pattern '[[MyClass alloc] init]'. + Selector Sel = OME->getSelector(); + if (OME->getReceiverKind() != ObjCMessageExpr::Instance || + !OME->getType()->isObjCObjectPointerType() || !Sel.isUnarySelector() || + Sel.getNameForSlot(0) != "init") + return None; + + // Okay, this is '[receiver init]', check if 'receiver' is '[cls alloc]' or + // we are in an ObjC class method and 'receiver' is '[self alloc]'. + auto *SubOME = + dyn_cast<ObjCMessageExpr>(OME->getInstanceReceiver()->IgnoreParenCasts()); + if (!SubOME) + return None; + Selector SubSel = SubOME->getSelector(); + + // Check if we are in an ObjC class method and the receiver expression is + // 'self'. 
+ const Expr *SelfInClassMethod = nullptr; + if (const auto *CurMD = dyn_cast_or_null<ObjCMethodDecl>(CGF.CurFuncDecl)) + if (CurMD->isClassMethod()) + if ((SelfInClassMethod = SubOME->getInstanceReceiver())) + if (!SelfInClassMethod->isObjCSelfExpr()) + SelfInClassMethod = nullptr; + + if ((SubOME->getReceiverKind() != ObjCMessageExpr::Class && + !SelfInClassMethod) || !SubOME->getType()->isObjCObjectPointerType() || + !SubSel.isUnarySelector() || SubSel.getNameForSlot(0) != "alloc") + return None; + + llvm::Value *Receiver; + if (SelfInClassMethod) { + Receiver = CGF.EmitScalarExpr(SelfInClassMethod); + } else { + QualType ReceiverType = SubOME->getClassReceiver(); + const ObjCObjectType *ObjTy = ReceiverType->getAs<ObjCObjectType>(); + const ObjCInterfaceDecl *ID = ObjTy->getInterface(); + assert(ID && "null interface should be impossible here"); + Receiver = CGF.CGM.getObjCRuntime().GetClass(CGF, ID); + } + return CGF.EmitObjCAllocInit(Receiver, CGF.ConvertType(OME->getType())); +} + RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, ReturnValueSlot Return) { // Only the lookup mechanism and first two arguments of the method @@ -448,6 +502,9 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, } } + if (Optional<llvm::Value *> Val = tryEmitSpecializedAllocInit(*this, E)) + return AdjustObjCObjectType(*this, E->getType(), RValue::get(*Val)); + // We don't retain the receiver in delegate init calls, and this is // safe because the receiver value is always loaded from 'self', // which we zero out. We don't want to Block_copy block receivers, @@ -468,6 +525,10 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, switch (E->getReceiverKind()) { case ObjCMessageExpr::Instance: ReceiverType = E->getInstanceReceiver()->getType(); + if (auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(CurFuncDecl)) + if (OMD->isClassMethod()) + if (E->getInstanceReceiver()->isObjCSelfExpr()) + isClassMessage = true; if (retainSelf) { TryEmitResult ter = tryEmitARCRetainScalarExpr(*this, E->getInstanceReceiver()); @@ -685,7 +746,7 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar, args.add(RValue::get(CGF.Builder.getInt1(isAtomic)), Context.BoolTy); args.add(RValue::get(CGF.Builder.getInt1(hasStrong)), Context.BoolTy); - llvm::Constant *fn = CGF.CGM.getObjCRuntime().GetGetStructFunction(); + llvm::FunctionCallee fn = CGF.CGM.getObjCRuntime().GetGetStructFunction(); CGCallee callee = CGCallee::forDirect(fn); CGF.EmitCall(CGF.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, args), callee, ReturnValueSlot(), args); @@ -949,8 +1010,8 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, // Third argument is the helper function. 
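The `tryEmitSpecializedAllocInit` helper added above collapses the common `[[Cls alloc] init]` pattern (or `[[self alloc] init]` inside a class method) into a single call to a combined runtime function, giving a code-size win on the caller side and using the optimized allocation path. A minimal sketch of the rewrite, assuming the runtime exports the helper with the usual `id objc_alloc_init(Class)` shape; the `void *` spelling below is only a stand-in for the Objective-C `Class`/`id` types, which are not spelled out in this patch:

// Assumed shape of the combined runtime entry point (provided by the ObjC
// runtime when shouldUseRuntimeFunctionForCombinedAllocInit() is true; not
// declared in this patch).
extern "C" void *objc_alloc_init(void *cls);

// Objective-C source:
//   MyClass *obj = [[MyClass alloc] init];
//
// Conventional lowering: two message sends
//   tmp = objc_msgSend(MyClassRef, @selector(alloc));
//   obj = objc_msgSend(tmp, @selector(init));
//
// Lowering with tryEmitSpecializedAllocInit: one runtime call
//   obj = objc_alloc_init(MyClassRef);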
args.add(RValue::get(AtomicHelperFn), CGF.getContext().VoidPtrTy); - llvm::Constant *copyCppAtomicObjectFn = - CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction(); + llvm::FunctionCallee copyCppAtomicObjectFn = + CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction(); CGCallee callee = CGCallee::forDirect(copyCppAtomicObjectFn); CGF.EmitCall( CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), @@ -1026,8 +1087,8 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, } case PropertyImplStrategy::GetSetProperty: { - llvm::Constant *getPropertyFn = - CGM.getObjCRuntime().GetPropertyGetFunction(); + llvm::FunctionCallee getPropertyFn = + CGM.getObjCRuntime().GetPropertyGetFunction(); if (!getPropertyFn) { CGM.ErrorUnsupported(propImpl, "Obj-C getter requiring atomic copy"); return; @@ -1052,10 +1113,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // FIXME: We shouldn't need to get the function info here, the // runtime already should have computed it to build the function. - llvm::Instruction *CallInstruction; - RValue RV = EmitCall( - getTypes().arrangeBuiltinFunctionCall(propType, args), - callee, ReturnValueSlot(), args, &CallInstruction); + llvm::CallBase *CallInstruction; + RValue RV = EmitCall(getTypes().arrangeBuiltinFunctionCall( + getContext().getObjCIdType(), args), + callee, ReturnValueSlot(), args, &CallInstruction); if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(CallInstruction)) call->setTailCall(); @@ -1096,7 +1157,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // that's not necessarily the same as "on the stack", so // we still potentially need objc_memmove_collectable. EmitAggregateCopy(/* Dest= */ MakeAddrLValue(ReturnValue, ivarType), - /* Src= */ LV, ivarType, overlapForReturnValue()); + /* Src= */ LV, ivarType, getOverlapForReturnValue()); return; } case TEK_Scalar: { @@ -1170,7 +1231,7 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, // FIXME: should this really always be false? args.add(RValue::get(CGF.Builder.getFalse()), CGF.getContext().BoolTy); - llvm::Constant *fn = CGF.CGM.getObjCRuntime().GetSetStructFunction(); + llvm::FunctionCallee fn = CGF.CGM.getObjCRuntime().GetSetStructFunction(); CGCallee callee = CGCallee::forDirect(fn); CGF.EmitCall( CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), @@ -1207,8 +1268,8 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, // Third argument is the helper function. 
args.add(RValue::get(AtomicHelperFn), CGF.getContext().VoidPtrTy); - llvm::Constant *fn = - CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction(); + llvm::FunctionCallee fn = + CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction(); CGCallee callee = CGCallee::forDirect(fn); CGF.EmitCall( CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), @@ -1302,14 +1363,13 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, case PropertyImplStrategy::GetSetProperty: case PropertyImplStrategy::SetPropertyAndExpressionGet: { - llvm::Constant *setOptimizedPropertyFn = nullptr; - llvm::Constant *setPropertyFn = nullptr; + llvm::FunctionCallee setOptimizedPropertyFn = nullptr; + llvm::FunctionCallee setPropertyFn = nullptr; if (UseOptimizedSetter(CGM)) { // 10.8 and iOS 6.0 code and GC is off setOptimizedPropertyFn = - CGM.getObjCRuntime() - .GetOptimizedPropertySetFunction(strategy.isAtomic(), - strategy.isCopy()); + CGM.getObjCRuntime().GetOptimizedPropertySetFunction( + strategy.isAtomic(), strategy.isCopy()); if (!setOptimizedPropertyFn) { CGM.ErrorUnsupported(propImpl, "Obj-C optimized setter - NYI"); return; @@ -1560,8 +1620,8 @@ QualType CodeGenFunction::TypeOfSelfObject() { } void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ - llvm::Constant *EnumerationMutationFnPtr = - CGM.getObjCRuntime().EnumerationMutationFunction(); + llvm::FunctionCallee EnumerationMutationFnPtr = + CGM.getObjCRuntime().EnumerationMutationFunction(); if (!EnumerationMutationFnPtr) { CGM.ErrorUnsupported(&S, "Obj-C fast enumeration for this runtime"); return; @@ -1669,8 +1729,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ // Save the initial mutations value. This is the value at an // address that was written into the state object by // countByEnumeratingWithState:objects:count:. - Address StateMutationsPtrPtr = Builder.CreateStructGEP( - StatePtr, 2, 2 * getPointerSize(), "mutationsptr.ptr"); + Address StateMutationsPtrPtr = + Builder.CreateStructGEP(StatePtr, 2, "mutationsptr.ptr"); llvm::Value *StateMutationsPtr = Builder.CreateLoad(StateMutationsPtrPtr, "mutationsptr"); @@ -1751,8 +1811,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ // Fetch the buffer out of the enumeration state. // TODO: this pointer should actually be invariant between // refreshes, which would help us do certain loop optimizations. - Address StateItemsPtr = Builder.CreateStructGEP( - StatePtr, 1, getPointerSize(), "stateitems.ptr"); + Address StateItemsPtr = + Builder.CreateStructGEP(StatePtr, 1, "stateitems.ptr"); llvm::Value *EnumStateItems = Builder.CreateLoad(StateItemsPtr, "stateitems"); @@ -1891,7 +1951,7 @@ llvm::Value *CodeGenFunction::EmitObjCExtendObjectLifetime(QualType type, /// Given a number of pointers, inform the optimizer that they're /// being intrinsically used up until this point in the program. 
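The fast-enumeration hunks above move to the two-argument form of `CGBuilderTy::CreateStructGEP`, which derives the field offset from the struct layout of the `Address`'s element type instead of taking an explicit `CharUnits` offset (previously `2 * getPointerSize()` and `getPointerSize()`). A rough before/after sketch, with `StatePtr` standing in for any `Address` whose element type is a known LLVM struct:

// Before: the caller supplied the byte offset of the field by hand.
//   Address P = Builder.CreateStructGEP(StatePtr, 2, 2 * getPointerSize(),
//                                       "mutationsptr.ptr");
//
// After: only the field index is needed; the builder computes the offset from
// the LLVM struct layout of StatePtr's element type and carries the resulting
// alignment in the returned Address.
//   Address P = Builder.CreateStructGEP(StatePtr, 2, "mutationsptr.ptr");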
void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { - llvm::Constant *&fn = CGM.getObjCEntrypoints().clang_arc_use; + llvm::Function *&fn = CGM.getObjCEntrypoints().clang_arc_use; if (!fn) fn = CGM.getIntrinsic(llvm::Intrinsic::objc_clang_arc_use); @@ -1900,8 +1960,7 @@ void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { EmitNounwindRuntimeCall(fn, values); } -static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, - llvm::Constant *RTF) { +static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, llvm::Value *RTF) { if (auto *F = dyn_cast<llvm::Function>(RTF)) { // If the target runtime doesn't naturally support ARC, emit weak // references to the runtime support library. We don't really @@ -1913,15 +1972,18 @@ static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, } } +static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, + llvm::FunctionCallee RTF) { + setARCRuntimeFunctionLinkage(CGM, RTF.getCallee()); +} + /// Perform an operation having the signature /// i8* (i8*) /// where a null input causes a no-op and returns null. -static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, - llvm::Value *value, - llvm::Type *returnType, - llvm::Constant *&fn, - llvm::Intrinsic::ID IntID, - bool isTailCall = false) { +static llvm::Value *emitARCValueOperation( + CodeGenFunction &CGF, llvm::Value *value, llvm::Type *returnType, + llvm::Function *&fn, llvm::Intrinsic::ID IntID, + llvm::CallInst::TailCallKind tailKind = llvm::CallInst::TCK_None) { if (isa<llvm::ConstantPointerNull>(value)) return value; @@ -1936,8 +1998,7 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, // Call the function. llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value); - if (isTailCall) - call->setTailCall(); + call->setTailCallKind(tailKind); // Cast the result back to the original type. 
return CGF.Builder.CreateBitCast(call, origType); @@ -1945,9 +2006,8 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, /// Perform an operation having the following signature: /// i8* (i8**) -static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, - Address addr, - llvm::Constant *&fn, +static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, Address addr, + llvm::Function *&fn, llvm::Intrinsic::ID IntID) { if (!fn) { fn = CGF.CGM.getIntrinsic(IntID); @@ -1970,10 +2030,9 @@ static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, /// Perform an operation having the following signature: /// i8* (i8**, i8*) -static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, - Address addr, +static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, Address addr, llvm::Value *value, - llvm::Constant *&fn, + llvm::Function *&fn, llvm::Intrinsic::ID IntID, bool ignored) { assert(addr.getElementType() == value->getType()); @@ -1998,10 +2057,8 @@ static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, /// Perform an operation having the following signature: /// void (i8**, i8**) -static void emitARCCopyOperation(CodeGenFunction &CGF, - Address dst, - Address src, - llvm::Constant *&fn, +static void emitARCCopyOperation(CodeGenFunction &CGF, Address dst, Address src, + llvm::Function *&fn, llvm::Intrinsic::ID IntID) { assert(dst.getType() == src.getType()); @@ -2023,7 +2080,7 @@ static void emitARCCopyOperation(CodeGenFunction &CGF, static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, llvm::Value *value, llvm::Type *returnType, - llvm::Constant *&fn, + llvm::FunctionCallee &fn, StringRef fnName) { if (isa<llvm::ConstantPointerNull>(value)) return value; @@ -2034,7 +2091,7 @@ static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, fn = CGF.CGM.CreateRuntimeFunction(fnType, fnName); // We have Native ARC, so set nonlazybind attribute for performance - if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) + if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) if (fnName == "objc_retain") f->addFnAttr(llvm::Attribute::NonLazyBind); } @@ -2044,10 +2101,10 @@ static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy); // Call the function. - llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value); + llvm::CallBase *Inst = CGF.EmitCallOrInvoke(fn, value); // Cast the result back to the original type. - return CGF.Builder.CreateBitCast(call, origType); + return CGF.Builder.CreateBitCast(Inst, origType); } /// Produce the code to do a retain. Based on the type, calls one of: @@ -2122,14 +2179,10 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { // with this marker yet, so leave a breadcrumb for the ARC // optimizer to pick up. 
} else { - llvm::NamedMDNode *metadata = - CGF.CGM.getModule().getOrInsertNamedMetadata( - "clang.arc.retainAutoreleasedReturnValueMarker"); - assert(metadata->getNumOperands() <= 1); - if (metadata->getNumOperands() == 0) { - auto &ctx = CGF.getLLVMContext(); - metadata->addOperand(llvm::MDNode::get(ctx, - llvm::MDString::get(ctx, assembly))); + const char *markerKey = "clang.arc.retainAutoreleasedReturnValueMarker"; + if (!CGF.CGM.getModule().getModuleFlag(markerKey)) { + auto *str = llvm::MDString::get(CGF.getLLVMContext(), assembly); + CGF.CGM.getModule().addModuleFlag(llvm::Module::Error, markerKey, str); } } } @@ -2147,9 +2200,15 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { llvm::Value * CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { emitAutoreleasedReturnValueMarker(*this); - return emitARCValueOperation(*this, value, nullptr, - CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, - llvm::Intrinsic::objc_retainAutoreleasedReturnValue); + llvm::CallInst::TailCallKind tailKind = + CGM.getTargetCodeGenInfo() + .shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() + ? llvm::CallInst::TCK_NoTail + : llvm::CallInst::TCK_None; + return emitARCValueOperation( + *this, value, nullptr, + CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, + llvm::Intrinsic::objc_retainAutoreleasedReturnValue, tailKind); } /// Claim a possibly-autoreleased return value at +0. This is only @@ -2173,7 +2232,7 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value, ARCPreciseLifetime_t precise) { if (isa<llvm::ConstantPointerNull>(value)) return; - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_release; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_release); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2219,7 +2278,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(Address addr, bool ignored) { assert(addr.getElementType() == value->getType()); - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_storeStrong; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_storeStrong; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_storeStrong); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2286,7 +2345,7 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_autoreleaseReturnValue, llvm::Intrinsic::objc_autoreleaseReturnValue, - /*isTailCall*/ true); + llvm::CallInst::TCK_Tail); } /// Do a fused retain/autorelease of the given object. @@ -2296,7 +2355,7 @@ CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainAutoreleaseReturnValue, llvm::Intrinsic::objc_retainAutoreleaseReturnValue, - /*isTailCall*/ true); + llvm::CallInst::TCK_Tail); } /// Do a fused retain/autorelease of the given object. @@ -2375,7 +2434,7 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) { /// void \@objc_destroyWeak(i8** %addr) /// Essentially objc_storeWeak(addr, nil). 
void CodeGenFunction::EmitARCDestroyWeak(Address addr) { - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_destroyWeak; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_destroyWeak; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_destroyWeak); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2423,7 +2482,7 @@ void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr, /// Produce the code to do a objc_autoreleasepool_push. /// call i8* \@objc_autoreleasePoolPush(void) llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() { - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPush); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2439,8 +2498,8 @@ void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { if (getInvokeDest()) { // Call the runtime method not the intrinsic if we are handling exceptions - llvm::Constant *&fn = - CGM.getObjCEntrypoints().objc_autoreleasePoolPopInvoke; + llvm::FunctionCallee &fn = + CGM.getObjCEntrypoints().objc_autoreleasePoolPopInvoke; if (!fn) { llvm::FunctionType *fnType = llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); @@ -2451,7 +2510,7 @@ void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { // objc_autoreleasePoolPop can throw. EmitRuntimeCallOrInvoke(fn, value); } else { - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop; + llvm::FunctionCallee &fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPop); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2507,6 +2566,13 @@ llvm::Value *CodeGenFunction::EmitObjCAllocWithZone(llvm::Value *value, "objc_allocWithZone"); } +llvm::Value *CodeGenFunction::EmitObjCAllocInit(llvm::Value *value, + llvm::Type *resultType) { + return emitObjCValueOperation(*this, value, resultType, + CGM.getObjCEntrypoints().objc_alloc_init, + "objc_alloc_init"); +} + /// Produce the code to do a primitive release. /// [tmp drain]; void CodeGenFunction::EmitObjCMRRAutoreleasePoolPop(llvm::Value *Arg) { @@ -2545,18 +2611,19 @@ void CodeGenFunction::emitARCIntrinsicUse(CodeGenFunction &CGF, Address addr, /// call i8* \@objc_autorelease(i8* %value) llvm::Value *CodeGenFunction::EmitObjCAutorelease(llvm::Value *value, llvm::Type *returnType) { - return emitObjCValueOperation(*this, value, returnType, - CGM.getObjCEntrypoints().objc_autoreleaseRuntimeFunction, - "objc_autorelease"); + return emitObjCValueOperation( + *this, value, returnType, + CGM.getObjCEntrypoints().objc_autoreleaseRuntimeFunction, + "objc_autorelease"); } /// Retain the given object, with normal retain semantics. /// call i8* \@objc_retain(i8* %value) llvm::Value *CodeGenFunction::EmitObjCRetainNonBlock(llvm::Value *value, llvm::Type *returnType) { - return emitObjCValueOperation(*this, value, returnType, - CGM.getObjCEntrypoints().objc_retainRuntimeFunction, - "objc_retain"); + return emitObjCValueOperation( + *this, value, returnType, + CGM.getObjCEntrypoints().objc_retainRuntimeFunction, "objc_retain"); } /// Release the given object. 
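A recurring change in this file (and across the import) is the move from `llvm::Constant *` to `llvm::FunctionCallee` for runtime functions, and to `llvm::Function *` for intrinsics. `FunctionCallee` pairs the callee value with its `FunctionType`, so call emission no longer has to recover the type from the callee's pointer type. A small self-contained sketch of the pattern, using a made-up `my_runtime_fn` name purely for illustration:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

// Sketch: declaring and calling a runtime function via FunctionCallee.
static llvm::Value *emitRuntimeCall(llvm::Module &M, llvm::IRBuilder<> &B,
                                    llvm::Value *Arg) {
  llvm::FunctionType *FTy = llvm::FunctionType::get(
      B.getInt8PtrTy(), {B.getInt8PtrTy()}, /*isVarArg=*/false);
  // getOrInsertFunction now returns a FunctionCallee: {FunctionType*, Value*}.
  llvm::FunctionCallee Fn = M.getOrInsertFunction("my_runtime_fn", FTy);
  // CreateCall takes the FunctionCallee directly; no bitcast of the callee or
  // re-derivation of the function type from a pointer type is needed.
  return B.CreateCall(Fn, {Arg});
}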
@@ -2565,24 +2632,23 @@ void CodeGenFunction::EmitObjCRelease(llvm::Value *value, ARCPreciseLifetime_t precise) { if (isa<llvm::ConstantPointerNull>(value)) return; - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release; + llvm::FunctionCallee &fn = + CGM.getObjCEntrypoints().objc_releaseRuntimeFunction; if (!fn) { - if (!fn) { - llvm::FunctionType *fnType = + llvm::FunctionType *fnType = llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); - fn = CGM.CreateRuntimeFunction(fnType, "objc_release"); - setARCRuntimeFunctionLinkage(CGM, fn); - // We have Native ARC, so set nonlazybind attribute for performance - if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) - f->addFnAttr(llvm::Attribute::NonLazyBind); - } + fn = CGM.CreateRuntimeFunction(fnType, "objc_release"); + setARCRuntimeFunctionLinkage(CGM, fn); + // We have Native ARC, so set nonlazybind attribute for performance + if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) + f->addFnAttr(llvm::Attribute::NonLazyBind); } // Cast the argument to 'id'. value = Builder.CreateBitCast(value, Int8PtrTy); // Call objc_release. - llvm::CallInst *call = EmitNounwindRuntimeCall(fn, value); + llvm::CallBase *call = EmitCallOrInvoke(fn, value); if (precise == ARCImpreciseLifetime) { call->setMetadata("clang.imprecise_release", @@ -2829,6 +2895,7 @@ public: Result visit(const Expr *e); Result visitCastExpr(const CastExpr *e); Result visitPseudoObjectExpr(const PseudoObjectExpr *e); + Result visitBlockExpr(const BlockExpr *e); Result visitBinaryOperator(const BinaryOperator *e); Result visitBinAssign(const BinaryOperator *e); Result visitBinAssignUnsafeUnretained(const BinaryOperator *e); @@ -2905,6 +2972,12 @@ ARCExprEmitter<Impl,Result>::visitPseudoObjectExpr(const PseudoObjectExpr *E) { } template <typename Impl, typename Result> +Result ARCExprEmitter<Impl, Result>::visitBlockExpr(const BlockExpr *e) { + // The default implementation just forwards the expression to visitExpr. + return asImpl().visitExpr(e); +} + +template <typename Impl, typename Result> Result ARCExprEmitter<Impl,Result>::visitCastExpr(const CastExpr *e) { switch (e->getCastKind()) { @@ -3047,7 +3120,8 @@ Result ARCExprEmitter<Impl,Result>::visit(const Expr *e) { // Look through pseudo-object expressions. } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) { return asImpl().visitPseudoObjectExpr(pseudo); - } + } else if (auto *be = dyn_cast<BlockExpr>(e)) + return asImpl().visitBlockExpr(be); return asImpl().visitExpr(e); } @@ -3082,6 +3156,15 @@ struct ARCRetainExprEmitter : return TryEmitResult(result, true); } + TryEmitResult visitBlockExpr(const BlockExpr *e) { + TryEmitResult result = visitExpr(e); + // Avoid the block-retain if this is a block literal that doesn't need to be + // copied to the heap. + if (e->getBlockDecl()->canAvoidCopyToHeap()) + result.setInt(true); + return result; + } + /// Block extends are net +0. Naively, we could just recurse on /// the subexpression, but actually we need to ensure that the /// value is copied as a block, so there's a little filter here. @@ -3384,11 +3467,10 @@ void CodeGenFunction::EmitExtendGCLifetime(llvm::Value *object) { // We just use an inline assembly. 
llvm::FunctionType *extenderType = llvm::FunctionType::get(VoidTy, VoidPtrTy, RequiredArgs::All); - llvm::Value *extender - = llvm::InlineAsm::get(extenderType, - /* assembly */ "", - /* constraints */ "r", - /* side effects */ true); + llvm::InlineAsm *extender = llvm::InlineAsm::get(extenderType, + /* assembly */ "", + /* constraints */ "r", + /* side effects */ true); object = Builder.CreateBitCast(object, VoidPtrTy); EmitNounwindRuntimeCall(extender, object); @@ -3647,19 +3729,25 @@ void CodeGenModule::emitAtAvailableLinkGuard() { // CoreFoundation is linked into the final binary. llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, {VoidPtrTy}, false); - llvm::Constant *CFFunc = + llvm::FunctionCallee CFFunc = CreateRuntimeFunction(FTy, "CFBundleGetVersionNumber"); llvm::FunctionType *CheckFTy = llvm::FunctionType::get(VoidTy, {}, false); - llvm::Function *CFLinkCheckFunc = cast<llvm::Function>(CreateBuiltinFunction( - CheckFTy, "__clang_at_available_requires_core_foundation_framework")); - CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); - CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility); - CodeGenFunction CGF(*this); - CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc)); - CGF.EmitNounwindRuntimeCall(CFFunc, llvm::Constant::getNullValue(VoidPtrTy)); - CGF.Builder.CreateUnreachable(); - addCompilerUsedGlobal(CFLinkCheckFunc); + llvm::FunctionCallee CFLinkCheckFuncRef = CreateRuntimeFunction( + CheckFTy, "__clang_at_available_requires_core_foundation_framework", + llvm::AttributeList(), /*Local=*/true); + llvm::Function *CFLinkCheckFunc = + cast<llvm::Function>(CFLinkCheckFuncRef.getCallee()->stripPointerCasts()); + if (CFLinkCheckFunc->empty()) { + CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility); + CodeGenFunction CGF(*this); + CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc)); + CGF.EmitNounwindRuntimeCall(CFFunc, + llvm::Constant::getNullValue(VoidPtrTy)); + CGF.Builder.CreateUnreachable(); + addCompilerUsedGlobal(CFLinkCheckFunc); + } } CGObjCRuntime::~CGObjCRuntime() {} diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index 548bd6b3fd72..ee5c12aa35bd 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -1,9 +1,8 @@ //===------- CGObjCGNU.cpp - Emit LLVM Code from ASTs for a Module --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -29,7 +28,6 @@ #include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" @@ -60,7 +58,7 @@ class LazyRuntimeFunction { CodeGenModule *CGM; llvm::FunctionType *FTy; const char *FunctionName; - llvm::Constant *Function; + llvm::FunctionCallee Function; public: /// Constructor leaves this class uninitialized, because it is intended to @@ -90,7 +88,7 @@ public: /// Overloaded cast operator, allows the class to be implicitly cast to an /// LLVM constant. 
- operator llvm::Constant *() { + operator llvm::FunctionCallee() { if (!Function) { if (!FunctionName) return nullptr; @@ -98,9 +96,6 @@ public: } return Function; } - operator llvm::Function *() { - return cast<llvm::Function>((llvm::Constant *)*this); - } }; @@ -190,12 +185,16 @@ protected: (R.getVersion() >= VersionTuple(major, minor)); } - std::string SymbolForProtocol(StringRef Name) { - return (StringRef("._OBJC_PROTOCOL_") + Name).str(); + std::string ManglePublicSymbol(StringRef Name) { + return (StringRef(CGM.getTriple().isOSBinFormatCOFF() ? "$_" : "._") + Name).str(); + } + + std::string SymbolForProtocol(Twine Name) { + return (ManglePublicSymbol("OBJC_PROTOCOL_") + Name).str(); } std::string SymbolForProtocolRef(StringRef Name) { - return (StringRef("._OBJC_REF_PROTOCOL_") + Name).str(); + return (ManglePublicSymbol("OBJC_REF_PROTOCOL_") + Name).str(); } @@ -614,15 +613,15 @@ public: const ObjCProtocolDecl *PD) override; void GenerateProtocol(const ObjCProtocolDecl *PD) override; llvm::Function *ModuleInitFunction() override; - llvm::Constant *GetPropertyGetFunction() override; - llvm::Constant *GetPropertySetFunction() override; - llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) override; - llvm::Constant *GetSetStructFunction() override; - llvm::Constant *GetGetStructFunction() override; - llvm::Constant *GetCppAtomicObjectGetFunction() override; - llvm::Constant *GetCppAtomicObjectSetFunction() override; - llvm::Constant *EnumerationMutationFunction() override; + llvm::FunctionCallee GetPropertyGetFunction() override; + llvm::FunctionCallee GetPropertySetFunction() override; + llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) override; + llvm::FunctionCallee GetSetStructFunction() override; + llvm::FunctionCallee GetGetStructFunction() override; + llvm::FunctionCallee GetCppAtomicObjectGetFunction() override; + llvm::FunctionCallee GetCppAtomicObjectSetFunction() override; + llvm::FunctionCallee EnumerationMutationFunction() override; void EmitTryStmt(CodeGenFunction &CGF, const ObjCAtTryStmt &S) override; @@ -691,9 +690,9 @@ protected: llvm::Value *args[] = { EnforceType(Builder, Receiver, IdTy), EnforceType(Builder, cmd, SelectorTy) }; - llvm::CallSite imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); + llvm::CallBase *imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); imp->setMetadata(msgSendMDKind, node); - return imp.getInstruction(); + return imp; } llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, @@ -750,7 +749,7 @@ class CGObjCGNUstep : public CGObjCGNU { llvm::Value *cmd, llvm::MDNode *node, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; - llvm::Function *LookupFn = SlotLookupFn; + llvm::FunctionCallee LookupFn = SlotLookupFn; // Store the receiver on the stack so that we can reload it later Address ReceiverPtr = @@ -766,20 +765,20 @@ class CGObjCGNUstep : public CGObjCGNU { } // The lookup function is guaranteed not to capture the receiver pointer. 
- LookupFn->addParamAttr(0, llvm::Attribute::NoCapture); + if (auto *LookupFn2 = dyn_cast<llvm::Function>(LookupFn.getCallee())) + LookupFn2->addParamAttr(0, llvm::Attribute::NoCapture); llvm::Value *args[] = { EnforceType(Builder, ReceiverPtr.getPointer(), PtrToIdTy), EnforceType(Builder, cmd, SelectorTy), EnforceType(Builder, self, IdTy) }; - llvm::CallSite slot = CGF.EmitRuntimeCallOrInvoke(LookupFn, args); - slot.setOnlyReadsMemory(); + llvm::CallBase *slot = CGF.EmitRuntimeCallOrInvoke(LookupFn, args); + slot->setOnlyReadsMemory(); slot->setMetadata(msgSendMDKind, node); // Load the imp from the slot llvm::Value *imp = Builder.CreateAlignedLoad( - Builder.CreateStructGEP(nullptr, slot.getInstruction(), 4), - CGF.getPointerAlign()); + Builder.CreateStructGEP(nullptr, slot, 4), CGF.getPointerAlign()); // The lookup function may have changed the receiver, so make sure we use // the new one. @@ -859,7 +858,7 @@ class CGObjCGNUstep : public CGObjCGNU { PtrTy, PtrTy); } - llvm::Constant *GetCppAtomicObjectGetFunction() override { + llvm::FunctionCallee GetCppAtomicObjectGetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= @@ -867,7 +866,7 @@ class CGObjCGNUstep : public CGObjCGNU { return CxxAtomicObjectGetFn; } - llvm::Constant *GetCppAtomicObjectSetFunction() override { + llvm::FunctionCallee GetCppAtomicObjectSetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= @@ -875,8 +874,8 @@ class CGObjCGNUstep : public CGObjCGNU { return CxxAtomicObjectSetFn; } - llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) override { + llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) override { // The optimised property functions omit the GC check, and so are not // safe to use in GC mode. The standard functions are fast in GC mode, // so there is less advantage in using them. @@ -911,12 +910,15 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ConstantStringSection }; static const char *const SectionsBaseNames[8]; + static const char *const PECOFFSectionsBaseNames[8]; template<SectionKind K> std::string sectionName() { - std::string name(SectionsBaseNames[K]); - if (CGM.getTriple().isOSBinFormatCOFF()) + if (CGM.getTriple().isOSBinFormatCOFF()) { + std::string name(PECOFFSectionsBaseNames[K]); name += "$m"; - return name; + return name; + } + return SectionsBaseNames[K]; } /// The GCC ABI superclass message lookup function. Takes a pointer to a /// structure describing the receiver and the class, and a selector as @@ -937,15 +939,19 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { bool EmittedClass = false; /// Generate the name of a symbol for a reference to a class. Accesses to /// classes should be indirected via this. + + typedef std::pair<std::string, std::pair<llvm::Constant*, int>> EarlyInitPair; + std::vector<EarlyInitPair> EarlyInitList; + std::string SymbolForClassRef(StringRef Name, bool isWeak) { if (isWeak) - return (StringRef("._OBJC_WEAK_REF_CLASS_") + Name).str(); + return (ManglePublicSymbol("OBJC_WEAK_REF_CLASS_") + Name).str(); else - return (StringRef("._OBJC_REF_CLASS_") + Name).str(); + return (ManglePublicSymbol("OBJC_REF_CLASS_") + Name).str(); } /// Generate the name of a class symbol. 
std::string SymbolForClass(StringRef Name) { - return (StringRef("._OBJC_CLASS_") + Name).str(); + return (ManglePublicSymbol("OBJC_CLASS_") + Name).str(); } void CallRuntimeFunction(CGBuilderTy &B, StringRef FunctionName, ArrayRef<llvm::Value*> Args) { @@ -954,7 +960,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { Types.push_back(Arg->getType()); llvm::FunctionType *FT = llvm::FunctionType::get(B.getVoidTy(), Types, false); - llvm::Value *Fn = CGM.CreateRuntimeFunction(FT, FunctionName); + llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FT, FunctionName); B.CreateCall(Fn, Args); } @@ -999,10 +1005,13 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { llvm::Constant *isa = TheModule.getNamedGlobal(Sym); - if (!isa) + if (!isa) { isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false, llvm::GlobalValue::ExternalLinkage, nullptr, Sym); - else if (isa->getType() != PtrToIdTy) + if (CGM.getTriple().isOSBinFormatCOFF()) { + cast<llvm::GlobalValue>(isa)->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + } else if (isa->getType() != PtrToIdTy) isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy); |
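The `ManglePublicSymbol` change above switches the public Objective-C metadata symbols of the GNUstep v2 ABI from a `._`-prefixed form to a `$_`-prefixed form when targeting PE/COFF, and marks class objects that are only referenced (not defined) in the module as `dllimport`. Roughly, for a class named `NSObject` the helpers in this hunk now produce the following names; the table is a sketch derived from `SymbolForClass`, `SymbolForClassRef`, and `SymbolForProtocol` as patched here:

// ELF / Mach-O:                        PE/COFF:
//   ._OBJC_CLASS_NSObject                $_OBJC_CLASS_NSObject
//   ._OBJC_REF_CLASS_NSObject            $_OBJC_REF_CLASS_NSObject
//   ._OBJC_WEAK_REF_CLASS_NSObject       $_OBJC_WEAK_REF_CLASS_NSObject
//   ._OBJC_PROTOCOL_<name>               $_OBJC_PROTOCOL_<name>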