author     Dimitry Andric <dim@FreeBSD.org>  2020-06-24 20:22:44 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2020-06-24 20:22:44 +0000
commit     483b61a50e7423b063fc26985325f594560b3f7e (patch)
tree       5bb205026b61f3dd88d63f43d0b790d518acefec
parent     8055b7e383f74dbc58c8085a0f0c45f4c61f8231 (diff)
download   src-483b61a50e7423b063fc26985325f594560b3f7e.tar.gz
           src-483b61a50e7423b063fc26985325f594560b3f7e.zip

Vendor import of llvm-project branch release/10.x
llvmorg-10.0.0-129-gd24d5c8e308. (tag: vendor/llvm-project/llvmorg-10.0.0-129-gd24d5c8e308)

Notes:
    svn path=/vendor/llvm-project/release-10.x/; revision=362593
    svn path=/vendor/llvm-project/llvmorg-10.0.0-129-gd24d5c8e308/; revision=362594; tag=vendor/llvm-project/llvmorg-10.0.0-129-gd24d5c8e308
-rw-r--r--  clang/include/clang/Driver/Options.td | 8
-rw-r--r--  clang/lib/Basic/Targets/PPC.h | 3
-rw-r--r--  clang/lib/Driver/SanitizerArgs.cpp | 3
-rw-r--r--  clang/lib/Driver/ToolChain.cpp | 9
-rw-r--r--  clang/lib/Driver/ToolChains/Arch/X86.cpp | 23
-rw-r--r--  llvm/include/llvm/CodeGen/RDFGraph.h (renamed from llvm/lib/Target/Hexagon/RDFGraph.h) | 0
-rw-r--r--  llvm/include/llvm/CodeGen/RDFLiveness.h (renamed from llvm/lib/Target/Hexagon/RDFLiveness.h) | 0
-rw-r--r--  llvm/include/llvm/CodeGen/RDFRegisters.h (renamed from llvm/lib/Target/Hexagon/RDFRegisters.h) | 0
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsPowerPC.td | 3
-rw-r--r--  llvm/include/llvm/Support/ManagedStatic.h | 4
-rw-r--r--  llvm/include/llvm/Target/TargetSelectionDAG.td | 9
-rw-r--r--  llvm/lib/Analysis/BasicAliasAnalysis.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/RDFGraph.cpp (renamed from llvm/lib/Target/Hexagon/RDFGraph.cpp) | 10
-rw-r--r--  llvm/lib/CodeGen/RDFLiveness.cpp (renamed from llvm/lib/Target/Hexagon/RDFLiveness.cpp) | 6
-rw-r--r--  llvm/lib/CodeGen/RDFRegisters.cpp (renamed from llvm/lib/Target/Hexagon/RDFRegisters.cpp) | 2
-rw-r--r--  llvm/lib/LTO/LTO.cpp | 28
-rw-r--r--  llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp | 2
-rw-r--r--  llvm/lib/Target/BPF/BTFDebug.cpp | 32
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp | 6
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp | 6
-rw-r--r--  llvm/lib/Target/Hexagon/RDFCopy.cpp | 6
-rw-r--r--  llvm/lib/Target/Hexagon/RDFCopy.h | 6
-rw-r--r--  llvm/lib/Target/Hexagon/RDFDeadCode.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/RDFDeadCode.h | 4
-rw-r--r--  llvm/lib/Target/PowerPC/P9InstrResources.td | 1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 27
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.h | 2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrVSX.td | 17
-rw-r--r--  llvm/lib/Target/X86/ImmutableGraph.h | 446
-rw-r--r--  llvm/lib/Target/X86/X86.h | 8
-rw-r--r--  llvm/lib/Target/X86/X86.td | 16
-rw-r--r--  llvm/lib/Target/X86/X86FastISel.cpp | 4
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.cpp | 10
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 2
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 81
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h | 4
-rw-r--r--  llvm/lib/Target/X86/X86IndirectThunks.cpp | 364
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td | 16
-rw-r--r--  llvm/lib/Target/X86/X86InstrControl.td | 22
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.td | 4
-rw-r--r--  llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp | 900
-rw-r--r--  llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp | 143
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp | 8
-rw-r--r--  llvm/lib/Target/X86/X86RetpolineThunks.cpp | 286
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.h | 27
-rw-r--r--  llvm/lib/Target/X86/X86TargetMachine.cpp | 9
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 2
49 files changed, 2180 insertions, 406 deletions
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 0a60873443fc..391c895a453b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2267,6 +2267,14 @@ def mspeculative_load_hardening : Flag<["-"], "mspeculative-load-hardening">,
Group<m_Group>, Flags<[CoreOption,CC1Option]>;
def mno_speculative_load_hardening : Flag<["-"], "mno-speculative-load-hardening">,
Group<m_Group>, Flags<[CoreOption]>;
+def mlvi_hardening : Flag<["-"], "mlvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+ HelpText<"Enable all mitigations for Load Value Injection (LVI)">;
+def mno_lvi_hardening : Flag<["-"], "mno-lvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+ HelpText<"Disable mitigations for Load Value Injection (LVI)">;
+def mlvi_cfi : Flag<["-"], "mlvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+ HelpText<"Enable only control-flow mitigations for Load Value Injection (LVI)">;
+def mno_lvi_cfi : Flag<["-"], "mno-lvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+ HelpText<"Disable control-flow mitigations for Load Value Injection (LVI)">;
def mrelax : Flag<["-"], "mrelax">, Group<m_riscv_Features_Group>,
HelpText<"Enable linker relaxation">;
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 270aa7ff9181..ef5c2264a0b0 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -276,11 +276,12 @@ public:
break;
case 'Q': // Memory operand that is an offset from a register (it is
// usually better to use `m' or `es' in asm statements)
+ Info.setAllowsRegister();
+ LLVM_FALLTHROUGH;
case 'Z': // Memory operand that is an indexed or indirect from a
// register (it is usually better to use `m' or `es' in
// asm statements)
Info.setAllowsMemory();
- Info.setAllowsRegister();
break;
case 'R': // AIX TOC entry
case 'a': // Address operand that is an indexed or indirect from a
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index ac9a294ee3fa..60fd932fbe6f 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -454,8 +454,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
<< lastArgumentForMask(D, Args, Kinds & NeedsLTO) << "-flto";
}
- if ((Kinds & SanitizerKind::ShadowCallStack) &&
- TC.getTriple().getArch() == llvm::Triple::aarch64 &&
+ if ((Kinds & SanitizerKind::ShadowCallStack) && TC.getTriple().isAArch64() &&
!llvm::AArch64::isX18ReservedByDefault(TC.getTriple()) &&
!Args.hasArg(options::OPT_ffixed_x18)) {
D.Diag(diag::err_drv_argument_only_allowed_with)
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index cab97b1a601a..18400d9def54 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -954,15 +954,12 @@ SanitizerMask ToolChain::getSupportedSanitizers() const {
if (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64 ||
getTriple().getArch() == llvm::Triple::arm ||
- getTriple().getArch() == llvm::Triple::aarch64 ||
getTriple().getArch() == llvm::Triple::wasm32 ||
- getTriple().getArch() == llvm::Triple::wasm64)
+ getTriple().getArch() == llvm::Triple::wasm64 || getTriple().isAArch64())
Res |= SanitizerKind::CFIICall;
- if (getTriple().getArch() == llvm::Triple::x86_64 ||
- getTriple().getArch() == llvm::Triple::aarch64)
+ if (getTriple().getArch() == llvm::Triple::x86_64 || getTriple().isAArch64())
Res |= SanitizerKind::ShadowCallStack;
- if (getTriple().getArch() == llvm::Triple::aarch64 ||
- getTriple().getArch() == llvm::Triple::aarch64_be)
+ if (getTriple().isAArch64())
Res |= SanitizerKind::MemTag;
return Res;
}
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index d1e0c8253b79..d170b7ac3a77 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -146,6 +146,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
// flags). This is a bit hacky but keeps existing usages working. We should
// consider deprecating this and instead warn if the user requests external
// retpoline thunks and *doesn't* request some form of retpolines.
+ auto SpectreOpt = clang::driver::options::ID::OPT_INVALID;
if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline,
options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening)) {
@@ -153,12 +154,14 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
false)) {
Features.push_back("+retpoline-indirect-calls");
Features.push_back("+retpoline-indirect-branches");
+ SpectreOpt = options::OPT_mretpoline;
} else if (Args.hasFlag(options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening,
false)) {
// On x86, speculative load hardening relies on at least using retpolines
// for indirect calls.
Features.push_back("+retpoline-indirect-calls");
+ SpectreOpt = options::OPT_mspeculative_load_hardening;
}
} else if (Args.hasFlag(options::OPT_mretpoline_external_thunk,
options::OPT_mno_retpoline_external_thunk, false)) {
@@ -166,6 +169,26 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
// eventually switch to an error here.
Features.push_back("+retpoline-indirect-calls");
Features.push_back("+retpoline-indirect-branches");
+ SpectreOpt = options::OPT_mretpoline_external_thunk;
+ }
+
+ auto LVIOpt = clang::driver::options::ID::OPT_INVALID;
+ if (Args.hasFlag(options::OPT_mlvi_hardening, options::OPT_mno_lvi_hardening,
+ false)) {
+ Features.push_back("+lvi-load-hardening");
+ Features.push_back("+lvi-cfi"); // load hardening implies CFI protection
+ LVIOpt = options::OPT_mlvi_hardening;
+ } else if (Args.hasFlag(options::OPT_mlvi_cfi, options::OPT_mno_lvi_cfi,
+ false)) {
+ Features.push_back("+lvi-cfi");
+ LVIOpt = options::OPT_mlvi_cfi;
+ }
+
+ if (SpectreOpt != clang::driver::options::ID::OPT_INVALID &&
+ LVIOpt != clang::driver::options::ID::OPT_INVALID) {
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << D.getOpts().getOptionName(SpectreOpt)
+ << D.getOpts().getOptionName(LVIOpt);
}
// Now add any that the user explicitly requested on the command line,
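[Editor's note: the net effect of the hunk above is that the Spectre-v2 options (-mretpoline, -mspeculative-load-hardening, -mretpoline-external-thunk) and the new LVI options (-mlvi-hardening, -mlvi-cfi) are mutually exclusive; requesting one from each family triggers err_drv_argument_not_allowed_with. A minimal standalone sketch of the same last-seen-per-family check, with illustrative names rather than the clang driver API:

    #include <cstdio>
    #include <cstring>

    enum OptID { OPT_INVALID, OPT_mretpoline, OPT_mlvi_hardening };

    // Track which option from each mutually exclusive family was requested,
    // then reject the combination, mirroring the diagnostic in the hunk above.
    int main(int argc, char **argv) {
      OptID SpectreOpt = OPT_INVALID, LVIOpt = OPT_INVALID;
      for (int i = 1; i < argc; ++i) {
        if (!std::strcmp(argv[i], "-mretpoline"))
          SpectreOpt = OPT_mretpoline;
        else if (!std::strcmp(argv[i], "-mlvi-hardening"))
          LVIOpt = OPT_mlvi_hardening;
      }
      if (SpectreOpt != OPT_INVALID && LVIOpt != OPT_INVALID) {
        std::fprintf(stderr, "error: invalid argument '-mretpoline' not "
                             "allowed with '-mlvi-hardening'\n");
        return 1;
      }
      return 0;
    }
]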
diff --git a/llvm/lib/Target/Hexagon/RDFGraph.h b/llvm/include/llvm/CodeGen/RDFGraph.h
index 585f43e116f9..585f43e116f9 100644
--- a/llvm/lib/Target/Hexagon/RDFGraph.h
+++ b/llvm/include/llvm/CodeGen/RDFGraph.h
diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.h b/llvm/include/llvm/CodeGen/RDFLiveness.h
index ea4890271726..ea4890271726 100644
--- a/llvm/lib/Target/Hexagon/RDFLiveness.h
+++ b/llvm/include/llvm/CodeGen/RDFLiveness.h
diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h
index 4afaf80e4659..4afaf80e4659 100644
--- a/llvm/lib/Target/Hexagon/RDFRegisters.h
+++ b/llvm/include/llvm/CodeGen/RDFRegisters.h
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index f87317445753..fc9fa2153aea 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -488,6 +488,9 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vmsumudm : GCCBuiltin<"__builtin_altivec_vmsumudm">,
+ Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v1i128_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
diff --git a/llvm/include/llvm/Support/ManagedStatic.h b/llvm/include/llvm/Support/ManagedStatic.h
index bbd0d04ed040..f2b41422f131 100644
--- a/llvm/include/llvm/Support/ManagedStatic.h
+++ b/llvm/include/llvm/Support/ManagedStatic.h
@@ -40,8 +40,8 @@ template <typename T, size_t N> struct object_deleter<T[N]> {
// constexpr, a dynamic initializer may be emitted depending on optimization
// settings. For the affected versions of MSVC, use the old linker
// initialization pattern of not providing a constructor and leaving the fields
-// uninitialized.
-#if !defined(_MSC_VER) || defined(__clang__)
+// uninitialized. See http://llvm.org/PR41367 for details.
+#if !defined(_MSC_VER) || (_MSC_VER >= 1925) || defined(__clang__)
#define LLVM_USE_CONSTEXPR_CTOR
#endif
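[Editor's note: for reference, the two initialization patterns the macro selects between can be shown in isolation. This is a sketch, not LLVM's actual ManagedStaticBase:

    // With a constexpr constructor, the static is constant-initialized and no
    // code runs before main(); the fallback declares no constructor at all,
    // so the object is zero-initialized static storage, which sidesteps the
    // MSVC versions that wrongly emit a dynamic initializer (see PR41367).
    #ifdef LLVM_USE_CONSTEXPR_CTOR
    struct ManagedStaticLike {
      constexpr ManagedStaticLike() : Ptr(nullptr) {}
      void *Ptr;
    };
    #else
    struct ManagedStaticLike {
      void *Ptr;  // left uninitialized; static instances are zero-filled
    };
    #endif

    static ManagedStaticLike TheInstance;  // no dynamic initializer either way
]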
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 1700c6c4640d..46ad5a619770 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -959,6 +959,10 @@ def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = 1;
let MemoryVT = i32;
}
+def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = f16;
+}
def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = 1;
let MemoryVT = f32;
@@ -1094,6 +1098,11 @@ def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
let IsStore = 1;
let MemoryVT = i32;
}
+def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = f16;
+}
def truncstoref32 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = 1;
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index e852d663c6b4..e439c94a7325 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -2059,12 +2059,13 @@ char BasicAAWrapperPass::ID = 0;
void BasicAAWrapperPass::anchor() {}
INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
- "Basic Alias Analysis (stateless AA impl)", false, true)
+ "Basic Alias Analysis (stateless AA impl)", true, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
- "Basic Alias Analysis (stateless AA impl)", false, true)
+ "Basic Alias Analysis (stateless AA impl)", true, true)
FunctionPass *llvm::createBasicAAWrapperPass() {
return new BasicAAWrapperPass();
diff --git a/llvm/lib/Target/Hexagon/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp
index 0cb35dc98819..437a6b030096 100644
--- a/llvm/lib/Target/Hexagon/RDFGraph.cpp
+++ b/llvm/lib/CodeGen/RDFGraph.cpp
@@ -8,8 +8,6 @@
//
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -20,6 +18,8 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -753,8 +753,10 @@ RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
if (RegisterId R = TLI.getExceptionPointerRegister(PF))
LR.insert(RegisterRef(R));
- if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
- LR.insert(RegisterRef(R));
+ if (!isFuncletEHPersonality(classifyEHPersonality(PF))) {
+ if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
+ LR.insert(RegisterRef(R));
+ }
return LR;
}
diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index e2c007c9d01a..0bcd27f8ea45 100644
--- a/llvm/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -22,9 +22,6 @@
// and Embedded Architectures and Compilers", 8 (4),
// <10.1145/2086696.2086706>. <hal-00647369>
//
-#include "RDFLiveness.h"
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -33,6 +30,9 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp
index b5675784e34b..bd8661816e71 100644
--- a/llvm/lib/Target/Hexagon/RDFRegisters.cpp
+++ b/llvm/lib/CodeGen/RDFRegisters.cpp
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
-#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 297b11de17a9..fa2f0777897b 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -147,8 +147,17 @@ void llvm::computeLTOCacheKey(
// Include the hash for the current module
auto ModHash = Index.getModuleHash(ModuleID);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+
+ std::vector<uint64_t> ExportsGUID;
+ ExportsGUID.reserve(ExportList.size());
for (const auto &VI : ExportList) {
auto GUID = VI.getGUID();
+ ExportsGUID.push_back(GUID);
+ }
+
+ // Sort the export list elements GUIDs.
+ llvm::sort(ExportsGUID);
+ for (uint64_t GUID : ExportsGUID) {
// The export list can impact the internalization, be conservative here
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
}
@@ -156,12 +165,23 @@ void llvm::computeLTOCacheKey(
// Include the hash for every module we import functions from. The set of
// imported symbols for each module may affect code generation and is
// sensitive to link order, so include that as well.
- for (auto &Entry : ImportList) {
- auto ModHash = Index.getModuleHash(Entry.first());
+ using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator;
+ std::vector<ImportMapIteratorTy> ImportModulesVector;
+ ImportModulesVector.reserve(ImportList.size());
+
+ for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end();
+ ++It) {
+ ImportModulesVector.push_back(It);
+ }
+ llvm::sort(ImportModulesVector,
+ [](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs)
+ -> bool { return Lhs->getKey() < Rhs->getKey(); });
+ for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) {
+ auto ModHash = Index.getModuleHash(EntryIt->first());
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
- AddUint64(Entry.second.size());
- for (auto &Fn : Entry.second)
+ AddUint64(EntryIt->second.size());
+ for (auto &Fn : EntryIt->second)
AddUint64(Fn);
}
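[Editor's note: both loops in this hunk apply the same fix. The export and import containers are hash-based, so their iteration order is unstable; copying the keys out and sorting them before they are hashed makes the LTO cache key deterministic across runs and link orders. A self-contained sketch of the principle, using plain C++ with an FNV-1a accumulator standing in for the SHA-1 Hasher:

    #include <algorithm>
    #include <cstdint>
    #include <unordered_set>
    #include <vector>

    // Fold a value into an FNV-1a accumulator (stand-in for Hasher.update).
    static void addUint64(uint64_t &H, uint64_t V) {
      for (int i = 0; i < 8; ++i, V >>= 8)
        H = (H ^ (V & 0xff)) * 1099511628211ULL;
    }

    uint64_t cacheKey(const std::unordered_set<uint64_t> &ExportGUIDs) {
      // Copy and sort: unordered_set iteration order may differ between
      // runs, which would otherwise produce spurious cache misses.
      std::vector<uint64_t> Sorted(ExportGUIDs.begin(), ExportGUIDs.end());
      std::sort(Sorted.begin(), Sorted.end());
      uint64_t H = 14695981039346656037ULL;
      for (uint64_t G : Sorted)
        addUint64(H, G);
      return H;
    }
]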
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 6f5f58554d09..d407edfbd966 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
<< val << '\n');
- SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
+ SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0));
// After replacement, the current node is dead, we need to
// go backward one step to make iterator still work
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index a9fb04f20d1c..6daeb3b4b63b 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -600,6 +600,38 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
bool CheckPointer, bool SeenPointer) {
if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
TypeId = DIToIdMap[Ty];
+
+ // To handle the case like the following:
+ // struct t;
+ // typedef struct t _t;
+ // struct s1 { _t *c; };
+ // int test1(struct s1 *arg) { ... }
+ //
+ // struct t { int a; int b; };
+ // struct s2 { _t c; }
+ // int test2(struct s2 *arg) { ... }
+ //
+ // During traversing test1() argument, "_t" is recorded
+ // in DIToIdMap and a forward declaration fixup is created
+ // for "struct t" to avoid pointee type traversal.
+ //
+ // During traversing test2() argument, even if we see "_t" is
+ // already defined, we should keep moving to eventually
+ // bring in types for "struct t". Otherwise, the "struct s2"
+ // definition won't be correct.
+ if (Ty && (!CheckPointer || !SeenPointer)) {
+ if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ unsigned Tag = DTy->getTag();
+ if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type ||
+ Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
+ uint32_t TmpTypeId;
+ visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer,
+ SeenPointer);
+ }
+ }
+ }
+
return;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 886034d9601a..f1fe51f5e54f 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -12,9 +12,6 @@
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
@@ -27,6 +24,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 517ad1c6ee7b..f26e23befde2 100644
--- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -11,9 +11,6 @@
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "RDFCopy.h"
#include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -24,6 +21,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp
index a9d39fd4b2dc..34d58f0a7a23 100644
--- a/llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -11,13 +11,13 @@
//===----------------------------------------------------------------------===//
#include "RDFCopy.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.h b/llvm/lib/Target/Hexagon/RDFCopy.h
index 1450ab884849..99b18a75d8c2 100644
--- a/llvm/lib/Target/Hexagon/RDFCopy.h
+++ b/llvm/lib/Target/Hexagon/RDFCopy.h
@@ -9,9 +9,9 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
#include <vector>
diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
index af86c7b1956b..5a98debd3c00 100644
--- a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -9,13 +9,13 @@
// RDF-based generic dead code elimination.
#include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/Support/Debug.h"
#include <queue>
diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.h b/llvm/lib/Target/Hexagon/RDFDeadCode.h
index 7f91977e1d6c..859c8161d355 100644
--- a/llvm/lib/Target/Hexagon/RDFDeadCode.h
+++ b/llvm/lib/Target/Hexagon/RDFDeadCode.h
@@ -23,8 +23,8 @@
#ifndef RDF_DEADCODE_H
#define RDF_DEADCODE_H
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/ADT/SetVector.h"
namespace llvm {
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 9b3d13989ee2..d7e3519d5539 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
VMSUMSHS,
VMSUMUBM,
VMSUMUHM,
+ VMSUMUDM,
VMSUMUHS,
VMULESB,
VMULESH,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 00f59bba52e8..ca1649fae258 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
+ if (Subtarget.isISA3_0()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
+ setTruncStoreAction(MVT::f64, MVT::f16, Legal);
+ setTruncStoreAction(MVT::f32, MVT::f16, Legal);
+ } else {
+ // No extending loads from f16 or HW conversions back and forth.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ }
+
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PowerPC has pre-inc load and stores.
@@ -677,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
+ setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
if (!Subtarget.hasP8Vector()) {
setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
@@ -10361,6 +10379,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::FP_EXTEND &&
"Should only be called for ISD::FP_EXTEND");
+ // FIXME: handle extends from half precision float vectors on P9.
// We only want to custom lower an extend from v2f32 to v2f64.
if (Op.getValueType() != MVT::v2f64 ||
Op.getOperand(0).getValueType() != MVT::v2f32)
@@ -10574,6 +10593,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
// Don't handle bitcast here.
return;
+ case ISD::FP_EXTEND:
+ SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
+ if (Lowered)
+ Results.push_back(Lowered);
+ return;
}
}
@@ -15255,7 +15279,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
- if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+ if (VT.isFloatingPoint() && !VT.isVector() &&
+ !Subtarget.allowsUnalignedFPAccess())
return false;
if (VT.getSimpleVT().isVector()) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index e0c381827b87..2e1485373d19 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -637,7 +637,7 @@ namespace llvm {
/// then the VPERM for the shuffle. All in all a very slow sequence.
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
- if (VT.getScalarSizeInBits() % 8 == 0)
+ if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index f94816a35f79..6e8635f2413c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1342,6 +1342,10 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {
+// Vector Multiply-Sum
+def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
+ v1i128, v2i64, v1i128>;
+
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 30906a32b00c..d7925befcd37 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2631,6 +2631,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
return false;
+ // The operand may not necessarily be an immediate - it could be a relocation.
+ if (!ADDIMI.getOperand(2).isImm())
+ return false;
+
Imm = ADDIMI.getOperand(2).getImm();
return true;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index be6b30ffa08b..95e5ff6b130d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
(v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
+ // Load/convert and convert/store patterns for f16.
+ def : Pat<(f64 (extloadf16 xoaddr:$src)),
+ (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
+ def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
+ def : Pat<(f32 (extloadf16 xoaddr:$src)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
+ def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
+ def : Pat<(f64 (f16_to_fp i32:$A)),
+ (f64 (XSCVHPDP (MTVSRWZ $A)))>;
+ def : Pat<(f32 (f16_to_fp i32:$A)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
+ def : Pat<(i32 (fp_to_f16 f32:$A)),
+ (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
+ def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
+
let Predicates = [IsBigEndian, HasP9Vector] in {
// Scalar stores of i8
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
diff --git a/llvm/lib/Target/X86/ImmutableGraph.h b/llvm/lib/Target/X86/ImmutableGraph.h
new file mode 100644
index 000000000000..5833017037a5
--- /dev/null
+++ b/llvm/lib/Target/X86/ImmutableGraph.h
@@ -0,0 +1,446 @@
+//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: ImmutableGraph is a fast DAG implementation that cannot be
+/// modified, except by creating a new ImmutableGraph. ImmutableGraph is
+/// implemented as two arrays: one containing nodes, and one containing edges.
+/// The advantages to this implementation are two-fold:
+/// 1. Iteration and traversal operations benefit from cache locality.
+/// 2. Operations on sets of nodes/edges are efficient, and representations of
+/// those sets in memory are compact. For instance, a set of edges is
+/// implemented as a bit vector, wherein each bit corresponds to one edge in
+/// the edge array. This implies a lower bound of 64x spatial improvement
+/// over, e.g., an llvm::DenseSet or llvm::SmallSet. It also means that
+/// insert/erase/contains operations complete in negligible constant time:
+/// insert and erase require one load and one store, and contains requires
+/// just one load.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
+#define LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+template <typename NodeValueT, typename EdgeValueT> class ImmutableGraph {
+ using Traits = GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *>;
+ template <typename> friend class ImmutableGraphBuilder;
+
+public:
+ using node_value_type = NodeValueT;
+ using edge_value_type = EdgeValueT;
+ using size_type = int;
+ class Node;
+ class Edge {
+ friend class ImmutableGraph;
+ template <typename> friend class ImmutableGraphBuilder;
+
+ const Node *Dest;
+ edge_value_type Value;
+
+ public:
+ const Node *getDest() const { return Dest; };
+ const edge_value_type &getValue() const { return Value; }
+ };
+ class Node {
+ friend class ImmutableGraph;
+ template <typename> friend class ImmutableGraphBuilder;
+
+ const Edge *Edges;
+ node_value_type Value;
+
+ public:
+ const node_value_type &getValue() const { return Value; }
+
+ const Edge *edges_begin() const { return Edges; }
+ // Nodes are allocated sequentially. Edges for a node are stored together.
+ // The end of this Node's edges is the beginning of the next node's edges.
+ // An extra node was allocated to hold the end pointer for the last real
+ // node.
+ const Edge *edges_end() const { return (this + 1)->Edges; }
+ ArrayRef<Edge> edges() const {
+ return makeArrayRef(edges_begin(), edges_end());
+ }
+ };
+
+protected:
+ ImmutableGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
+ size_type NodesSize, size_type EdgesSize)
+ : Nodes(std::move(Nodes)), Edges(std::move(Edges)), NodesSize(NodesSize),
+ EdgesSize(EdgesSize) {}
+ ImmutableGraph(const ImmutableGraph &) = delete;
+ ImmutableGraph(ImmutableGraph &&) = delete;
+ ImmutableGraph &operator=(const ImmutableGraph &) = delete;
+ ImmutableGraph &operator=(ImmutableGraph &&) = delete;
+
+public:
+ ArrayRef<Node> nodes() const { return makeArrayRef(Nodes.get(), NodesSize); }
+ const Node *nodes_begin() const { return nodes().begin(); }
+ const Node *nodes_end() const { return nodes().end(); }
+
+ ArrayRef<Edge> edges() const { return makeArrayRef(Edges.get(), EdgesSize); }
+ const Edge *edges_begin() const { return edges().begin(); }
+ const Edge *edges_end() const { return edges().end(); }
+
+ size_type nodes_size() const { return NodesSize; }
+ size_type edges_size() const { return EdgesSize; }
+
+ // Node N must belong to this ImmutableGraph.
+ size_type getNodeIndex(const Node &N) const {
+ return std::distance(nodes_begin(), &N);
+ }
+ // Edge E must belong to this ImmutableGraph.
+ size_type getEdgeIndex(const Edge &E) const {
+ return std::distance(edges_begin(), &E);
+ }
+
+ // FIXME: Could NodeSet and EdgeSet be templated to share code?
+ class NodeSet {
+ const ImmutableGraph &G;
+ BitVector V;
+
+ public:
+ NodeSet(const ImmutableGraph &G, bool ContainsAll = false)
+ : G{G}, V{static_cast<unsigned>(G.nodes_size()), ContainsAll} {}
+ bool insert(const Node &N) {
+ size_type Idx = G.getNodeIndex(N);
+ bool AlreadyExists = V.test(Idx);
+ V.set(Idx);
+ return !AlreadyExists;
+ }
+ void erase(const Node &N) {
+ size_type Idx = G.getNodeIndex(N);
+ V.reset(Idx);
+ }
+ bool contains(const Node &N) const {
+ size_type Idx = G.getNodeIndex(N);
+ return V.test(Idx);
+ }
+ void clear() { V.reset(); }
+ size_type empty() const { return V.none(); }
+ /// Return the number of elements in the set
+ size_type count() const { return V.count(); }
+ /// Return the size of the set's domain
+ size_type size() const { return V.size(); }
+ /// Set union
+ NodeSet &operator|=(const NodeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V |= RHS.V;
+ return *this;
+ }
+ /// Set intersection
+ NodeSet &operator&=(const NodeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V &= RHS.V;
+ return *this;
+ }
+ /// Set disjoint union
+ NodeSet &operator^=(const NodeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V ^= RHS.V;
+ return *this;
+ }
+
+ using index_iterator = typename BitVector::const_set_bits_iterator;
+ index_iterator index_begin() const { return V.set_bits_begin(); }
+ index_iterator index_end() const { return V.set_bits_end(); }
+ void set(size_type Idx) { V.set(Idx); }
+ void reset(size_type Idx) { V.reset(Idx); }
+
+ class iterator {
+ const NodeSet &Set;
+ size_type Current;
+
+ void advance() {
+ assert(Current != -1);
+ Current = Set.V.find_next(Current);
+ }
+
+ public:
+ iterator(const NodeSet &Set, size_type Begin)
+ : Set{Set}, Current{Begin} {}
+ iterator operator++(int) {
+ iterator Tmp = *this;
+ advance();
+ return Tmp;
+ }
+ iterator &operator++() {
+ advance();
+ return *this;
+ }
+ Node *operator*() const {
+ assert(Current != -1);
+ return Set.G.nodes_begin() + Current;
+ }
+ bool operator==(const iterator &other) const {
+ assert(&this->Set == &other.Set);
+ return this->Current == other.Current;
+ }
+ bool operator!=(const iterator &other) const { return !(*this == other); }
+ };
+
+ iterator begin() const { return iterator{*this, V.find_first()}; }
+ iterator end() const { return iterator{*this, -1}; }
+ };
+
+ class EdgeSet {
+ const ImmutableGraph &G;
+ BitVector V;
+
+ public:
+ EdgeSet(const ImmutableGraph &G, bool ContainsAll = false)
+ : G{G}, V{static_cast<unsigned>(G.edges_size()), ContainsAll} {}
+ bool insert(const Edge &E) {
+ size_type Idx = G.getEdgeIndex(E);
+ bool AlreadyExists = V.test(Idx);
+ V.set(Idx);
+ return !AlreadyExists;
+ }
+ void erase(const Edge &E) {
+ size_type Idx = G.getEdgeIndex(E);
+ V.reset(Idx);
+ }
+ bool contains(const Edge &E) const {
+ size_type Idx = G.getEdgeIndex(E);
+ return V.test(Idx);
+ }
+ void clear() { V.reset(); }
+ bool empty() const { return V.none(); }
+ /// Return the number of elements in the set
+ size_type count() const { return V.count(); }
+ /// Return the size of the set's domain
+ size_type size() const { return V.size(); }
+ /// Set union
+ EdgeSet &operator|=(const EdgeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V |= RHS.V;
+ return *this;
+ }
+ /// Set intersection
+ EdgeSet &operator&=(const EdgeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V &= RHS.V;
+ return *this;
+ }
+ /// Set disjoint union
+ EdgeSet &operator^=(const EdgeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V ^= RHS.V;
+ return *this;
+ }
+
+ using index_iterator = typename BitVector::const_set_bits_iterator;
+ index_iterator index_begin() const { return V.set_bits_begin(); }
+ index_iterator index_end() const { return V.set_bits_end(); }
+ void set(size_type Idx) { V.set(Idx); }
+ void reset(size_type Idx) { V.reset(Idx); }
+
+ class iterator {
+ const EdgeSet &Set;
+ size_type Current;
+
+ void advance() {
+ assert(Current != -1);
+ Current = Set.V.find_next(Current);
+ }
+
+ public:
+ iterator(const EdgeSet &Set, size_type Begin)
+ : Set{Set}, Current{Begin} {}
+ iterator operator++(int) {
+ iterator Tmp = *this;
+ advance();
+ return Tmp;
+ }
+ iterator &operator++() {
+ advance();
+ return *this;
+ }
+ Edge *operator*() const {
+ assert(Current != -1);
+ return Set.G.edges_begin() + Current;
+ }
+ bool operator==(const iterator &other) const {
+ assert(&this->Set == &other.Set);
+ return this->Current == other.Current;
+ }
+ bool operator!=(const iterator &other) const { return !(*this == other); }
+ };
+
+ iterator begin() const { return iterator{*this, V.find_first()}; }
+ iterator end() const { return iterator{*this, -1}; }
+ };
+
+private:
+ std::unique_ptr<Node[]> Nodes;
+ std::unique_ptr<Edge[]> Edges;
+ size_type NodesSize;
+ size_type EdgesSize;
+};
+
+template <typename GraphT> class ImmutableGraphBuilder {
+ using node_value_type = typename GraphT::node_value_type;
+ using edge_value_type = typename GraphT::edge_value_type;
+ static_assert(
+ std::is_base_of<ImmutableGraph<node_value_type, edge_value_type>,
+ GraphT>::value,
+ "Template argument to ImmutableGraphBuilder must derive from "
+ "ImmutableGraph<>");
+ using size_type = typename GraphT::size_type;
+ using NodeSet = typename GraphT::NodeSet;
+ using Node = typename GraphT::Node;
+ using EdgeSet = typename GraphT::EdgeSet;
+ using Edge = typename GraphT::Edge;
+ using BuilderEdge = std::pair<edge_value_type, size_type>;
+ using EdgeList = std::vector<BuilderEdge>;
+ using BuilderVertex = std::pair<node_value_type, EdgeList>;
+ using VertexVec = std::vector<BuilderVertex>;
+
+public:
+ using BuilderNodeRef = size_type;
+
+ BuilderNodeRef addVertex(const node_value_type &V) {
+ auto I = AdjList.emplace(AdjList.end(), V, EdgeList{});
+ return std::distance(AdjList.begin(), I);
+ }
+
+ void addEdge(const edge_value_type &E, BuilderNodeRef From,
+ BuilderNodeRef To) {
+ AdjList[From].second.emplace_back(E, To);
+ }
+
+ bool empty() const { return AdjList.empty(); }
+
+ template <typename... ArgT> std::unique_ptr<GraphT> get(ArgT &&... Args) {
+ size_type VertexSize = AdjList.size(), EdgeSize = 0;
+ for (const auto &V : AdjList) {
+ EdgeSize += V.second.size();
+ }
+ auto VertexArray =
+ std::make_unique<Node[]>(VertexSize + 1 /* terminator node */);
+ auto EdgeArray = std::make_unique<Edge[]>(EdgeSize);
+ size_type VI = 0, EI = 0;
+ for (; VI < VertexSize; ++VI) {
+ VertexArray[VI].Value = std::move(AdjList[VI].first);
+ VertexArray[VI].Edges = &EdgeArray[EI];
+ auto NumEdges = static_cast<size_type>(AdjList[VI].second.size());
+ for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) {
+ auto &E = AdjList[VI].second[VEI];
+ EdgeArray[EI].Value = std::move(E.first);
+ EdgeArray[EI].Dest = &VertexArray[E.second];
+ }
+ }
+ assert(VI == VertexSize && EI == EdgeSize && "ImmutableGraph malformed");
+ VertexArray[VI].Edges = &EdgeArray[EdgeSize]; // terminator node
+ return std::make_unique<GraphT>(std::move(VertexArray),
+ std::move(EdgeArray), VertexSize, EdgeSize,
+ std::forward<ArgT>(Args)...);
+ }
+
+ template <typename... ArgT>
+ static std::unique_ptr<GraphT> trim(const GraphT &G, const NodeSet &TrimNodes,
+ const EdgeSet &TrimEdges,
+ ArgT &&... Args) {
+ size_type NewVertexSize = G.nodes_size() - TrimNodes.count();
+ size_type NewEdgeSize = G.edges_size() - TrimEdges.count();
+ auto NewVertexArray =
+ std::make_unique<Node[]>(NewVertexSize + 1 /* terminator node */);
+ auto NewEdgeArray = std::make_unique<Edge[]>(NewEdgeSize);
+
+ // Walk the nodes and determine the new index for each node.
+ size_type NewNodeIndex = 0;
+ std::vector<size_type> RemappedNodeIndex(G.nodes_size());
+ for (const Node &N : G.nodes()) {
+ if (TrimNodes.contains(N))
+ continue;
+ RemappedNodeIndex[G.getNodeIndex(N)] = NewNodeIndex++;
+ }
+ assert(NewNodeIndex == NewVertexSize &&
+ "Should have assigned NewVertexSize indices");
+
+ size_type VertexI = 0, EdgeI = 0;
+ for (const Node &N : G.nodes()) {
+ if (TrimNodes.contains(N))
+ continue;
+ NewVertexArray[VertexI].Value = N.getValue();
+ NewVertexArray[VertexI].Edges = &NewEdgeArray[EdgeI];
+ for (const Edge &E : N.edges()) {
+ if (TrimEdges.contains(E))
+ continue;
+ NewEdgeArray[EdgeI].Value = E.getValue();
+ size_type DestIdx = G.getNodeIndex(*E.getDest());
+ size_type NewIdx = RemappedNodeIndex[DestIdx];
+ assert(NewIdx < NewVertexSize);
+ NewEdgeArray[EdgeI].Dest = &NewVertexArray[NewIdx];
+ ++EdgeI;
+ }
+ ++VertexI;
+ }
+ assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize &&
+ "Gadget graph malformed");
+ NewVertexArray[VertexI].Edges = &NewEdgeArray[NewEdgeSize]; // terminator
+ return std::make_unique<GraphT>(std::move(NewVertexArray),
+ std::move(NewEdgeArray), NewVertexSize,
+ NewEdgeSize, std::forward<ArgT>(Args)...);
+ }
+
+private:
+ VertexVec AdjList;
+};
+
+template <typename NodeValueT, typename EdgeValueT>
+struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *> {
+ using GraphT = ImmutableGraph<NodeValueT, EdgeValueT>;
+ using NodeRef = typename GraphT::Node const *;
+ using EdgeRef = typename GraphT::Edge const &;
+
+ static NodeRef edge_dest(EdgeRef E) { return E.getDest(); }
+ using ChildIteratorType =
+ mapped_iterator<typename GraphT::Edge const *, decltype(&edge_dest)>;
+
+ static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return {N->edges_begin(), &edge_dest};
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return {N->edges_end(), &edge_dest};
+ }
+
+ static NodeRef getNode(typename GraphT::Node const &N) { return NodeRef{&N}; }
+ using nodes_iterator =
+ mapped_iterator<typename GraphT::Node const *, decltype(&getNode)>;
+ static nodes_iterator nodes_begin(GraphT *G) {
+ return {G->nodes_begin(), &getNode};
+ }
+ static nodes_iterator nodes_end(GraphT *G) {
+ return {G->nodes_end(), &getNode};
+ }
+
+ using ChildEdgeIteratorType = typename GraphT::Edge const *;
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ return N->edges_begin();
+ }
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) {
+ return N->edges_end();
+ }
+ static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
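[Editor's note: to make the header's design concrete, here is a hypothetical use of the builder API above. A trivial graph type exposes the protected ImmutableGraph constructor, ImmutableGraphBuilder lays the nodes and edges out into the two flat arrays, and NodeSet gives the bit-vector set semantics described in the file comment. The names MyGraph and countReachableEdges are illustrative and not part of this commit:

    #include "ImmutableGraph.h"
    #include <memory>

    using llvm::ImmutableGraph;
    using llvm::ImmutableGraphBuilder;

    // Minimal concrete graph: int-valued nodes and edges. The constructor
    // must be public so ImmutableGraphBuilder::get() can std::make_unique it.
    struct MyGraph : ImmutableGraph<int, int> {
      MyGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
              size_type NodesSize, size_type EdgesSize)
          : ImmutableGraph(std::move(Nodes), std::move(Edges), NodesSize,
                           EdgesSize) {}
    };

    int countReachableEdges() {
      ImmutableGraphBuilder<MyGraph> Builder;
      auto A = Builder.addVertex(1);      // BuilderNodeRef is just an index
      auto B = Builder.addVertex(2);
      Builder.addEdge(/*Value=*/42, /*From=*/A, /*To=*/B);
      std::unique_ptr<MyGraph> G = Builder.get();  // flatten into two arrays

      MyGraph::NodeSet Visited(*G);       // one bit per node in the graph
      int NumEdges = 0;
      for (const auto &N : G->nodes()) {  // contiguous, cache-friendly walk
        Visited.insert(N);                // set one bit: O(1), one load+store
        NumEdges += static_cast<int>(N.edges().size());
      }
      return NumEdges;                    // 1 for this two-node graph
    }
]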
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 0481a40d462a..a0ab5c3a5b3c 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -120,7 +120,7 @@ FunctionPass *createX86DomainReassignmentPass();
FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
-FunctionPass *createX86RetpolineThunksPass();
+FunctionPass *createX86IndirectThunksPass();
/// This pass ensures instructions featuring a memory operand
/// have distinctive <LineNumber, Discriminator> (with respect to each other)
@@ -133,6 +133,9 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
+FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
+FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass();
+FunctionPass *createX86LoadValueInjectionRetHardeningPass();
FunctionPass *createX86SpeculativeLoadHardeningPass();
void initializeEvexToVexInstPassPass(PassRegistry &);
@@ -148,6 +151,9 @@ void initializeX86DomainReassignmentPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
void initializeX86ExpandPseudoPass(PassRegistry &);
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a2b11d55f650..bb8952f54e3a 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -426,6 +426,22 @@ def FeatureRetpolineExternalThunk
"ourselves. Only has effect when combined with some other retpoline "
"feature", [FeatureRetpolineIndirectCalls]>;
+// Mitigate LVI attacks against indirect calls/branches and call returns
+def FeatureLVIControlFlowIntegrity
+ : SubtargetFeature<
+ "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+ "Prevent indirect calls/branches from using a memory operand, and "
+ "precede all indirect calls/branches from a register with an "
+ "LFENCE instruction to serialize control flow. Also decompose RET "
+ "instructions into a POP+LFENCE+JMP sequence.">;
+
+// Mitigate LVI attacks against data loads
+def FeatureLVILoadHardening
+ : SubtargetFeature<
+ "lvi-load-hardening", "UseLVILoadHardening", "true",
+ "Insert LFENCE instructions to prevent data speculatively injected "
+ "into loads from being used maliciously.">;
+
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction">;
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 1dbf40683564..a1d256ea872d 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3202,8 +3202,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
(CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
return false;
- // Functions using retpoline for indirect calls need to use SDISel.
- if (Subtarget->useRetpolineIndirectCalls())
+ // Functions using thunks for indirect calls need to use SDISel.
+ if (Subtarget->useIndirectThunkCalls())
return false;
// Handle only C, fastcc, and webkit_js calling conventions for now.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 799c1f5d1285..1da20371caf5 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -765,10 +765,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
bool InProlog) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
- // FIXME: Add retpoline support and remove this.
- if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
+ // FIXME: Add indirect thunk support and remove this.
+ if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
report_fatal_error("Emitting stack probe calls on 64-bit with the large "
- "code model and retpoline not yet implemented.");
+ "code model and indirect thunks not yet implemented.");
unsigned CallOp;
if (Is64Bit)
@@ -2493,9 +2493,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
// is laid out within 2^31 bytes of each function body, but this seems
// to be sufficient for JIT.
// FIXME: Add retpoline support and remove the error here.
- if (STI.useRetpolineIndirectCalls())
+ if (STI.useIndirectThunkCalls())
report_fatal_error("Emitting morestack calls on 64-bit with the large "
- "code model and retpoline not yet implemented.");
+ "code model and thunks not yet implemented.");
BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
.addReg(X86::RIP)
.addImm(0)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index bf33f399db28..88af0ebcfd0e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -987,7 +987,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target can fold the load into the call or
// jmp.
- !Subtarget->useRetpolineIndirectCalls() &&
+ !Subtarget->useIndirectThunkCalls() &&
((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
(Subtarget->is64Bit() ||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1523d56cc4e7..c8720d9ae3a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30221,8 +30221,8 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
}
bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
- // If the subtarget is using retpolines, we need to not generate jump tables.
- if (Subtarget.useRetpolineIndirectBranches())
+ // If the subtarget is using thunks, we need to not generate jump tables.
+ if (Subtarget.useIndirectThunkBranches())
return false;
// Otherwise, fallback on the generic logic.
@@ -31345,22 +31345,22 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
return BB;
}
-static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
+static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) {
switch (RPOpc) {
- case X86::RETPOLINE_CALL32:
+ case X86::INDIRECT_THUNK_CALL32:
return X86::CALLpcrel32;
- case X86::RETPOLINE_CALL64:
+ case X86::INDIRECT_THUNK_CALL64:
return X86::CALL64pcrel32;
- case X86::RETPOLINE_TCRETURN32:
+ case X86::INDIRECT_THUNK_TCRETURN32:
return X86::TCRETURNdi;
- case X86::RETPOLINE_TCRETURN64:
+ case X86::INDIRECT_THUNK_TCRETURN64:
return X86::TCRETURNdi64;
}
- llvm_unreachable("not retpoline opcode");
+ llvm_unreachable("not indirect thunk opcode");
}
-static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
- unsigned Reg) {
+static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget,
+ unsigned Reg) {
if (Subtarget.useRetpolineExternalThunk()) {
// When using an external thunk for retpolines, we pick names that match the
// names GCC happens to use as well. This helps simplify the implementation
@@ -31392,39 +31392,48 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__x86_indirect_thunk_r11";
}
+ llvm_unreachable("unexpected reg for external indirect thunk");
+ }
+
+ if (Subtarget.useRetpolineIndirectCalls() ||
+ Subtarget.useRetpolineIndirectBranches()) {
+ // When targeting an internal COMDAT thunk use an LLVM-specific name.
+ switch (Reg) {
+ case X86::EAX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_eax";
+ case X86::ECX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_ecx";
+ case X86::EDX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_edx";
+ case X86::EDI:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_edi";
+ case X86::R11:
+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+ return "__llvm_retpoline_r11";
+ }
llvm_unreachable("unexpected reg for retpoline");
}
- // When targeting an internal COMDAT thunk use an LLVM-specific name.
- switch (Reg) {
- case X86::EAX:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_eax";
- case X86::ECX:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_ecx";
- case X86::EDX:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_edx";
- case X86::EDI:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_edi";
- case X86::R11:
+ if (Subtarget.useLVIControlFlowIntegrity()) {
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
- return "__llvm_retpoline_r11";
+ return "__llvm_lvi_thunk_r11";
}
- llvm_unreachable("unexpected reg for retpoline");
+ llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
- MachineBasicBlock *BB) const {
+X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
// Copy the virtual register into the R11 physical register and
// call the retpoline thunk.
DebugLoc DL = MI.getDebugLoc();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
Register CalleeVReg = MI.getOperand(0).getReg();
- unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
+ unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());
// Find an available scratch register to hold the callee. On 64-bit, we can
// just use R11, but we scan for uses anyway to ensure we don't generate
@@ -31458,7 +31467,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
report_fatal_error("calling convention incompatible with retpoline, no "
"available registers");
- const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
+ const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
.addReg(CalleeVReg);
@@ -32234,11 +32243,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
return EmitLoweredTLSAddr(MI, BB);
- case X86::RETPOLINE_CALL32:
- case X86::RETPOLINE_CALL64:
- case X86::RETPOLINE_TCRETURN32:
- case X86::RETPOLINE_TCRETURN64:
- return EmitLoweredRetpoline(MI, BB);
+ case X86::INDIRECT_THUNK_CALL32:
+ case X86::INDIRECT_THUNK_CALL64:
+ case X86::INDIRECT_THUNK_TCRETURN32:
+ case X86::INDIRECT_THUNK_TCRETURN64:
+ return EmitLoweredIndirectThunk(MI, BB);
case X86::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case X86::CATCHPAD:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 3a17099da38f..830cdfc79c0a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1482,8 +1482,8 @@ namespace llvm {
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
- MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp
new file mode 100644
index 000000000000..36b9c3ccc959
--- /dev/null
+++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -0,0 +1,364 @@
+//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Pass that injects an MI thunk that is used to lower indirect calls in a way
+/// that prevents speculation on some x86 processors and can be used to mitigate
+/// security vulnerabilities due to targeted speculative execution and side
+/// channels such as CVE-2017-5715.
+///
+/// Currently supported thunks include:
+/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls
+/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization
+/// before making an indirect call/jump
+///
+/// Note that the reason that this is implemented as a MachineFunctionPass and
+/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline
+/// serialize all transformations, which can consume lots of memory.
+///
+/// TODO(chandlerc): All of this code could use better comments and
+/// documentation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-retpoline-thunks"
+
+static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
+static const char R11RetpolineName[] = "__llvm_retpoline_r11";
+static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
+static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
+static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
+static const char EDIRetpolineName[] = "__llvm_retpoline_edi";
+
+static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
+static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
+
+namespace {
+template <typename Derived> class ThunkInserter {
+ Derived &getDerived() { return *static_cast<Derived *>(this); }
+
+protected:
+ bool InsertedThunks;
+ void doInitialization(Module &M) {}
+ void createThunkFunction(MachineModuleInfo &MMI, StringRef Name);
+
+public:
+ void init(Module &M) {
+ InsertedThunks = false;
+ getDerived().doInitialization(M);
+ }
+ // Returns `true` if `MMI` or `MF` was modified.
+ bool run(MachineModuleInfo &MMI, MachineFunction &MF);
+};
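+
+// Editorial note: each CRTP Derived above is expected to provide (a sketch of
+// the implicit interface, inferred from the two inserters below):
+//
+//   const char *getThunkPrefix();           // prefix of its thunk names
+//   bool mayUseThunk(const MachineFunction &MF);
+//   void insertThunks(MachineModuleInfo &MMI);
+//   void populateThunk(MachineFunction &MF);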
+
+struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
+ const char *getThunkPrefix() { return RetpolineNamePrefix; }
+ bool mayUseThunk(const MachineFunction &MF) {
+ const auto &STI = MF.getSubtarget<X86Subtarget>();
+ return (STI.useRetpolineIndirectCalls() ||
+ STI.useRetpolineIndirectBranches()) &&
+ !STI.useRetpolineExternalThunk();
+ }
+ void insertThunks(MachineModuleInfo &MMI);
+ void populateThunk(MachineFunction &MF);
+};
+
+struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
+ const char *getThunkPrefix() { return LVIThunkNamePrefix; }
+ bool mayUseThunk(const MachineFunction &MF) {
+ return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
+ }
+ void insertThunks(MachineModuleInfo &MMI) {
+ createThunkFunction(MMI, R11LVIThunkName);
+ }
+ void populateThunk(MachineFunction &MF) {
+ // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+ // generate two bbs for the entry block.
+ MachineBasicBlock *Entry = &MF.front();
+ Entry->clear();
+ while (MF.size() > 1)
+ MF.erase(std::next(MF.begin()));
+
+ // This code mitigates LVI by replacing each indirect call/jump with a
+ // direct call/jump to a thunk that looks like:
+ // ```
+ // lfence
+ // jmpq *%r11
+ // ```
+ // This ensures that if the value in register %r11 was loaded from memory,
+ // then the value in %r11 is (architecturally) correct prior to the jump.
+ const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
+ MF.front().addLiveIn(X86::R11);
+ return;
+ }
+};
+
+class X86IndirectThunks : public MachineFunctionPass {
+public:
+ static char ID;
+
+ X86IndirectThunks() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "X86 Indirect Thunks"; }
+
+ bool doInitialization(Module &M) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ }
+
+private:
+ std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;
+
+ // FIXME: When LLVM moves to C++17, these can become folds
+ template <typename... ThunkInserterT>
+ static void initTIs(Module &M,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ (void)std::initializer_list<int>{
+ (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
+ }
+ template <typename... ThunkInserterT>
+ static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ bool Modified = false;
+ (void)std::initializer_list<int>{
+ Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
+ return Modified;
+ }
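+
+ // Editorial note: with C++17, the initializer_list tricks above become fold
+ // expressions -- a sketch:
+ //
+ //   (std::get<ThunkInserterT>(ThunkInserters).init(M), ...);
+ //   return (std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF) | ...);
+ //
+ // Note the bitwise | rather than ||: short-circuiting would skip the
+ // remaining inserters as soon as one of them reports a modification.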
+};
+
+} // end anonymous namespace
+
+void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+ if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
+ createThunkFunction(MMI, R11RetpolineName);
+ else
+ for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName,
+ EDIRetpolineName})
+ createThunkFunction(MMI, Name);
+}
+
+void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
+ bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64;
+ Register ThunkReg;
+ if (Is64Bit) {
+ assert(MF.getName() == "__llvm_retpoline_r11" &&
+ "Should only have an r11 thunk on 64-bit targets");
+
+ // __llvm_retpoline_r11:
+ // callq .Lr11_call_target
+ // .Lr11_capture_spec:
+ // pause
+ // lfence
+ // jmp .Lr11_capture_spec
+ // .align 16
+ // .Lr11_call_target:
+ // movq %r11, (%rsp)
+ // retq
+ ThunkReg = X86::R11;
+ } else {
+ // For 32-bit targets we need to emit a collection of thunks for various
+ // possible scratch registers as well as a fallback that uses EDI, which is
+ // normally callee saved.
+ // __llvm_retpoline_eax:
+ // calll .Leax_call_target
+ // .Leax_capture_spec:
+ // pause
+ // jmp .Leax_capture_spec
+ // .align 16
+ // .Leax_call_target:
+ // movl %eax, (%esp) # Clobber return addr
+ // retl
+ //
+ // __llvm_retpoline_ecx:
+ // ... # Same setup
+ // movl %ecx, (%esp)
+ // retl
+ //
+ // __llvm_retpoline_edx:
+ // ... # Same setup
+ // movl %edx, (%esp)
+ // retl
+ //
+ // __llvm_retpoline_edi:
+ // ... # Same setup
+ // movl %edi, (%esp)
+ // retl
+ if (MF.getName() == EAXRetpolineName)
+ ThunkReg = X86::EAX;
+ else if (MF.getName() == ECXRetpolineName)
+ ThunkReg = X86::ECX;
+ else if (MF.getName() == EDXRetpolineName)
+ ThunkReg = X86::EDX;
+ else if (MF.getName() == EDIRetpolineName)
+ ThunkReg = X86::EDI;
+ else
+ llvm_unreachable("Invalid thunk name on x86-32!");
+ }
+
+ const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+ // generate two bbs for the entry block.
+ MachineBasicBlock *Entry = &MF.front();
+ Entry->clear();
+ while (MF.size() > 1)
+ MF.erase(std::next(MF.begin()));
+
+ MachineBasicBlock *CaptureSpec =
+ MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+ MachineBasicBlock *CallTarget =
+ MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+ MCSymbol *TargetSym = MF.getContext().createTempSymbol();
+ MF.push_back(CaptureSpec);
+ MF.push_back(CallTarget);
+
+ const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
+
+ Entry->addLiveIn(ThunkReg);
+ BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
+
+ // The MIR verifier thinks that the CALL in the entry block will fall through
+ // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
+ // the successor, but the MIR verifier doesn't know how to cope with that.
+ Entry->addSuccessor(CaptureSpec);
+
+ // In the capture loop for speculation, we want to stop the processor from
+ // speculating as fast as possible. On Intel processors, the PAUSE instruction
+ // will block speculation without consuming any execution resources. On AMD
+ // processors, the PAUSE instruction is (essentially) a nop, so we also use an
+ // LFENCE instruction which they have advised will stop speculation as well
+ // with minimal resource utilization. We still end the capture with a jump to
+ // form an infinite loop to fully guarantee that no matter what implementation
+ // of the x86 ISA, speculating this code path never escapes.
+ BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
+ BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
+ CaptureSpec->setHasAddressTaken();
+ CaptureSpec->addSuccessor(CaptureSpec);
+
+ CallTarget->addLiveIn(ThunkReg);
+ CallTarget->setHasAddressTaken();
+ CallTarget->setAlignment(Align(16));
+
+ // Insert return address clobber
+ const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
+ const Register SPReg = Is64Bit ? X86::RSP : X86::ESP;
+ addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false,
+ 0)
+ .addReg(ThunkReg);
+
+ CallTarget->back().setPreInstrSymbol(MF, TargetSym);
+ BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
+}
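+
+// Editorial walkthrough of the 64-bit thunk built above (annotated sketch):
+//
+//   callq .Lr11_call_target   # pushes &.Lr11_capture_spec; the RSB now
+//                             # predicts the matching RET returns there
+// .Lr11_capture_spec:
+//   pause
+//   lfence
+//   jmp .Lr11_capture_spec    # speculation is trapped in this loop
+// .Lr11_call_target:
+//   movq %r11, (%rsp)         # overwrite the return address with the real
+//                             # branch target
+//   retq                      # architecturally jumps to *%r11; the predictor
+//                             # (wrongly, harmlessly) targets the loop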
+
+template <typename Derived>
+void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
+ StringRef Name) {
+ assert(Name.startswith(getDerived().getThunkPrefix()) &&
+ "Created a thunk with an unexpected prefix!");
+
+ Module &M = const_cast<Module &>(*MMI.getModule());
+ LLVMContext &Ctx = M.getContext();
+ auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
+ Function *F =
+ Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
+ F->setVisibility(GlobalValue::HiddenVisibility);
+ F->setComdat(M.getOrInsertComdat(Name));
+
+ // Add Attributes so that we don't create a frame, unwind information, or
+ // inline.
+ AttrBuilder B;
+ B.addAttribute(llvm::Attribute::NoUnwind);
+ B.addAttribute(llvm::Attribute::Naked);
+ F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+
+ // Populate our function a bit so that we can verify.
+ BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
+ IRBuilder<> Builder(Entry);
+
+ Builder.CreateRetVoid();
+
+ // MachineFunctions/MachineBasicBlocks aren't created automatically for the
+ // IR-level constructs we already made. Create them and insert them into the
+ // module.
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
+
+ // Insert EntryMBB into MF. It's not in the module until we do this.
+ MF.insert(MF.end(), EntryMBB);
+ // Set MF properties. We never use vregs...
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+}
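+
+// Editorial sketch of the IR shell created above (illustrative; the exact
+// printed form may differ):
+//
+//   $__llvm_retpoline_r11 = comdat any
+//   define linkonce_odr hidden void @__llvm_retpoline_r11() #0 comdat {
+//   entry:
+//     ret void
+//   }
+//   attributes #0 = { naked nounwind }
+//
+// The `ret void` is only a placeholder so the IR verifies; the real body is
+// emitted as machine IR by the derived class's populateThunk().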
+
+template <typename Derived>
+bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
+ // If MF is not a thunk, check to see if we need to insert a thunk.
+ if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
+ // If we've already inserted a thunk, nothing else to do.
+ if (InsertedThunks)
+ return false;
+
+ // Only add a thunk if one of the functions has the corresponding feature
+ // enabled in its subtarget, and doesn't enable external thunks.
+ // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
+ // nothing will end up calling it.
+ // FIXME: It's a little silly to look at every function just to enumerate
+ // the subtargets, but eventually we'll want to look at them for indirect
+ // calls, so maybe this is OK.
+ if (!getDerived().mayUseThunk(MF))
+ return false;
+
+ getDerived().insertThunks(MMI);
+ InsertedThunks = true;
+ return true;
+ }
+
+ // If this *is* a thunk function, we need to populate it with the correct MI.
+ getDerived().populateThunk(MF);
+ return true;
+}
+
+FunctionPass *llvm::createX86IndirectThunksPass() {
+ return new X86IndirectThunks();
+}
+
+char X86IndirectThunks::ID = 0;
+
+bool X86IndirectThunks::doInitialization(Module &M) {
+ initTIs(M, TIs);
+ return false;
+}
+
+bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << getPassName() << '\n');
+ auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ return runTIs(MMI, MF, TIs);
+}
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 78d8dd3c0d03..1fdac104cb73 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1213,14 +1213,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>;
+ Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>;
+ Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
@@ -1232,21 +1232,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
- (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, UseRetpolineIndirectCalls]>;
+ (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In64BitMode, UseIndirectThunkCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
- (RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, UseRetpolineIndirectCalls]>;
+ (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[Not64BitMode, UseIndirectThunkCalls]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index 32faeb1a86f2..1842dc19ec2e 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -237,13 +237,13 @@ let isCall = 1 in
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
- Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>,
+ Requires<[Not64BitMode,NotUseIndirectThunkCalls]>,
Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
Requires<[Not64BitMode,FavorMemIndirectCall,
- NotUseRetpolineIndirectCalls]>,
+ NotUseIndirectThunkCalls]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT, use with caution.
@@ -334,11 +334,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode,NotUseIndirectThunkCalls]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
- NotUseRetpolineIndirectCalls]>;
+ NotUseIndirectThunkCalls]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
@@ -393,19 +393,19 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
usesCustomInserter = 1,
SchedRW = [WriteJump] in {
- def RETPOLINE_CALL32 :
+ def INDIRECT_THUNK_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
- Requires<[Not64BitMode,UseRetpolineIndirectCalls]>;
+ Requires<[Not64BitMode,UseIndirectThunkCalls]>;
- def RETPOLINE_CALL64 :
+ def INDIRECT_THUNK_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode,UseIndirectThunkCalls]>;
- // Retpoline variant of indirect tail calls.
+ // Indirect thunk variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- def RETPOLINE_TCRETURN64 :
+ def INDIRECT_THUNK_TCRETURN64 :
PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
- def RETPOLINE_TCRETURN32 :
+ def INDIRECT_THUNK_TCRETURN32 :
PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
}
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index ca5425e8b89f..93f40c8ec996 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -996,8 +996,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
-def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
-def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
+def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
+def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
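+// Editorial note: useIndirectThunkCalls() appears to subsume the old
+// retpoline predicate, returning true when either retpolines or LVI
+// control-flow integrity request thunked indirect calls, so a single
+// predicate pair now gates the patterns for both mitigations.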
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
new file mode 100644
index 000000000000..35fc439998f9
--- /dev/null
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -0,0 +1,900 @@
+//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: This pass finds Load Value Injection (LVI) gadgets consisting
+/// of a load from memory (i.e., SOURCE), and any operation that may transmit
+/// the value loaded from memory over a covert channel, or use the value loaded
+/// from memory to determine a branch/call target (i.e., SINK). After finding
+/// all such gadgets in a given function, the pass minimally inserts LFENCE
+/// instructions in such a manner that the following property is satisfied: for
+/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at
+/// least one LFENCE instruction. The algorithm that implements this minimal
+/// insertion is influenced by an academic paper that minimally inserts memory
+/// fences for high-performance concurrent programs:
+/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf
+/// The algorithm implemented in this pass is as follows:
+/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the
+/// following components:
+/// - SOURCE instructions (also includes function arguments)
+/// - SINK instructions
+/// - Basic block entry points
+/// - Basic block terminators
+/// - LFENCE instructions
+/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e.,
+/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been
+/// mitigated, go to step 6.
+/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
+/// 4. Insert one LFENCE along each CFG edge that was cut in step 3.
+/// 5. Go to step 2.
+/// 6. If any LFENCEs were inserted, return `true` from runOnMachineFunction()
+/// to tell LLVM that the function was modified.
+///
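+/// For illustration (editorial example, not from the patch), a gadget this
+/// pass would mitigate:
+/// ```
+/// movq (%rdi), %rax    # SOURCE: load from memory
+/// movq (%rax), %rcx    # SINK: loaded value used as an address
+/// ```
+/// After hardening, an LFENCE sits on every CFG path between the two:
+/// ```
+/// movq (%rdi), %rax
+/// lfence
+/// movq (%rax), %rcx
+/// ```
+///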
+//===----------------------------------------------------------------------===//
+
+#include "ImmutableGraph.h"
+#include "X86.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-load"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
+STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
+STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
+ "were deployed");
+STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis");
+
+static cl::opt<std::string> OptimizePluginPath(
+ PASS_KEY "-opt-plugin",
+ cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden);
+
+static cl::opt<bool> NoConditionalBranches(
+ PASS_KEY "-no-cbranch",
+ cl::desc("Don't treat conditional branches as disclosure gadgets. This "
+ "may improve performance, at the cost of security."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDot(
+ PASS_KEY "-dot",
+ cl::desc(
+ "For each function, emit a dot graph depicting potential LVI gadgets"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDotOnly(
+ PASS_KEY "-dot-only",
+ cl::desc("For each function, emit a dot graph depicting potential LVI "
+ "gadgets, and do not insert any fences"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDotVerify(
+ PASS_KEY "-dot-verify",
+ cl::desc("For each function, emit a dot graph to stdout depicting "
+ "potential LVI gadgets, used for testing purposes only"),
+ cl::init(false), cl::Hidden);
+
+static llvm::sys::DynamicLibrary OptimizeDL;
+typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
+ unsigned int *edges, int *edge_values,
+ int *cut_edges /* out */, unsigned int edges_size);
+static OptimizeCutT OptimizeCut = nullptr;
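+
+// Editorial sketch of the plugin ABI (inferred from hardenLoadsWithPlugin
+// below; `optimize_cut` is the symbol the pass looks up): the graph arrives
+// in CSR form -- nodes[i] is the index of node i's first edge,
+// nodes[nodes_size] == edges_size is a terminator, edges[j] is edge j's
+// destination node, and edge_values[j] is a CFG edge's loop depth or -1 for
+// a gadget edge. A deliberately naive plugin that cuts every CFG edge:
+//
+//   extern "C" int optimize_cut(unsigned *nodes, unsigned nodes_size,
+//                               unsigned *edges, int *edge_values,
+//                               int *cut_edges /* out */,
+//                               unsigned edges_size) {
+//     for (unsigned J = 0; J < edges_size; ++J)
+//       cut_edges[J] = edge_values[J] >= 0; // cut CFG edges only
+//     return 0; // status is ignored by the pass
+//   }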
+
+namespace {
+
+struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {
+ static constexpr int GadgetEdgeSentinel = -1;
+ static constexpr MachineInstr *const ArgNodeSentinel = nullptr;
+
+ using GraphT = ImmutableGraph<MachineInstr *, int>;
+ using Node = typename GraphT::Node;
+ using Edge = typename GraphT::Edge;
+ using size_type = typename GraphT::size_type;
+ MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,
+ std::unique_ptr<Edge[]> Edges, size_type NodesSize,
+ size_type EdgesSize, int NumFences = 0, int NumGadgets = 0)
+ : GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize),
+ NumFences(NumFences), NumGadgets(NumGadgets) {}
+ static inline bool isCFGEdge(const Edge &E) {
+ return E.getValue() != GadgetEdgeSentinel;
+ }
+ static inline bool isGadgetEdge(const Edge &E) {
+ return E.getValue() == GadgetEdgeSentinel;
+ }
+ int NumFences;
+ int NumGadgets;
+};
+
+class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass {
+public:
+ X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "X86 Load Value Injection (LVI) Load Hardening";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+
+private:
+ using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>;
+ using EdgeSet = MachineGadgetGraph::EdgeSet;
+ using NodeSet = MachineGadgetGraph::NodeSet;
+ using Gadget = std::pair<MachineInstr *, MachineInstr *>;
+
+ const X86Subtarget *STI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ std::unique_ptr<MachineGadgetGraph>
+ getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT,
+ const MachineDominanceFrontier &MDF) const;
+ int hardenLoadsWithPlugin(MachineFunction &MF,
+ std::unique_ptr<MachineGadgetGraph> Graph) const;
+ int hardenLoadsWithGreedyHeuristic(
+ MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const;
+ int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G,
+ EdgeSet &ElimEdges /* in, out */,
+ NodeSet &ElimNodes /* in, out */) const;
+ std::unique_ptr<MachineGadgetGraph>
+ trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const;
+ void findAndCutEdges(MachineGadgetGraph &G,
+ EdgeSet &CutEdges /* out */) const;
+ int insertFences(MachineFunction &MF, MachineGadgetGraph &G,
+ EdgeSet &CutEdges /* in, out */) const;
+ bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const;
+ bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const;
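+
+ // Editorial note: a call counts as a fence below presumably because, with
+ // LVI-CFI enabled, indirect calls are routed through a thunk that begins
+ // with an LFENCE, and returns come back through the fenced pop/lfence/jmp
+ // sequence of the ret-hardening pass, so a call boundary already
+ // serializes loaded values.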
+ inline bool isFence(const MachineInstr *MI) const {
+ return MI && (MI->getOpcode() == X86::LFENCE ||
+ (STI->useLVIControlFlowIntegrity() && MI->isCall()));
+ }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+template <>
+struct GraphTraits<MachineGadgetGraph *>
+ : GraphTraits<ImmutableGraph<MachineInstr *, int> *> {};
+
+template <>
+struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
+ using GraphType = MachineGadgetGraph;
+ using Traits = llvm::GraphTraits<GraphType *>;
+ using NodeRef = typename Traits::NodeRef;
+ using EdgeRef = typename Traits::EdgeRef;
+ using ChildIteratorType = typename Traits::ChildIteratorType;
+ using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType;
+
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(NodeRef Node, GraphType *) {
+ if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel)
+ return "ARGS";
+
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << *Node->getValue();
+ return OS.str();
+ }
+
+ static std::string getNodeAttributes(NodeRef Node, GraphType *) {
+ MachineInstr *MI = Node->getValue();
+ if (MI == MachineGadgetGraph::ArgNodeSentinel)
+ return "color = blue";
+ if (MI->getOpcode() == X86::LFENCE)
+ return "color = green";
+ return "";
+ }
+
+ static std::string getEdgeAttributes(NodeRef, ChildIteratorType E,
+ GraphType *) {
+ int EdgeVal = (*E.getCurrent()).getValue();
+ return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal)
+ : "color = red, style = \"dashed\"";
+ }
+};
+
+} // end namespace llvm
+
+constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel;
+constexpr int MachineGadgetGraph::GadgetEdgeSentinel;
+
+char X86LoadValueInjectionLoadHardeningPass::ID = 0;
+
+void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineDominanceFrontier>();
+ AU.setPreservesCFG();
+}
+
+static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF,
+ MachineGadgetGraph *G) {
+ WriteGraph(OS, G, /*ShortNames*/ false,
+ "Speculative gadgets for \"" + MF.getName() + "\" function");
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+ << " *****\n");
+ STI = &MF.getSubtarget<X86Subtarget>();
+ if (!STI->useLVILoadHardening())
+ return false;
+
+ // FIXME: support 32-bit
+ if (!STI->is64Bit())
+ report_fatal_error("LVI load hardening is only supported on 64-bit", false);
+
+ // Don't skip functions with the "optnone" attr, but do participate in opt-bisect.
+ const Function &F = MF.getFunction();
+ if (!F.hasOptNone() && skipFunction(F))
+ return false;
+
+ ++NumFunctionsConsidered;
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "Building gadget graph...\n");
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &MDF = getAnalysis<MachineDominanceFrontier>();
+ std::unique_ptr<MachineGadgetGraph> Graph = getGadgetGraph(MF, MLI, MDT, MDF);
+ LLVM_DEBUG(dbgs() << "Building gadget graph... Done\n");
+ if (Graph == nullptr)
+ return false; // didn't find any gadgets
+
+ if (EmitDotVerify) {
+ WriteGadgetGraph(outs(), MF, Graph.get());
+ return false;
+ }
+
+ if (EmitDot || EmitDotOnly) {
+ LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n");
+ std::error_code FileError;
+ std::string FileName = "lvi.";
+ FileName += MF.getName();
+ FileName += ".dot";
+ raw_fd_ostream FileOut(FileName, FileError);
+ if (FileError)
+ errs() << FileError.message();
+ WriteGadgetGraph(FileOut, MF, Graph.get());
+ FileOut.close();
+ LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n");
+ if (EmitDotOnly)
+ return false;
+ }
+
+ int FencesInserted;
+ if (!OptimizePluginPath.empty()) {
+ if (!OptimizeDL.isValid()) {
+ std::string ErrorMsg;
+ OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
+ OptimizePluginPath.c_str(), &ErrorMsg);
+ if (!ErrorMsg.empty())
+ report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
+ OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
+ if (!OptimizeCut)
+ report_fatal_error("Invalid optimization plugin");
+ }
+ FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph));
+ } else { // Use the default greedy heuristic
+ FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph));
+ }
+
+ if (FencesInserted > 0)
+ ++NumFunctionsMitigated;
+ NumFences += FencesInserted;
+ return (FencesInserted > 0);
+}
+
+std::unique_ptr<MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
+ MachineFunction &MF, const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT,
+ const MachineDominanceFrontier &MDF) const {
+ using namespace rdf;
+
+ // Build the Register Dataflow Graph using the RDF framework
+ TargetOperandInfo TOI{*TII};
+ DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI};
+ DFG.build();
+ Liveness L{MF.getRegInfo(), DFG};
+ L.computePhiInfo();
+
+ GraphBuilder Builder;
+ using GraphIter = typename GraphBuilder::BuilderNodeRef;
+ DenseMap<MachineInstr *, GraphIter> NodeMap;
+ int FenceCount = 0, GadgetCount = 0;
+ auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
+ auto Ref = NodeMap.find(MI);
+ if (Ref == NodeMap.end()) {
+ auto I = Builder.addVertex(MI);
+ NodeMap[MI] = I;
+ return std::pair<GraphIter, bool>{I, true};
+ }
+ return std::pair<GraphIter, bool>{Ref->getSecond(), false};
+ };
+
+ // The `Transmitters` map memoizes transmitters found for each def. If a def
+ // has not yet been analyzed, then it will not appear in the map. If a def
+ // has been analyzed and was determined not to have any transmitters, then
+ // its list of transmitters will be empty.
+ DenseMap<NodeId, std::vector<NodeId>> Transmitters;
+
+ // Analyze all machine instructions to find gadgets and LFENCEs, adding
+ // each interesting value to `Nodes`
+ auto AnalyzeDef = [&](NodeAddr<DefNode *> SourceDef) {
+ SmallSet<NodeId, 8> UsesVisited, DefsVisited;
+ std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain =
+ [&](NodeAddr<DefNode *> Def) {
+ if (Transmitters.find(Def.Id) != Transmitters.end())
+ return; // Already analyzed `Def`
+
+ // Use RDF to find all the uses of `Def`
+ rdf::NodeSet Uses;
+ RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG));
+ for (auto UseID : L.getAllReachedUses(DefReg, Def)) {
+ auto Use = DFG.addr<UseNode *>(UseID);
+ if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node
+ NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG);
+ for (auto I : L.getRealUses(Phi.Id)) {
+ if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) {
+ for (auto UA : I.second)
+ Uses.emplace(UA.first);
+ }
+ }
+ } else { // not a phi node
+ Uses.emplace(UseID);
+ }
+ }
+
+ // For each use of `Def`, we want to know whether:
+ // (1) The use can leak the Def'ed value,
+ // (2) The use can further propagate the Def'ed value to more defs
+ for (auto UseID : Uses) {
+ if (!UsesVisited.insert(UseID).second)
+ continue; // Already visited this use of `Def`
+
+ auto Use = DFG.addr<UseNode *>(UseID);
+ assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef));
+ MachineOperand &UseMO = Use.Addr->getOp();
+ MachineInstr &UseMI = *UseMO.getParent();
+ assert(UseMO.isReg());
+
+ // We naively assume that an instruction propagates any loaded
+ // uses to all defs unless the instruction is a call, in which
+ // case all arguments will be treated as gadget sources during
+ // analysis of the callee function.
+ if (UseMI.isCall())
+ continue;
+
+ // Check whether this use can transmit (leak) its value.
+ if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) ||
+ (!NoConditionalBranches &&
+ instrUsesRegToBranch(UseMI, UseMO.getReg()))) {
+ Transmitters[Def.Id].push_back(Use.Addr->getOwner(DFG).Id);
+ if (UseMI.mayLoad())
+ continue; // Found a transmitting load -- no need to continue
+ // traversing its defs (i.e., this load will become
+ // a new gadget source anyway).
+ }
+
+ // Check whether the use propagates to more defs.
+ NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)};
+ rdf::NodeList AnalyzedChildDefs;
+ for (auto &ChildDef :
+ Owner.Addr->members_if(DataFlowGraph::IsDef, DFG)) {
+ if (!DefsVisited.insert(ChildDef.Id).second)
+ continue; // Already visited this def
+ if (Def.Addr->getAttrs() & NodeAttrs::Dead)
+ continue;
+ if (Def.Id == ChildDef.Id)
+ continue; // `Def` uses itself (e.g., increment loop counter)
+
+ AnalyzeDefUseChain(ChildDef);
+
+ // `Def` inherits all of its child defs' transmitters.
+ for (auto TransmitterId : Transmitters[ChildDef.Id])
+ Transmitters[Def.Id].push_back(TransmitterId);
+ }
+ }
+
+ // Note that this statement adds `Def.Id` to the map if no
+ // transmitters were found for `Def`.
+ auto &DefTransmitters = Transmitters[Def.Id];
+
+ // Remove duplicate transmitters
+ llvm::sort(DefTransmitters);
+ DefTransmitters.erase(
+ std::unique(DefTransmitters.begin(), DefTransmitters.end()),
+ DefTransmitters.end());
+ };
+
+ // Find all of the transmitters
+ AnalyzeDefUseChain(SourceDef);
+ auto &SourceDefTransmitters = Transmitters[SourceDef.Id];
+ if (SourceDefTransmitters.empty())
+ return; // No transmitters for `SourceDef`
+
+ MachineInstr *Source = SourceDef.Addr->getFlags() & NodeAttrs::PhiRef
+ ? MachineGadgetGraph::ArgNodeSentinel
+ : SourceDef.Addr->getOp().getParent();
+ auto GadgetSource = MaybeAddNode(Source);
+ // Each transmitter is a sink for `SourceDef`.
+ for (auto TransmitterId : SourceDefTransmitters) {
+ MachineInstr *Sink = DFG.addr<StmtNode *>(TransmitterId).Addr->getCode();
+ auto GadgetSink = MaybeAddNode(Sink);
+ // Add the gadget edge to the graph.
+ Builder.addEdge(MachineGadgetGraph::GadgetEdgeSentinel,
+ GadgetSource.first, GadgetSink.first);
+ ++GadgetCount;
+ }
+ };
+
+ LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n");
+ // Analyze function arguments
+ NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG);
+ for (NodeAddr<PhiNode *> ArgPhi :
+ EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) {
+ NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG);
+ llvm::for_each(Defs, AnalyzeDef);
+ }
+ // Analyze every instruction in MF
+ for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
+ for (NodeAddr<StmtNode *> SA :
+ BA.Addr->members_if(DataFlowGraph::IsCode<NodeAttrs::Stmt>, DFG)) {
+ MachineInstr *MI = SA.Addr->getCode();
+ if (isFence(MI)) {
+ MaybeAddNode(MI);
+ ++FenceCount;
+ } else if (MI->mayLoad()) {
+ NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG);
+ llvm::for_each(Defs, AnalyzeDef);
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n");
+ LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n");
+ if (GadgetCount == 0)
+ return nullptr;
+ NumGadgets += GadgetCount;
+
+ // Traverse CFG to build the rest of the graph
+ SmallSet<MachineBasicBlock *, 8> BlocksVisited;
+ std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG =
+ [&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) {
+ unsigned LoopDepth = MLI.getLoopDepth(MBB);
+ if (!MBB->empty()) {
+ // Always add the first instruction in each block
+ auto NI = MBB->begin();
+ auto BeginBB = MaybeAddNode(&*NI);
+ Builder.addEdge(ParentDepth, GI, BeginBB.first);
+ if (!BlocksVisited.insert(MBB).second)
+ return;
+
+ // Add any instructions within the block that are gadget components
+ GI = BeginBB.first;
+ while (++NI != MBB->end()) {
+ auto Ref = NodeMap.find(&*NI);
+ if (Ref != NodeMap.end()) {
+ Builder.addEdge(LoopDepth, GI, Ref->getSecond());
+ GI = Ref->getSecond();
+ }
+ }
+
+ // Always add the terminator instruction, if one exists
+ auto T = MBB->getFirstTerminator();
+ if (T != MBB->end()) {
+ auto EndBB = MaybeAddNode(&*T);
+ if (EndBB.second)
+ Builder.addEdge(LoopDepth, GI, EndBB.first);
+ GI = EndBB.first;
+ }
+ }
+ for (MachineBasicBlock *Succ : MBB->successors())
+ TraverseCFG(Succ, GI, LoopDepth);
+ };
+ // ArgNodeSentinel is a pseudo-instruction that represents MF args in the
+ // GadgetGraph
+ GraphIter ArgNode = MaybeAddNode(MachineGadgetGraph::ArgNodeSentinel).first;
+ TraverseCFG(&MF.front(), ArgNode, 0);
+ std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)};
+ LLVM_DEBUG(dbgs() << "Found " << G->nodes_size() << " nodes\n");
+ return G;
+}
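+
+// Editorial note: CFG edges carry their loop depth as the edge value (see
+// TraverseCFG above), so both the plugin and the greedy heuristic below can
+// prefer to cut cheap edges -- i.e., place LFENCEs outside of hot loops.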
+
+// Returns the number of remaining gadget edges that could not be eliminated
+int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
+ MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */,
+ MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const {
+ if (G.NumFences > 0) {
+ // Eliminate fences, along with the CFG edges that enter and leave each
+ // fence, since paths through a fence are trivially mitigated.
+ for (const auto &E : G.edges()) {
+ const MachineGadgetGraph::Node *Dest = E.getDest();
+ if (isFence(Dest->getValue())) {
+ ElimNodes.insert(*Dest);
+ ElimEdges.insert(E);
+ for (const auto &DE : Dest->edges())
+ ElimEdges.insert(DE);
+ }
+ }
+ }
+
+ // Find and eliminate gadget edges that have been mitigated.
+ int MitigatedGadgets = 0, RemainingGadgets = 0;
+ MachineGadgetGraph::NodeSet ReachableNodes{G};
+ for (const auto &RootN : G.nodes()) {
+ if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge))
+ continue; // skip this node if it isn't a gadget source
+
+ // Find all of the nodes that are CFG-reachable from RootN using DFS
+ ReachableNodes.clear();
+ std::function<void(const MachineGadgetGraph::Node *, bool)>
+ FindReachableNodes =
+ [&](const MachineGadgetGraph::Node *N, bool FirstNode) {
+ if (!FirstNode)
+ ReachableNodes.insert(*N);
+ for (const auto &E : N->edges()) {
+ const MachineGadgetGraph::Node *Dest = E.getDest();
+ if (MachineGadgetGraph::isCFGEdge(E) &&
+ !ElimEdges.contains(E) && !ReachableNodes.contains(*Dest))
+ FindReachableNodes(Dest, false);
+ }
+ };
+ FindReachableNodes(&RootN, true);
+
+ // Any gadget whose sink is unreachable has been mitigated
+ for (const auto &E : RootN.edges()) {
+ if (MachineGadgetGraph::isGadgetEdge(E)) {
+ if (ReachableNodes.contains(*E.getDest())) {
+ // This gadget's sink is reachable
+ ++RemainingGadgets;
+ } else { // This gadget's sink is unreachable, and therefore mitigated
+ ++MitigatedGadgets;
+ ElimEdges.insert(E);
+ }
+ }
+ }
+ }
+ return RemainingGadgets;
+}
+
+std::unique_ptr<MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges(
+ std::unique_ptr<MachineGadgetGraph> Graph) const {
+ MachineGadgetGraph::NodeSet ElimNodes{*Graph};
+ MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
+ int RemainingGadgets =
+ elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes);
+ if (ElimEdges.empty() && ElimNodes.empty()) {
+ Graph->NumFences = 0;
+ Graph->NumGadgets = RemainingGadgets;
+ } else {
+ Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */,
+ RemainingGadgets);
+ }
+ return Graph;
+}
+
+int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin(
+ MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
+ int FencesInserted = 0;
+
+ do {
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
+ Graph = trimMitigatedEdges(std::move(Graph));
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
+ if (Graph->NumGadgets == 0)
+ break;
+
+ LLVM_DEBUG(dbgs() << "Cutting edges...\n");
+ EdgeSet CutEdges{*Graph};
+ auto Nodes = std::make_unique<unsigned int[]>(Graph->nodes_size() +
+ 1 /* terminator node */);
+ auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size());
+ auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size());
+ auto EdgeValues = std::make_unique<int[]>(Graph->edges_size());
+ for (const auto &N : Graph->nodes()) {
+ Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin());
+ }
+ Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node
+ for (const auto &E : Graph->edges()) {
+ Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest());
+ EdgeValues[Graph->getEdgeIndex(E)] = E.getValue();
+ }
+ OptimizeCut(Nodes.get(), Graph->nodes_size(), Edges.get(), EdgeValues.get(),
+ EdgeCuts.get(), Graph->edges_size());
+ for (int I = 0; I < Graph->edges_size(); ++I)
+ if (EdgeCuts[I])
+ CutEdges.set(I);
+ LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
+ LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
+
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
+ FencesInserted += insertFences(MF, *Graph, CutEdges);
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
+ LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
+
+ Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph},
+ CutEdges);
+ } while (true);
+
+ return FencesInserted;
+}
+
+int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic(
+ MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
+ Graph = trimMitigatedEdges(std::move(Graph));
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
+ if (Graph->NumGadgets == 0)
+ return 0;
+
+ LLVM_DEBUG(dbgs() << "Cutting edges...\n");
+ MachineGadgetGraph::NodeSet ElimNodes{*Graph};
+ MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph};
+ auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) {
+ return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
+ MachineGadgetGraph::isCFGEdge(E);
+ };
+ auto IsGadgetEdge = [&ElimEdges,
+ &CutEdges](const MachineGadgetGraph::Edge &E) {
+ return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
+ MachineGadgetGraph::isGadgetEdge(E);
+ };
+
+ // FIXME: this is O(E^2), we could probably do better.
+ do {
+ // Find the cheapest CFG edge that will eliminate a gadget (by being
+ // egress from a SOURCE node or ingress to a SINK node), and cut it.
+ const MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
+
+ // First, collect all gadget source and sink nodes.
+ MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph};
+ for (const auto &N : Graph->nodes()) {
+ if (ElimNodes.contains(N))
+ continue;
+ for (const auto &E : N.edges()) {
+ if (IsGadgetEdge(E)) {
+ GadgetSources.insert(N);
+ GadgetSinks.insert(*E.getDest());
+ }
+ }
+ }
+
+ // Next, look for the cheapest CFG edge which, when cut, is guaranteed to
+ // mitigate at least one gadget by either:
+ // (a) being egress from a gadget source, or
+ // (b) being ingress to a gadget sink.
+ for (const auto &N : Graph->nodes()) {
+ if (ElimNodes.contains(N))
+ continue;
+ for (const auto &E : N.edges()) {
+ if (IsCFGEdge(E)) {
+ if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) {
+ if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue())
+ CheapestSoFar = &E;
+ }
+ }
+ }
+ }
+
+ assert(CheapestSoFar && "Failed to cut an edge");
+ CutEdges.insert(*CheapestSoFar);
+ ElimEdges.insert(*CheapestSoFar);
+ } while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes));
+ LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
+ LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
+
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
+ int FencesInserted = insertFences(MF, *Graph, CutEdges);
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
+ LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
+
+ return FencesInserted;
+}
+
+int X86LoadValueInjectionLoadHardeningPass::insertFences(
+ MachineFunction &MF, MachineGadgetGraph &G,
+ EdgeSet &CutEdges /* in, out */) const {
+ int FencesInserted = 0;
+ for (const auto &N : G.nodes()) {
+ for (const auto &E : N.edges()) {
+ if (CutEdges.contains(E)) {
+ MachineInstr *MI = N.getValue(), *Prev;
+ MachineBasicBlock *MBB; // Insert an LFENCE in this MBB
+ MachineBasicBlock::iterator InsertionPt; // ...at this point
+ if (MI == MachineGadgetGraph::ArgNodeSentinel) {
+ // insert LFENCE at beginning of entry block
+ MBB = &MF.front();
+ InsertionPt = MBB->begin();
+ Prev = nullptr;
+ } else if (MI->isBranch()) { // insert the LFENCE before the branch
+ MBB = MI->getParent();
+ InsertionPt = MI;
+ Prev = MI->getPrevNode();
+ // Remove all egress CFG edges from this branch because the inserted
+ // LFENCE prevents gadgets from crossing the branch.
+ for (const auto &E : N.edges()) {
+ if (MachineGadgetGraph::isCFGEdge(E))
+ CutEdges.insert(E);
+ }
+ } else { // insert the LFENCE after the instruction
+ MBB = MI->getParent();
+ InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end();
+ Prev = InsertionPt == MBB->end()
+ ? (MBB->empty() ? nullptr : &MBB->back())
+ : InsertionPt->getPrevNode();
+ }
+ // Ensure this insertion is not redundant (two LFENCEs in sequence).
+ if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) &&
+ (!Prev || !isFence(Prev))) {
+ BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+ ++FencesInserted;
+ }
+ }
+ }
+ }
+ return FencesInserted;
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory(
+ const MachineInstr &MI, unsigned Reg) const {
+ if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE ||
+ MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE)
+ return false;
+
+ // FIXME: This does not handle pseudo loading instructions like TCRETURN*.
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+ if (MemRefBeginIdx < 0) {
+ LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading "
+ "instruction:\n";
+ MI.print(dbgs()); dbgs() << '\n';);
+ return false;
+ }
+ MemRefBeginIdx += X86II::getOperandBias(Desc);
+
+ const MachineOperand &BaseMO =
+ MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
+ const MachineOperand &IndexMO =
+ MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
+ return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister &&
+ TRI->regsOverlap(BaseMO.getReg(), Reg)) ||
+ (IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister &&
+ TRI->regsOverlap(IndexMO.getReg(), Reg));
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch(
+ const MachineInstr &MI, unsigned Reg) const {
+ if (!MI.isConditionalBranch())
+ return false;
+ for (const MachineOperand &Use : MI.uses())
+ if (Use.isReg() && Use.getReg() == Reg)
+ return true;
+ return false;
+}
+
+INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
+ "X86 LVI load hardening", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
+ "X86 LVI load hardening", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
+ return new X86LoadValueInjectionLoadHardeningPass();
+}
+
+namespace {
+
+/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive
+/// analysis passes that add complexity to the pipeline. This complexity
+/// can cause noticeable overhead when no optimizations are enabled, i.e., -O0.
+/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to
+/// provide the same security as the optimized pass, but without adding
+/// unnecessary complexity to the LLVM pipeline.
+///
+/// The behavior of this pass is simply to insert an LFENCE after every load
+/// instruction.
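+///
+/// For example (editorial): a load like `movq (%rdi), %rax` is simply
+/// followed by an `lfence`, whether or not the loaded value could leak.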
+class X86LoadValueInjectionLoadHardeningUnoptimizedPass
+ : public MachineFunctionPass {
+public:
+ X86LoadValueInjectionLoadHardeningUnoptimizedPass()
+ : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)";
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0;
+
+bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+ << " *****\n");
+ const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>();
+ if (!STI->useLVILoadHardening())
+ return false;
+
+ // FIXME: support 32-bit
+ if (!STI->is64Bit())
+ report_fatal_error("LVI load hardening is only supported on 64-bit", false);
+
+ // Don't skip functions with the "optnone" attr, but do participate in opt-bisect.
+ const Function &F = MF.getFunction();
+ if (!F.hasOptNone() && skipFunction(F))
+ return false;
+
+ bool Modified = false;
+ ++NumFunctionsConsidered;
+
+ const TargetInstrInfo *TII = STI->getInstrInfo();
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE ||
+ MI.getOpcode() == X86::MFENCE)
+ continue;
+
+ MachineBasicBlock::iterator InsertionPt =
+ MI.getNextNode() ? MI.getNextNode() : MBB.end();
+ BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+ ++NumFences;
+ Modified = true;
+ }
+ }
+
+ if (Modified)
+ ++NumFunctionsMitigated;
+
+ return Modified;
+}
+
+INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY,
+ "X86 LVI load hardening", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() {
+ return new X86LoadValueInjectionLoadHardeningUnoptimizedPass();
+}
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
new file mode 100644
index 000000000000..6e1134a25950
--- /dev/null
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -0,0 +1,143 @@
+//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: Replaces every `ret` instruction with the sequence:
+/// ```
+/// pop <scratch-reg>
+/// lfence
+/// jmp *<scratch-reg>
+/// ```
+/// where `<scratch-reg>` is some available scratch register, according to the
+/// calling convention of the function being mitigated.
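+///
+/// The `ret` itself both loads the return address and branches to it, so an
+/// LVI-injected value could steer the return; the rewritten sequence fences
+/// between the load (`pop`) and the branch (`jmp`). Editorial example,
+/// assuming %rcx is the selected scratch register:
+/// ```
+/// retq              # before
+/// ```
+/// ```
+/// popq %rcx         # after: load the return address
+/// lfence            # serialize: %rcx is architecturally correct
+/// jmpq *%rcx        # branch to it
+/// ```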
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include <bitset>
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-ret"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
+STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
+STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
+ "were deployed");
+
+namespace {
+
+class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass {
+public:
+ X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {}
+ StringRef getPassName() const override {
+ return "X86 Load Value Injection (LVI) Ret-Hardening";
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionRetHardeningPass::ID = 0;
+
+bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+ << " *****\n");
+ const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>();
+ if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit())
+ return false; // FIXME: support 32-bit
+
+ // Don't skip functions with the "optnone" attr, but do participate in opt-bisect.
+ const Function &F = MF.getFunction();
+ if (!F.hasOptNone() && skipFunction(F))
+ return false;
+
+ ++NumFunctionsConsidered;
+ const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const X86InstrInfo *TII = Subtarget->getInstrInfo();
+ unsigned ClobberReg = X86::NoRegister;
+ std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s;
+ UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer
+ UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer
+ UnclobberableGR64s.set(X86::RAX); // used for function return
+ UnclobberableGR64s.set(X86::RDX); // used for function return
+
+ // We can clobber any register allowed by the function's calling convention.
+ for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR)
+ UnclobberableGR64s.set(Reg);
+ for (auto &Reg : X86::GR64RegClass) {
+ if (!UnclobberableGR64s.test(Reg)) {
+ ClobberReg = Reg;
+ break;
+ }
+ }
+
+ if (ClobberReg != X86::NoRegister) {
+ LLVM_DEBUG(dbgs() << "Selected register "
+ << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg)
+ << " to clobber\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n");
+ }
+
+ bool Modified = false;
+ for (auto &MBB : MF) {
+ if (MBB.empty())
+ continue;
+
+ MachineInstr &MI = MBB.back();
+ if (MI.getOpcode() != X86::RETQ)
+ continue;
+
+ if (ClobberReg != X86::NoRegister) {
+ MBB.erase_instr(&MI);
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r))
+ .addReg(ClobberReg, RegState::Define)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r))
+ .addReg(ClobberReg);
+ } else {
+ // In case there is no available scratch register, we can still read the
+ // stack slot at RSP to assert that RSP points to a valid page. The SHL-by-0
+ // below is a no-op read-modify-write, so the write back also verifies that
+ // the stack's write permissions are intact.
+ MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
+ addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
+ X86::RSP, false, 0)
+ .addImm(0)
+ ->addRegisterDead(X86::EFLAGS, TRI);
+ }
+
+ ++NumFences;
+ Modified = true;
+ }
+
+ if (Modified)
+ ++NumFunctionsMitigated;
+ return Modified;
+}
+
+INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY,
+ "X86 LVI ret hardener", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() {
+ return new X86LoadValueInjectionRetHardeningPass();
+}
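
A hedged sketch (not part of the patch) of what this pass does to a function epilogue, assuming the scratch-register search above settles on %rcx:

  # Original epilogue:
    popq %rbp
    retq
  # Hardened epilogue:
    popq %rbp
    popq %rcx   # pop the return address into the scratch register
    lfence      # no speculation proceeds until the pop has completed
    jmp *%rcx   # return via an indirect jump instead of RET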
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 7f49c6e861d4..f5caaaae4d84 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1220,8 +1220,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
break;
case MachineOperand::MO_Register:
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpolineIndirectCalls())
- report_fatal_error("Lowering register statepoints with retpoline not "
+ if (Subtarget->useIndirectThunkCalls())
+ report_fatal_error("Lowering register statepoints with thunks not "
"yet implemented.");
CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
CallOpcode = X86::CALL64r;
@@ -1399,9 +1399,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
EmitAndCountInstruction(
MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpolineIndirectCalls())
+ if (Subtarget->useIndirectThunkCalls())
report_fatal_error(
- "Lowering patchpoint with retpoline not yet implemented.");
+ "Lowering patchpoint with thunks not yet implemented.");
EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
}
diff --git a/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/llvm/lib/Target/X86/X86RetpolineThunks.cpp
deleted file mode 100644
index 9085d7f068ac..000000000000
--- a/llvm/lib/Target/X86/X86RetpolineThunks.cpp
+++ /dev/null
@@ -1,286 +0,0 @@
-//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// Pass that injects an MI thunk implementing a "retpoline". This is
-/// a RET-implemented trampoline that is used to lower indirect calls in a way
-/// that prevents speculation on some x86 processors and can be used to mitigate
-/// security vulnerabilities due to targeted speculative execution and side
-/// channels such as CVE-2017-5715.
-///
-/// TODO(chandlerc): All of this code could use better comments and
-/// documentation.
-///
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrBuilder.h"
-#include "X86Subtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "x86-retpoline-thunks"
-
-static const char ThunkNamePrefix[] = "__llvm_retpoline_";
-static const char R11ThunkName[] = "__llvm_retpoline_r11";
-static const char EAXThunkName[] = "__llvm_retpoline_eax";
-static const char ECXThunkName[] = "__llvm_retpoline_ecx";
-static const char EDXThunkName[] = "__llvm_retpoline_edx";
-static const char EDIThunkName[] = "__llvm_retpoline_edi";
-
-namespace {
-class X86RetpolineThunks : public MachineFunctionPass {
-public:
- static char ID;
-
- X86RetpolineThunks() : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override { return "X86 Retpoline Thunks"; }
-
- bool doInitialization(Module &M) override;
- bool runOnMachineFunction(MachineFunction &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineModuleInfoWrapperPass>();
- AU.addPreserved<MachineModuleInfoWrapperPass>();
- }
-
-private:
- MachineModuleInfo *MMI = nullptr;
- const TargetMachine *TM = nullptr;
- bool Is64Bit = false;
- const X86Subtarget *STI = nullptr;
- const X86InstrInfo *TII = nullptr;
-
- bool InsertedThunks = false;
-
- void createThunkFunction(Module &M, StringRef Name);
- void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
- void populateThunk(MachineFunction &MF, unsigned Reg);
-};
-
-} // end anonymous namespace
-
-FunctionPass *llvm::createX86RetpolineThunksPass() {
- return new X86RetpolineThunks();
-}
-
-char X86RetpolineThunks::ID = 0;
-
-bool X86RetpolineThunks::doInitialization(Module &M) {
- InsertedThunks = false;
- return false;
-}
-
-bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << getPassName() << '\n');
-
- TM = &MF.getTarget();
- STI = &MF.getSubtarget<X86Subtarget>();
- TII = STI->getInstrInfo();
- Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
-
- MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
- Module &M = const_cast<Module &>(*MMI->getModule());
-
- // If this function is not a thunk, check to see if we need to insert
- // a thunk.
- if (!MF.getName().startswith(ThunkNamePrefix)) {
- // If we've already inserted a thunk, nothing else to do.
- if (InsertedThunks)
- return false;
-
- // Only add a thunk if one of the functions has the retpoline feature
- // enabled in its subtarget, and doesn't enable external thunks.
- // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
- // nothing will end up calling it.
- // FIXME: It's a little silly to look at every function just to enumerate
- // the subtargets, but eventually we'll want to look at them for indirect
- // calls, so maybe this is OK.
- if ((!STI->useRetpolineIndirectCalls() &&
- !STI->useRetpolineIndirectBranches()) ||
- STI->useRetpolineExternalThunk())
- return false;
-
- // Otherwise, we need to insert the thunk.
- // WARNING: This is not really a well-behaved thing to do in a function
- // pass. We extract the module and insert a new function (and machine
- // function) directly into the module.
- if (Is64Bit)
- createThunkFunction(M, R11ThunkName);
- else
- for (StringRef Name :
- {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
- createThunkFunction(M, Name);
- InsertedThunks = true;
- return true;
- }
-
- // If this *is* a thunk function, we need to populate it with the correct MI.
- if (Is64Bit) {
- assert(MF.getName() == "__llvm_retpoline_r11" &&
- "Should only have an r11 thunk on 64-bit targets");
-
- // __llvm_retpoline_r11:
- // callq .Lr11_call_target
- // .Lr11_capture_spec:
- // pause
- // lfence
- // jmp .Lr11_capture_spec
- // .align 16
- // .Lr11_call_target:
- // movq %r11, (%rsp)
- // retq
- populateThunk(MF, X86::R11);
- } else {
- // For 32-bit targets we need to emit a collection of thunks for various
- // possible scratch registers as well as a fallback that uses EDI, which is
- // normally callee saved.
- // __llvm_retpoline_eax:
- // calll .Leax_call_target
- // .Leax_capture_spec:
- // pause
- // jmp .Leax_capture_spec
- // .align 16
- // .Leax_call_target:
- // movl %eax, (%esp) # Clobber return addr
- // retl
- //
- // __llvm_retpoline_ecx:
- // ... # Same setup
- // movl %ecx, (%esp)
- // retl
- //
- // __llvm_retpoline_edx:
- // ... # Same setup
- // movl %edx, (%esp)
- // retl
- //
- // __llvm_retpoline_edi:
- // ... # Same setup
- // movl %edi, (%esp)
- // retl
- if (MF.getName() == EAXThunkName)
- populateThunk(MF, X86::EAX);
- else if (MF.getName() == ECXThunkName)
- populateThunk(MF, X86::ECX);
- else if (MF.getName() == EDXThunkName)
- populateThunk(MF, X86::EDX);
- else if (MF.getName() == EDIThunkName)
- populateThunk(MF, X86::EDI);
- else
- llvm_unreachable("Invalid thunk name on x86-32!");
- }
-
- return true;
-}
-
-void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
- assert(Name.startswith(ThunkNamePrefix) &&
- "Created a thunk with an unexpected prefix!");
-
- LLVMContext &Ctx = M.getContext();
- auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
- Function *F =
- Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
- F->setVisibility(GlobalValue::HiddenVisibility);
- F->setComdat(M.getOrInsertComdat(Name));
-
- // Add Attributes so that we don't create a frame, unwind information, or
- // inline.
- AttrBuilder B;
- B.addAttribute(llvm::Attribute::NoUnwind);
- B.addAttribute(llvm::Attribute::Naked);
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
-
- // Populate our function a bit so that we can verify.
- BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
- IRBuilder<> Builder(Entry);
-
- Builder.CreateRetVoid();
-
- // MachineFunctions/MachineBasicBlocks aren't created automatically for the
- // IR-level constructs we already made. Create them and insert them into the
- // module.
- MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
- MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
-
- // Insert EntryMBB into MF. It's not in the module until we do this.
- MF.insert(MF.end(), EntryMBB);
-}
-
-void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
- unsigned Reg) {
- const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
- const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP;
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
- .addReg(Reg);
-}
-
-void X86RetpolineThunks::populateThunk(MachineFunction &MF,
- unsigned Reg) {
- // Set MF properties. We never use vregs...
- MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
-
- // Grab the entry MBB and erase any other blocks. O0 codegen appears to
- // generate two basic blocks for the entry block.
- MachineBasicBlock *Entry = &MF.front();
- Entry->clear();
- while (MF.size() > 1)
- MF.erase(std::next(MF.begin()));
-
- MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
- MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
- MCSymbol *TargetSym = MF.getContext().createTempSymbol();
- MF.push_back(CaptureSpec);
- MF.push_back(CallTarget);
-
- const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
- const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
-
- Entry->addLiveIn(Reg);
- BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
-
- // The MIR verifier thinks that the CALL in the entry block will fall through
- // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
- // the successor, but the MIR verifier doesn't know how to cope with that.
- Entry->addSuccessor(CaptureSpec);
-
- // In the capture loop for speculation, we want to stop the processor from
- // speculating as fast as possible. On Intel processors, the PAUSE instruction
- // will block speculation without consuming any execution resources. On AMD
- // processors, the PAUSE instruction is (essentially) a nop, so we also use an
- // LFENCE instruction, which AMD has advised will stop speculation as well
- // with minimal resource utilization. We still end the capture with a jump to
- // form an infinite loop, to fully guarantee that no matter which
- // implementation of the x86 ISA is running this code, speculation down this
- // path can never escape.
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
- CaptureSpec->setHasAddressTaken();
- CaptureSpec->addSuccessor(CaptureSpec);
-
- CallTarget->addLiveIn(Reg);
- CallTarget->setHasAddressTaken();
- CallTarget->setAlignment(Align(16));
- insertRegReturnAddrClobber(*CallTarget, Reg);
- CallTarget->back().setPreInstrSymbol(MF, TargetSym);
- BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
-}
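
For context, a sketch (not part of this diff) of how call sites used these thunks: the compiler materializes the call target in the thunk's register and calls the thunk, whose RET then transfers control to that target:

  # 64-bit indirect call, unmitigated:
    callq *%r11
  # With retpoline enabled, emitted instead as:
    callq __llvm_retpoline_r11   # the thunk returns to the address in %r11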
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index f4e8d30328ca..af5153243c8b 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -421,6 +421,16 @@ protected:
/// than emitting one inside the compiler.
bool UseRetpolineExternalThunk = false;
+ /// Prevent generation of indirect call/branch instructions from memory,
+ /// and force all indirect call/branch instructions from a register to be
+ /// preceded by an LFENCE. Also decompose RET instructions into a
+ /// POP+LFENCE+JMP sequence.
+ bool UseLVIControlFlowIntegrity = false;
+
+ /// Insert LFENCE instructions to prevent data speculatively injected into
+ /// loads from being used maliciously.
+ bool UseLVILoadHardening = false;
+
/// Use software floating point for code generation.
bool UseSoftFloat = false;
@@ -707,8 +717,21 @@ public:
return UseRetpolineIndirectBranches;
}
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
+
+ // These generic getters OR together all of the thunk-based mitigations
+ // supported by the subtarget: useIndirectThunk*() returns true if any
+ // thunk feature is enabled.
+ bool useIndirectThunkCalls() const {
+ return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
+ }
+ bool useIndirectThunkBranches() const {
+ return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
+ }
+
bool preferMaskRegisters() const { return PreferMaskRegisters; }
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
+ bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
+ bool useLVILoadHardening() const { return UseLVILoadHardening; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -853,10 +876,10 @@ public:
/// Return true if the subtarget allows calls to immediate address.
bool isLegalToCallImmediateAddr() const;
- /// If we are using retpolines, we need to expand indirectbr to avoid it
+ /// If we are using indirect thunks, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
bool enableIndirectBrExpand() const override {
- return useRetpolineIndirectBranches();
+ return useIndirectThunkBranches();
}
/// Enable the MachineScheduler pass for all X86 subtargets.
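
A minimal sketch of the call-site idiom these combined getters enable (hypothetical surrounding code, mirroring the X86MCInstLower.cpp hunks above): lowering code that cannot route a call through a thunk tests the aggregate predicate instead of each individual feature:

  if (Subtarget->useIndirectThunkCalls()) // retpoline or LVI CFI
    report_fatal_error("lowering with thunks not yet implemented");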
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 7176e46f07b1..9f639ffa22ec 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -82,6 +82,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
initializeX86CondBrFoldingPassPass(PR);
+ initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
+ initializeX86LoadValueInjectionRetHardeningPassPass(PR);
initializeX86OptimizeLEAPassPass(PR);
}
@@ -496,6 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() {
void X86PassConfig::addPostRegAlloc() {
addPass(createX86FloatingPointStackifierPass());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createX86LoadValueInjectionLoadHardeningPass());
+ else
+ addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
}
void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
@@ -525,7 +531,7 @@ void X86PassConfig::addPreEmitPass2() {
const Triple &TT = TM->getTargetTriple();
const MCAsmInfo *MAI = TM->getMCAsmInfo();
- addPass(createX86RetpolineThunksPass());
+ addPass(createX86IndirectThunksPass());
// Insert extra int3 instructions after trailing call instructions to avoid
// issues in the unwinder.
@@ -542,6 +548,7 @@ void X86PassConfig::addPreEmitPass2() {
// Identify valid longjmp targets for Windows Control Flow Guard.
if (TT.isOSWindows())
addPass(createCFGuardLongjmpPass());
+ addPass(createX86LoadValueInjectionRetHardeningPass());
}
std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
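
As a usage sketch, assuming the Clang driver flags added elsewhere in this import (the Options.td hunk is not shown in this section), the new LVI passes are expected to be driven like so:

  clang -mlvi-cfi foo.c         # indirect-branch thunks + RET hardening
  clang -mlvi-hardening foo.c   # full LVI mitigation, including load hardening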
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index ec976a971e3c..23561c25c50a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1768,7 +1768,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Constant *C2;
// C-(C2-X) --> X+(C-C2)
- if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))))
+ if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))) && !isa<ConstantExpr>(C2))
return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
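    // Worked example (sketch, not in the source): with C = 10 and C2 = 3,
    //   10 - (3 - X)  -->  X + (10 - 3)  -->  X + 7.
    // The isa<ConstantExpr> guard above matters because a constant-expression
    // C2 may not fold in getSub, which could let this combine fire repeatedly.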
// C-(X+C2) --> (C-C2)-X