author    Dimitry Andric <dim@FreeBSD.org>    2020-06-24 20:22:44 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2020-06-24 20:22:44 +0000
commit    483b61a50e7423b063fc26985325f594560b3f7e (patch)
tree      5bb205026b61f3dd88d63f43d0b790d518acefec /llvm/lib/Target
parent    8055b7e383f74dbc58c8085a0f0c45f4c61f8231 (diff)
Vendor import of llvm-project branch release/10.x llvmorg-10.0.0-129-gd24d5c8e308.
Notes:
    svn path=/vendor/llvm-project/release-10.x/; revision=362593
    svn path=/vendor/llvm-project/llvmorg-10.0.0-129-gd24d5c8e308/; revision=362594; tag=vendor/llvm-project/llvmorg-10.0.0-129-gd24d5c8e308
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp                    |    2
-rw-r--r--  llvm/lib/Target/BPF/BTFDebug.cpp                           |   32
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp             |    6
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp                  |    6
-rw-r--r--  llvm/lib/Target/Hexagon/RDFCopy.cpp                        |    6
-rw-r--r--  llvm/lib/Target/Hexagon/RDFCopy.h                          |    6
-rw-r--r--  llvm/lib/Target/Hexagon/RDFDeadCode.cpp                    |    4
-rw-r--r--  llvm/lib/Target/Hexagon/RDFDeadCode.h                      |    4
-rw-r--r--  llvm/lib/Target/Hexagon/RDFGraph.cpp                       | 1835
-rw-r--r--  llvm/lib/Target/Hexagon/RDFGraph.h                         |  968
-rw-r--r--  llvm/lib/Target/Hexagon/RDFLiveness.cpp                    | 1118
-rw-r--r--  llvm/lib/Target/Hexagon/RDFLiveness.h                      |  151
-rw-r--r--  llvm/lib/Target/Hexagon/RDFRegisters.cpp                   |  380
-rw-r--r--  llvm/lib/Target/Hexagon/RDFRegisters.h                     |  240
-rw-r--r--  llvm/lib/Target/PowerPC/P9InstrResources.td                |    1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp                |   27
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.h                  |    2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrAltivec.td                 |    4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp                   |    4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrVSX.td                     |   17
-rw-r--r--  llvm/lib/Target/X86/ImmutableGraph.h                       |  446
-rw-r--r--  llvm/lib/Target/X86/X86.h                                  |    8
-rw-r--r--  llvm/lib/Target/X86/X86.td                                 |   16
-rw-r--r--  llvm/lib/Target/X86/X86FastISel.cpp                        |    4
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.cpp                   |   10
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp                    |    2
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp                    |   81
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h                      |    4
-rw-r--r--  llvm/lib/Target/X86/X86IndirectThunks.cpp                  |  364
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td                    |   16
-rw-r--r--  llvm/lib/Target/X86/X86InstrControl.td                     |   22
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.td                        |    4
-rw-r--r--  llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp |  900
-rw-r--r--  llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp  |  143
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp                     |    8
-rw-r--r--  llvm/lib/Target/X86/X86RetpolineThunks.cpp                 |  286
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.h                         |   27
-rw-r--r--  llvm/lib/Target/X86/X86TargetMachine.cpp                   |    9
38 files changed, 2091 insertions, 5072 deletions
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 6f5f58554d09..d407edfbd966 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
<< val << '\n');
- SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
+ SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0));
// After replacement, the current node is dead, we need to
// go backward one step to make iterator still work
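The one-line BPF fix above is about type correctness: the folded constant must carry the load's own result type, since hard-coding MVT::i64 mismatches narrower (e.g. i32) loads. A minimal sketch of the pattern with the stock SelectionDAG API (the helper name is hypothetical):

// Sketch: fold a load to a constant while preserving its result type.
SDValue replaceLoadWithConstant(SelectionDAG &DAG, LoadSDNode *LD,
                                uint64_t Val, const SDLoc &DL) {
  EVT VT = LD->getValueType(0);              // result type of the load
  SDValue C = DAG.getConstant(Val, DL, VT);  // not a hard-coded MVT::i64
  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), C);
  return C;
}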
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index a9fb04f20d1c..6daeb3b4b63b 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -600,6 +600,38 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
bool CheckPointer, bool SeenPointer) {
if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
TypeId = DIToIdMap[Ty];
+
+ // To handle the case like the following:
+ // struct t;
+ // typedef struct t _t;
+ // struct s1 { _t *c; };
+ // int test1(struct s1 *arg) { ... }
+ //
+ // struct t { int a; int b; };
+ // struct s2 { _t c; }
+ // int test2(struct s2 *arg) { ... }
+ //
+ // During traversing test1() argument, "_t" is recorded
+ // in DIToIdMap and a forward declaration fixup is created
+ // for "struct t" to avoid pointee type traversal.
+ //
+ // During traversing test2() argument, even if we see "_t" is
+ // already defined, we should keep moving to eventually
+ // bring in types for "struct t". Otherwise, the "struct s2"
+ // definition won't be correct.
+ if (Ty && (!CheckPointer || !SeenPointer)) {
+ if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ unsigned Tag = DTy->getTag();
+ if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type ||
+ Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
+ uint32_t TmpTypeId;
+ visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer,
+ SeenPointer);
+ }
+ }
+ }
+
return;
}
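The new block above keeps walking through modifier types even when a node is already in DIToIdMap, so a forward declaration recorded earlier can still be completed later. A hedged illustration of that chain-walk (the helper is illustrative, not part of BTFDebug):

// Illustrative only: resolve a chain of modifier types down to the
// underlying type, mirroring the tags handled in visitTypeEntry().
static const DIType *skipModifiers(const DIType *Ty) {
  while (const auto *DTy = dyn_cast_or_null<DIDerivedType>(Ty)) {
    unsigned Tag = DTy->getTag();
    if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type &&
        Tag != dwarf::DW_TAG_volatile_type &&
        Tag != dwarf::DW_TAG_restrict_type)
      break;
    Ty = DTy->getBaseType();  // keep moving toward the defining struct
  }
  return Ty;
}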
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 886034d9601a..f1fe51f5e54f 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -12,9 +12,6 @@
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
@@ -27,6 +24,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 517ad1c6ee7b..f26e23befde2 100644
--- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -11,9 +11,6 @@
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "RDFCopy.h"
#include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -24,6 +21,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp
index a9d39fd4b2dc..34d58f0a7a23 100644
--- a/llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -11,13 +11,13 @@
//===----------------------------------------------------------------------===//
#include "RDFCopy.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.h b/llvm/lib/Target/Hexagon/RDFCopy.h
index 1450ab884849..99b18a75d8c2 100644
--- a/llvm/lib/Target/Hexagon/RDFCopy.h
+++ b/llvm/lib/Target/Hexagon/RDFCopy.h
@@ -9,9 +9,9 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
#include <vector>
diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
index af86c7b1956b..5a98debd3c00 100644
--- a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -9,13 +9,13 @@
// RDF-based generic dead code elimination.
#include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/Support/Debug.h"
#include <queue>
diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.h b/llvm/lib/Target/Hexagon/RDFDeadCode.h
index 7f91977e1d6c..859c8161d355 100644
--- a/llvm/lib/Target/Hexagon/RDFDeadCode.h
+++ b/llvm/lib/Target/Hexagon/RDFDeadCode.h
@@ -23,8 +23,8 @@
#ifndef RDF_DEADCODE_H
#define RDF_DEADCODE_H
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/ADT/SetVector.h"
namespace llvm {
diff --git a/llvm/lib/Target/Hexagon/RDFGraph.cpp b/llvm/lib/Target/Hexagon/RDFGraph.cpp
deleted file mode 100644
index 0cb35dc98819..000000000000
--- a/llvm/lib/Target/Hexagon/RDFGraph.cpp
+++ /dev/null
@@ -1,1835 +0,0 @@
-//===- RDFGraph.cpp -------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Target-independent, SSA-based data flow graph for register data flow (RDF).
-//
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominanceFrontier.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/MC/LaneBitmask.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <iterator>
-#include <set>
-#include <utility>
-#include <vector>
-
-using namespace llvm;
-using namespace rdf;
-
-// Printing functions. Have them here first, so that the rest of the code
-// can use them.
-namespace llvm {
-namespace rdf {
-
-raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P) {
- if (!P.Mask.all())
- OS << ':' << PrintLaneMask(P.Mask);
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
- auto &TRI = P.G.getTRI();
- if (P.Obj.Reg > 0 && P.Obj.Reg < TRI.getNumRegs())
- OS << TRI.getName(P.Obj.Reg);
- else
- OS << '#' << P.Obj.Reg;
- OS << PrintLaneMaskOpt(P.Obj.Mask);
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
- auto NA = P.G.addr<NodeBase*>(P.Obj);
- uint16_t Attrs = NA.Addr->getAttrs();
- uint16_t Kind = NodeAttrs::kind(Attrs);
- uint16_t Flags = NodeAttrs::flags(Attrs);
- switch (NodeAttrs::type(Attrs)) {
- case NodeAttrs::Code:
- switch (Kind) {
- case NodeAttrs::Func: OS << 'f'; break;
- case NodeAttrs::Block: OS << 'b'; break;
- case NodeAttrs::Stmt: OS << 's'; break;
- case NodeAttrs::Phi: OS << 'p'; break;
- default: OS << "c?"; break;
- }
- break;
- case NodeAttrs::Ref:
- if (Flags & NodeAttrs::Undef)
- OS << '/';
- if (Flags & NodeAttrs::Dead)
- OS << '\\';
- if (Flags & NodeAttrs::Preserving)
- OS << '+';
- if (Flags & NodeAttrs::Clobbering)
- OS << '~';
- switch (Kind) {
- case NodeAttrs::Use: OS << 'u'; break;
- case NodeAttrs::Def: OS << 'd'; break;
- case NodeAttrs::Block: OS << 'b'; break;
- default: OS << "r?"; break;
- }
- break;
- default:
- OS << '?';
- break;
- }
- OS << P.Obj;
- if (Flags & NodeAttrs::Shadow)
- OS << '"';
- return OS;
-}
-
-static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
- const DataFlowGraph &G) {
- OS << Print<NodeId>(RA.Id, G) << '<'
- << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
- if (RA.Addr->getFlags() & NodeAttrs::Fixed)
- OS << '!';
-}
-
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
- printRefHeader(OS, P.Obj, P.G);
- OS << '(';
- if (NodeId N = P.Obj.Addr->getReachingDef())
- OS << Print<NodeId>(N, P.G);
- OS << ',';
- if (NodeId N = P.Obj.Addr->getReachedDef())
- OS << Print<NodeId>(N, P.G);
- OS << ',';
- if (NodeId N = P.Obj.Addr->getReachedUse())
- OS << Print<NodeId>(N, P.G);
- OS << "):";
- if (NodeId N = P.Obj.Addr->getSibling())
- OS << Print<NodeId>(N, P.G);
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
- printRefHeader(OS, P.Obj, P.G);
- OS << '(';
- if (NodeId N = P.Obj.Addr->getReachingDef())
- OS << Print<NodeId>(N, P.G);
- OS << "):";
- if (NodeId N = P.Obj.Addr->getSibling())
- OS << Print<NodeId>(N, P.G);
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<PhiUseNode*>> &P) {
- printRefHeader(OS, P.Obj, P.G);
- OS << '(';
- if (NodeId N = P.Obj.Addr->getReachingDef())
- OS << Print<NodeId>(N, P.G);
- OS << ',';
- if (NodeId N = P.Obj.Addr->getPredecessor())
- OS << Print<NodeId>(N, P.G);
- OS << "):";
- if (NodeId N = P.Obj.Addr->getSibling())
- OS << Print<NodeId>(N, P.G);
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) {
- switch (P.Obj.Addr->getKind()) {
- case NodeAttrs::Def:
- OS << PrintNode<DefNode*>(P.Obj, P.G);
- break;
- case NodeAttrs::Use:
- if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef)
- OS << PrintNode<PhiUseNode*>(P.Obj, P.G);
- else
- OS << PrintNode<UseNode*>(P.Obj, P.G);
- break;
- }
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
- unsigned N = P.Obj.size();
- for (auto I : P.Obj) {
- OS << Print<NodeId>(I.Id, P.G);
- if (--N)
- OS << ' ';
- }
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
- unsigned N = P.Obj.size();
- for (auto I : P.Obj) {
- OS << Print<NodeId>(I, P.G);
- if (--N)
- OS << ' ';
- }
- return OS;
-}
-
-namespace {
-
- template <typename T>
- struct PrintListV {
- PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
-
- using Type = T;
- const NodeList &List;
- const DataFlowGraph &G;
- };
-
- template <typename T>
- raw_ostream &operator<< (raw_ostream &OS, const PrintListV<T> &P) {
- unsigned N = P.List.size();
- for (NodeAddr<T> A : P.List) {
- OS << PrintNode<T>(A, P.G);
- if (--N)
- OS << ", ";
- }
- return OS;
- }
-
-} // end anonymous namespace
-
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
- OS << Print<NodeId>(P.Obj.Id, P.G) << ": phi ["
- << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
- return OS;
-}
-
-raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<StmtNode *>> &P) {
- const MachineInstr &MI = *P.Obj.Addr->getCode();
- unsigned Opc = MI.getOpcode();
- OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc);
- // Print the target for calls and branches (for readability).
- if (MI.isCall() || MI.isBranch()) {
- MachineInstr::const_mop_iterator T =
- llvm::find_if(MI.operands(),
- [] (const MachineOperand &Op) -> bool {
- return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
- });
- if (T != MI.operands_end()) {
- OS << ' ';
- if (T->isMBB())
- OS << printMBBReference(*T->getMBB());
- else if (T->isGlobal())
- OS << T->getGlobal()->getName();
- else if (T->isSymbol())
- OS << T->getSymbolName();
- }
- }
- OS << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<InstrNode*>> &P) {
- switch (P.Obj.Addr->getKind()) {
- case NodeAttrs::Phi:
- OS << PrintNode<PhiNode*>(P.Obj, P.G);
- break;
- case NodeAttrs::Stmt:
- OS << PrintNode<StmtNode*>(P.Obj, P.G);
- break;
- default:
- OS << "instr? " << Print<NodeId>(P.Obj.Id, P.G);
- break;
- }
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<BlockNode*>> &P) {
- MachineBasicBlock *BB = P.Obj.Addr->getCode();
- unsigned NP = BB->pred_size();
- std::vector<int> Ns;
- auto PrintBBs = [&OS] (std::vector<int> Ns) -> void {
- unsigned N = Ns.size();
- for (int I : Ns) {
- OS << "%bb." << I;
- if (--N)
- OS << ", ";
- }
- };
-
- OS << Print<NodeId>(P.Obj.Id, P.G) << ": --- " << printMBBReference(*BB)
- << " --- preds(" << NP << "): ";
- for (MachineBasicBlock *B : BB->predecessors())
- Ns.push_back(B->getNumber());
- PrintBBs(Ns);
-
- unsigned NS = BB->succ_size();
- OS << " succs(" << NS << "): ";
- Ns.clear();
- for (MachineBasicBlock *B : BB->successors())
- Ns.push_back(B->getNumber());
- PrintBBs(Ns);
- OS << '\n';
-
- for (auto I : P.Obj.Addr->members(P.G))
- OS << PrintNode<InstrNode*>(I, P.G) << '\n';
- return OS;
-}
-
-raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<FuncNode *>> &P) {
- OS << "DFG dump:[\n" << Print<NodeId>(P.Obj.Id, P.G) << ": Function: "
- << P.Obj.Addr->getCode()->getName() << '\n';
- for (auto I : P.Obj.Addr->members(P.G))
- OS << PrintNode<BlockNode*>(I, P.G) << '\n';
- OS << "]\n";
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
- OS << '{';
- for (auto I : P.Obj)
- OS << ' ' << Print<RegisterRef>(I, P.G);
- OS << " }";
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterAggr> &P) {
- P.Obj.print(OS);
- return OS;
-}
-
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<DataFlowGraph::DefStack> &P) {
- for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) {
- OS << Print<NodeId>(I->Id, P.G)
- << '<' << Print<RegisterRef>(I->Addr->getRegRef(P.G), P.G) << '>';
- I.down();
- if (I != E)
- OS << ' ';
- }
- return OS;
-}
-
-} // end namespace rdf
-} // end namespace llvm
-
-// Node allocation functions.
-//
-// Node allocator is like a slab memory allocator: it allocates blocks of
-// memory in sizes that are multiples of the size of a node. Each block has
-// the same size. Nodes are allocated from the currently active block, and
-// when it becomes full, a new one is created.
-// There is a mapping scheme between node id and its location in a block,
-// and within that block is described in the header file.
-//
-void NodeAllocator::startNewBlock() {
- void *T = MemPool.Allocate(NodesPerBlock*NodeMemSize, NodeMemSize);
- char *P = static_cast<char*>(T);
- Blocks.push_back(P);
- // Check if the block index is still within the allowed range, i.e. less
- // than 2^N, where N is the number of bits in NodeId for the block index.
- // BitsPerIndex is the number of bits per node index.
- assert((Blocks.size() < ((size_t)1 << (8*sizeof(NodeId)-BitsPerIndex))) &&
- "Out of bits for block index");
- ActiveEnd = P;
-}
-
-bool NodeAllocator::needNewBlock() {
- if (Blocks.empty())
- return true;
-
- char *ActiveBegin = Blocks.back();
- uint32_t Index = (ActiveEnd-ActiveBegin)/NodeMemSize;
- return Index >= NodesPerBlock;
-}
-
-NodeAddr<NodeBase*> NodeAllocator::New() {
- if (needNewBlock())
- startNewBlock();
-
- uint32_t ActiveB = Blocks.size()-1;
- uint32_t Index = (ActiveEnd - Blocks[ActiveB])/NodeMemSize;
- NodeAddr<NodeBase*> NA = { reinterpret_cast<NodeBase*>(ActiveEnd),
- makeId(ActiveB, Index) };
- ActiveEnd += NodeMemSize;
- return NA;
-}
-
-NodeId NodeAllocator::id(const NodeBase *P) const {
- uintptr_t A = reinterpret_cast<uintptr_t>(P);
- for (unsigned i = 0, n = Blocks.size(); i != n; ++i) {
- uintptr_t B = reinterpret_cast<uintptr_t>(Blocks[i]);
- if (A < B || A >= B + NodesPerBlock*NodeMemSize)
- continue;
- uint32_t Idx = (A-B)/NodeMemSize;
- return makeId(i, Idx);
- }
- llvm_unreachable("Invalid node address");
-}
-
-void NodeAllocator::clear() {
- MemPool.Reset();
- Blocks.clear();
- ActiveEnd = nullptr;
-}
-
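// A hedged sketch of the NodeId packing described in the comment above:
// the block number goes in the high bits, the in-block index in the low
// BitsPerIndex bits. Illustrative only; the real makeId() in RDFGraph.h
// additionally avoids producing the reserved null id 0.
static uint32_t makeIdSketch(uint32_t Block, uint32_t Index,
                             unsigned BitsPerIndex) {
  return (Block << BitsPerIndex) | Index;  // id() above inverts this mapping
}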
-// Insert node NA after "this" in the circular chain.
-void NodeBase::append(NodeAddr<NodeBase*> NA) {
- NodeId Nx = Next;
- // If NA is already "next", do nothing.
- if (Next != NA.Id) {
- Next = NA.Id;
- NA.Addr->Next = Nx;
- }
-}
-
-// Fundamental node manipulator functions.
-
-// Obtain the register reference from a reference node.
-RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const {
- assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
- if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
- return G.unpack(Ref.PR);
- assert(Ref.Op != nullptr);
- return G.makeRegRef(*Ref.Op);
-}
-
-// Set the register reference in the reference node directly (for references
-// in phi nodes).
-void RefNode::setRegRef(RegisterRef RR, DataFlowGraph &G) {
- assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
- assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef);
- Ref.PR = G.pack(RR);
-}
-
-// Set the register reference in the reference node based on a machine
-// operand (for references in statement nodes).
-void RefNode::setRegRef(MachineOperand *Op, DataFlowGraph &G) {
- assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
- assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef));
- (void)G;
- Ref.Op = Op;
-}
-
-// Get the owner of a given reference node.
-NodeAddr<NodeBase*> RefNode::getOwner(const DataFlowGraph &G) {
- NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
-
- while (NA.Addr != this) {
- if (NA.Addr->getType() == NodeAttrs::Code)
- return NA;
- NA = G.addr<NodeBase*>(NA.Addr->getNext());
- }
- llvm_unreachable("No owner in circular list");
-}
-
-// Connect the def node to the reaching def node.
-void DefNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
- Ref.RD = DA.Id;
- Ref.Sib = DA.Addr->getReachedDef();
- DA.Addr->setReachedDef(Self);
-}
-
-// Connect the use node to the reaching def node.
-void UseNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
- Ref.RD = DA.Id;
- Ref.Sib = DA.Addr->getReachedUse();
- DA.Addr->setReachedUse(Self);
-}
-
-// Get the first member of the code node.
-NodeAddr<NodeBase*> CodeNode::getFirstMember(const DataFlowGraph &G) const {
- if (Code.FirstM == 0)
- return NodeAddr<NodeBase*>();
- return G.addr<NodeBase*>(Code.FirstM);
-}
-
-// Get the last member of the code node.
-NodeAddr<NodeBase*> CodeNode::getLastMember(const DataFlowGraph &G) const {
- if (Code.LastM == 0)
- return NodeAddr<NodeBase*>();
- return G.addr<NodeBase*>(Code.LastM);
-}
-
-// Add node NA at the end of the member list of the given code node.
-void CodeNode::addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
- NodeAddr<NodeBase*> ML = getLastMember(G);
- if (ML.Id != 0) {
- ML.Addr->append(NA);
- } else {
- Code.FirstM = NA.Id;
- NodeId Self = G.id(this);
- NA.Addr->setNext(Self);
- }
- Code.LastM = NA.Id;
-}
-
-// Add node NA after member node MA in the given code node.
-void CodeNode::addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
- const DataFlowGraph &G) {
- MA.Addr->append(NA);
- if (Code.LastM == MA.Id)
- Code.LastM = NA.Id;
-}
-
-// Remove member node NA from the given code node.
-void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
- NodeAddr<NodeBase*> MA = getFirstMember(G);
- assert(MA.Id != 0);
-
- // Special handling if the member to remove is the first member.
- if (MA.Id == NA.Id) {
- if (Code.LastM == MA.Id) {
- // If it is the only member, set both first and last to 0.
- Code.FirstM = Code.LastM = 0;
- } else {
- // Otherwise, advance the first member.
- Code.FirstM = MA.Addr->getNext();
- }
- return;
- }
-
- while (MA.Addr != this) {
- NodeId MX = MA.Addr->getNext();
- if (MX == NA.Id) {
- MA.Addr->setNext(NA.Addr->getNext());
- // If the member to remove happens to be the last one, update the
- // LastM indicator.
- if (Code.LastM == NA.Id)
- Code.LastM = MA.Id;
- return;
- }
- MA = G.addr<NodeBase*>(MX);
- }
- llvm_unreachable("No such member");
-}
-
-// Return the list of all members of the code node.
-NodeList CodeNode::members(const DataFlowGraph &G) const {
- static auto True = [] (NodeAddr<NodeBase*>) -> bool { return true; };
- return members_if(True, G);
-}
-
-// Return the owner of the given instr node.
-NodeAddr<NodeBase*> InstrNode::getOwner(const DataFlowGraph &G) {
- NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
-
- while (NA.Addr != this) {
- assert(NA.Addr->getType() == NodeAttrs::Code);
- if (NA.Addr->getKind() == NodeAttrs::Block)
- return NA;
- NA = G.addr<NodeBase*>(NA.Addr->getNext());
- }
- llvm_unreachable("No owner in circular list");
-}
-
-// Add the phi node PA to the given block node.
-void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) {
- NodeAddr<NodeBase*> M = getFirstMember(G);
- if (M.Id == 0) {
- addMember(PA, G);
- return;
- }
-
- assert(M.Addr->getType() == NodeAttrs::Code);
- if (M.Addr->getKind() == NodeAttrs::Stmt) {
- // If the first member of the block is a statement, insert the phi as
- // the first member.
- Code.FirstM = PA.Id;
- PA.Addr->setNext(M.Id);
- } else {
- // If the first member is a phi, find the last phi, and append PA to it.
- assert(M.Addr->getKind() == NodeAttrs::Phi);
- NodeAddr<NodeBase*> MN = M;
- do {
- M = MN;
- MN = G.addr<NodeBase*>(M.Addr->getNext());
- assert(MN.Addr->getType() == NodeAttrs::Code);
- } while (MN.Addr->getKind() == NodeAttrs::Phi);
-
- // M is the last phi.
- addMemberAfter(M, PA, G);
- }
-}
-
-// Find the block node corresponding to the machine basic block BB in the
-// given func node.
-NodeAddr<BlockNode*> FuncNode::findBlock(const MachineBasicBlock *BB,
- const DataFlowGraph &G) const {
- auto EqBB = [BB] (NodeAddr<NodeBase*> NA) -> bool {
- return NodeAddr<BlockNode*>(NA).Addr->getCode() == BB;
- };
- NodeList Ms = members_if(EqBB, G);
- if (!Ms.empty())
- return Ms[0];
- return NodeAddr<BlockNode*>();
-}
-
-// Get the block node for the entry block in the given function.
-NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
- MachineBasicBlock *EntryB = &getCode()->front();
- return findBlock(EntryB, G);
-}
-
-// Target operand information.
-//
-
-// For a given instruction, check if there are any bits of RR that can remain
-// unchanged across this def.
-bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum)
- const {
- return TII.isPredicated(In);
-}
-
-// Check if the definition of RR produces an unspecified value.
-bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
- const {
- const MachineOperand &Op = In.getOperand(OpNum);
- if (Op.isRegMask())
- return true;
- assert(Op.isReg());
- if (In.isCall())
- if (Op.isDef() && Op.isDead())
- return true;
- return false;
-}
-
-// Check if the given instruction specifically requires
-bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
- const {
- if (In.isCall() || In.isReturn() || In.isInlineAsm())
- return true;
- // Check for a tail call.
- if (In.isBranch())
- for (const MachineOperand &O : In.operands())
- if (O.isGlobal() || O.isSymbol())
- return true;
-
- const MCInstrDesc &D = In.getDesc();
- if (!D.getImplicitDefs() && !D.getImplicitUses())
- return false;
- const MachineOperand &Op = In.getOperand(OpNum);
- // If there is a sub-register, treat the operand as non-fixed. Currently,
- // fixed registers are those that are listed in the descriptor as implicit
- // uses or defs, and those lists do not allow sub-registers.
- if (Op.getSubReg() != 0)
- return false;
- Register Reg = Op.getReg();
- const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs()
- : D.getImplicitUses();
- if (!ImpR)
- return false;
- while (*ImpR)
- if (*ImpR++ == Reg)
- return true;
- return false;
-}
-
-//
-// The data flow graph construction.
-//
-
-DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
- const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
- : MF(mf), TII(tii), TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(toi),
- LiveIns(PRI) {
-}
-
-// The implementation of the definition stack.
-// Each register reference has its own definition stack. In particular,
-// for a register references "Reg" and "Reg:subreg" will each have their
-// own definition stacks.
-
-// Construct a stack iterator.
-DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S,
- bool Top) : DS(S) {
- if (!Top) {
- // Initialize to bottom.
- Pos = 0;
- return;
- }
- // Initialize to the top, i.e. top-most non-delimiter (or 0, if empty).
- Pos = DS.Stack.size();
- while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos-1]))
- Pos--;
-}
-
-// Return the size of the stack, including block delimiters.
-unsigned DataFlowGraph::DefStack::size() const {
- unsigned S = 0;
- for (auto I = top(), E = bottom(); I != E; I.down())
- S++;
- return S;
-}
-
-// Remove the top entry from the stack. Remove all intervening delimiters
-// so that after this, the stack is either empty, or the top of the stack
-// is a non-delimiter.
-void DataFlowGraph::DefStack::pop() {
- assert(!empty());
- unsigned P = nextDown(Stack.size());
- Stack.resize(P);
-}
-
-// Push a delimiter for block node N on the stack.
-void DataFlowGraph::DefStack::start_block(NodeId N) {
- assert(N != 0);
- Stack.push_back(NodeAddr<DefNode*>(nullptr, N));
-}
-
-// Remove all nodes from the top of the stack, until the delimited for
-// block node N is encountered. Remove the delimiter as well. In effect,
-// this will remove from the stack all definitions from block N.
-void DataFlowGraph::DefStack::clear_block(NodeId N) {
- assert(N != 0);
- unsigned P = Stack.size();
- while (P > 0) {
- bool Found = isDelimiter(Stack[P-1], N);
- P--;
- if (Found)
- break;
- }
- // This will also remove the delimiter, if found.
- Stack.resize(P);
-}
-
-// Move the stack iterator up by one.
-unsigned DataFlowGraph::DefStack::nextUp(unsigned P) const {
- // Get the next valid position after P (skipping all delimiters).
- // The input position P does not have to point to a non-delimiter.
- unsigned SS = Stack.size();
- bool IsDelim;
- assert(P < SS);
- do {
- P++;
- IsDelim = isDelimiter(Stack[P-1]);
- } while (P < SS && IsDelim);
- assert(!IsDelim);
- return P;
-}
-
-// Move the stack iterator down by one.
-unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
- // Get the preceding valid position before P (skipping all delimiters).
- // The input position P does not have to point to a non-delimiter.
- assert(P > 0 && P <= Stack.size());
- bool IsDelim = isDelimiter(Stack[P-1]);
- do {
- if (--P == 0)
- break;
- IsDelim = isDelimiter(Stack[P-1]);
- } while (P > 0 && IsDelim);
- assert(!IsDelim);
- return P;
-}
-
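// A hedged usage sketch of the per-register def stacks described above;
// all values below are hypothetical. Entering a block pushes a delimiter
// on each stack, and leaving the block unwinds everything back to it.
rdf::RegisterId R = 1;                    // some physical register id
rdf::NodeId B = 42;                       // the block's node id
rdf::NodeAddr<rdf::DefNode*> DefA, DefB;  // defs found while in block B
rdf::DataFlowGraph::DefStackMap DefM;
DefM[R].start_block(B);                   // push delimiter for block B
DefM[R].push(DefA);
DefM[R].push(DefB);
DefM[R].clear_block(B);                   // pops DefA, DefB, delimiter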
-// Register information.
-
-RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
- RegisterSet LR;
- const Function &F = MF.getFunction();
- const Constant *PF = F.hasPersonalityFn() ? F.getPersonalityFn()
- : nullptr;
- const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
- if (RegisterId R = TLI.getExceptionPointerRegister(PF))
- LR.insert(RegisterRef(R));
- if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
- LR.insert(RegisterRef(R));
- return LR;
-}
-
-// Node management functions.
-
-// Get the pointer to the node with the id N.
-NodeBase *DataFlowGraph::ptr(NodeId N) const {
- if (N == 0)
- return nullptr;
- return Memory.ptr(N);
-}
-
-// Get the id of the node at the address P.
-NodeId DataFlowGraph::id(const NodeBase *P) const {
- if (P == nullptr)
- return 0;
- return Memory.id(P);
-}
-
-// Allocate a new node and set the attributes to Attrs.
-NodeAddr<NodeBase*> DataFlowGraph::newNode(uint16_t Attrs) {
- NodeAddr<NodeBase*> P = Memory.New();
- P.Addr->init();
- P.Addr->setAttrs(Attrs);
- return P;
-}
-
-// Make a copy of the given node B, except for the data-flow links, which
-// are set to 0.
-NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
- NodeAddr<NodeBase*> NA = newNode(0);
- memcpy(NA.Addr, B.Addr, sizeof(NodeBase));
- // Ref nodes need to have the data-flow links reset.
- if (NA.Addr->getType() == NodeAttrs::Ref) {
- NodeAddr<RefNode*> RA = NA;
- RA.Addr->setReachingDef(0);
- RA.Addr->setSibling(0);
- if (NA.Addr->getKind() == NodeAttrs::Def) {
- NodeAddr<DefNode*> DA = NA;
- DA.Addr->setReachedDef(0);
- DA.Addr->setReachedUse(0);
- }
- }
- return NA;
-}
-
-// Allocation routines for specific node types/kinds.
-
-NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags) {
- NodeAddr<UseNode*> UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
- UA.Addr->setRegRef(&Op, *this);
- return UA;
-}
-
-NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner,
- RegisterRef RR, NodeAddr<BlockNode*> PredB, uint16_t Flags) {
- NodeAddr<PhiUseNode*> PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
- assert(Flags & NodeAttrs::PhiRef);
- PUA.Addr->setRegRef(RR, *this);
- PUA.Addr->setPredecessor(PredB.Id);
- return PUA;
-}
-
-NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags) {
- NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
- DA.Addr->setRegRef(&Op, *this);
- return DA;
-}
-
-NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
- RegisterRef RR, uint16_t Flags) {
- NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
- assert(Flags & NodeAttrs::PhiRef);
- DA.Addr->setRegRef(RR, *this);
- return DA;
-}
-
-NodeAddr<PhiNode*> DataFlowGraph::newPhi(NodeAddr<BlockNode*> Owner) {
- NodeAddr<PhiNode*> PA = newNode(NodeAttrs::Code | NodeAttrs::Phi);
- Owner.Addr->addPhi(PA, *this);
- return PA;
-}
-
-NodeAddr<StmtNode*> DataFlowGraph::newStmt(NodeAddr<BlockNode*> Owner,
- MachineInstr *MI) {
- NodeAddr<StmtNode*> SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt);
- SA.Addr->setCode(MI);
- Owner.Addr->addMember(SA, *this);
- return SA;
-}
-
-NodeAddr<BlockNode*> DataFlowGraph::newBlock(NodeAddr<FuncNode*> Owner,
- MachineBasicBlock *BB) {
- NodeAddr<BlockNode*> BA = newNode(NodeAttrs::Code | NodeAttrs::Block);
- BA.Addr->setCode(BB);
- Owner.Addr->addMember(BA, *this);
- return BA;
-}
-
-NodeAddr<FuncNode*> DataFlowGraph::newFunc(MachineFunction *MF) {
- NodeAddr<FuncNode*> FA = newNode(NodeAttrs::Code | NodeAttrs::Func);
- FA.Addr->setCode(MF);
- return FA;
-}
-
-// Build the data flow graph.
-void DataFlowGraph::build(unsigned Options) {
- reset();
- Func = newFunc(&MF);
-
- if (MF.empty())
- return;
-
- for (MachineBasicBlock &B : MF) {
- NodeAddr<BlockNode*> BA = newBlock(Func, &B);
- BlockNodes.insert(std::make_pair(&B, BA));
- for (MachineInstr &I : B) {
- if (I.isDebugInstr())
- continue;
- buildStmt(BA, I);
- }
- }
-
- NodeAddr<BlockNode*> EA = Func.Addr->getEntryBlock(*this);
- NodeList Blocks = Func.Addr->members(*this);
-
- // Collect information about block references.
- RegisterSet AllRefs;
- for (NodeAddr<BlockNode*> BA : Blocks)
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
- for (NodeAddr<RefNode*> RA : IA.Addr->members(*this))
- AllRefs.insert(RA.Addr->getRegRef(*this));
-
- // Collect function live-ins and entry block live-ins.
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineBasicBlock &EntryB = *EA.Addr->getCode();
- assert(EntryB.pred_empty() && "Function entry block has predecessors");
- for (std::pair<unsigned,unsigned> P : MRI.liveins())
- LiveIns.insert(RegisterRef(P.first));
- if (MRI.tracksLiveness()) {
- for (auto I : EntryB.liveins())
- LiveIns.insert(RegisterRef(I.PhysReg, I.LaneMask));
- }
-
- // Add function-entry phi nodes for the live-in registers.
- //for (std::pair<RegisterId,LaneBitmask> P : LiveIns) {
- for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) {
- RegisterRef RR = *I;
- NodeAddr<PhiNode*> PA = newPhi(EA);
- uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
- NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
- PA.Addr->addMember(DA, *this);
- }
-
- // Add phis for landing pads.
- // Landing pads, unlike usual backs blocks, are not entered through
- // branches in the program, or fall-throughs from other blocks. They
- // are entered from the exception handling runtime and target's ABI
- // may define certain registers as defined on entry to such a block.
- RegisterSet EHRegs = getLandingPadLiveIns();
- if (!EHRegs.empty()) {
- for (NodeAddr<BlockNode*> BA : Blocks) {
- const MachineBasicBlock &B = *BA.Addr->getCode();
- if (!B.isEHPad())
- continue;
-
- // Prepare a list of NodeIds of the block's predecessors.
- NodeList Preds;
- for (MachineBasicBlock *PB : B.predecessors())
- Preds.push_back(findBlock(PB));
-
- // Build phi nodes for each live-in.
- for (RegisterRef RR : EHRegs) {
- NodeAddr<PhiNode*> PA = newPhi(BA);
- uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
- // Add def:
- NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
- PA.Addr->addMember(DA, *this);
- // Add uses (no reaching defs for phi uses):
- for (NodeAddr<BlockNode*> PBA : Preds) {
- NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
- PA.Addr->addMember(PUA, *this);
- }
- }
- }
- }
-
- // Build a map "PhiM" which will contain, for each block, the set
- // of references that will require phi definitions in that block.
- BlockRefsMap PhiM;
- for (NodeAddr<BlockNode*> BA : Blocks)
- recordDefsForDF(PhiM, BA);
- for (NodeAddr<BlockNode*> BA : Blocks)
- buildPhis(PhiM, AllRefs, BA);
-
- // Link all the refs. This will recursively traverse the dominator tree.
- DefStackMap DM;
- linkBlockRefs(DM, EA);
-
- // Finally, remove all unused phi nodes.
- if (!(Options & BuildOptions::KeepDeadPhis))
- removeUnusedPhis();
-}
-
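// For orientation, roughly how the Hexagon RDF passes drive the build
// sequence above (setup of the arguments is elided; see
// HexagonOptAddrMode.cpp for a real call site):
rdf::DataFlowGraph G(MF, TII, TRI, MDT, MDF, TOI);
G.build(rdf::BuildOptions::None);  // pass KeepDeadPhis to skip pruning
rdf::Liveness LV(MF.getRegInfo(), G);
LV.computePhiInfo();               // resolve uses reached through phis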
-RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const {
- assert(PhysicalRegisterInfo::isRegMaskId(Reg) ||
- Register::isPhysicalRegister(Reg));
- assert(Reg != 0);
- if (Sub != 0)
- Reg = TRI.getSubReg(Reg, Sub);
- return RegisterRef(Reg);
-}
-
-RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const {
- assert(Op.isReg() || Op.isRegMask());
- if (Op.isReg())
- return makeRegRef(Op.getReg(), Op.getSubReg());
- return RegisterRef(PRI.getRegMaskId(Op.getRegMask()), LaneBitmask::getAll());
-}
-
-RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
- if (AR.Reg == BR.Reg) {
- LaneBitmask M = AR.Mask & BR.Mask;
- return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef();
- }
-#ifndef NDEBUG
-// RegisterRef NAR = PRI.normalize(AR);
-// RegisterRef NBR = PRI.normalize(BR);
-// assert(NAR.Reg != NBR.Reg);
-#endif
- // This isn't strictly correct, because the overlap may happen in the
- // part masked out.
- if (PRI.alias(AR, BR))
- return AR;
- return RegisterRef();
-}
-
-// For each stack in the map DefM, push the delimiter for block B on it.
-void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) {
- // Push block delimiters.
- for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I)
- I->second.start_block(B);
-}
-
-// Remove all definitions coming from block B from each stack in DefM.
-void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) {
- // Pop all defs from this block from the definition stack. Defs that were
- // added to the map during the traversal of instructions will not have a
- // delimiter, but for those, the whole stack will be emptied.
- for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I)
- I->second.clear_block(B);
-
- // Finally, remove empty stacks from the map.
- for (auto I = DefM.begin(), E = DefM.end(), NextI = I; I != E; I = NextI) {
- NextI = std::next(I);
- // This preserves the validity of iterators other than I.
- if (I->second.empty())
- DefM.erase(I);
- }
-}
-
-// Push all definitions from the instruction node IA to an appropriate
-// stack in DefM.
-void DataFlowGraph::pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
- pushClobbers(IA, DefM);
- pushDefs(IA, DefM);
-}
-
-// Push all definitions from the instruction node IA to an appropriate
-// stack in DefM.
-void DataFlowGraph::pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
- NodeSet Visited;
- std::set<RegisterId> Defined;
-
- // The important objectives of this function are:
- // - to be able to handle instructions both while the graph is being
- // constructed, and after the graph has been constructed, and
- // - maintain proper ordering of definitions on the stack for each
- // register reference:
- // - if there are two or more related defs in IA (i.e. coming from
- // the same machine operand), then only push one def on the stack,
- // - if there are multiple unrelated defs of non-overlapping
- // subregisters of S, then the stack for S will have both (in an
- // unspecified order), but the order does not matter from the data-
- // -flow perspective.
-
- for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) {
- if (Visited.count(DA.Id))
- continue;
- if (!(DA.Addr->getFlags() & NodeAttrs::Clobbering))
- continue;
-
- NodeList Rel = getRelatedRefs(IA, DA);
- NodeAddr<DefNode*> PDA = Rel.front();
- RegisterRef RR = PDA.Addr->getRegRef(*this);
-
- // Push the definition on the stack for the register and all aliases.
- // The def stack traversal in linkNodeUp will check the exact aliasing.
- DefM[RR.Reg].push(DA);
- Defined.insert(RR.Reg);
- for (RegisterId A : PRI.getAliasSet(RR.Reg)) {
- // Check that we don't push the same def twice.
- assert(A != RR.Reg);
- if (!Defined.count(A))
- DefM[A].push(DA);
- }
- // Mark all the related defs as visited.
- for (NodeAddr<NodeBase*> T : Rel)
- Visited.insert(T.Id);
- }
-}
-
-// Push all definitions from the instruction node IA to an appropriate
-// stack in DefM.
-void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
- NodeSet Visited;
-#ifndef NDEBUG
- std::set<RegisterId> Defined;
-#endif
-
- // The important objectives of this function are:
- // - to be able to handle instructions both while the graph is being
- // constructed, and after the graph has been constructed, and
- // - maintain proper ordering of definitions on the stack for each
- // register reference:
- // - if there are two or more related defs in IA (i.e. coming from
- // the same machine operand), then only push one def on the stack,
- // - if there are multiple unrelated defs of non-overlapping
- // subregisters of S, then the stack for S will have both (in an
- // unspecified order), but the order does not matter from the data-
- // -flow perspective.
-
- for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) {
- if (Visited.count(DA.Id))
- continue;
- if (DA.Addr->getFlags() & NodeAttrs::Clobbering)
- continue;
-
- NodeList Rel = getRelatedRefs(IA, DA);
- NodeAddr<DefNode*> PDA = Rel.front();
- RegisterRef RR = PDA.Addr->getRegRef(*this);
-#ifndef NDEBUG
- // Assert if the register is defined in two or more unrelated defs.
- // This could happen if there are two or more def operands defining it.
- if (!Defined.insert(RR.Reg).second) {
- MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
- dbgs() << "Multiple definitions of register: "
- << Print<RegisterRef>(RR, *this) << " in\n " << *MI << "in "
- << printMBBReference(*MI->getParent()) << '\n';
- llvm_unreachable(nullptr);
- }
-#endif
- // Push the definition on the stack for the register and all aliases.
- // The def stack traversal in linkNodeUp will check the exact aliasing.
- DefM[RR.Reg].push(DA);
- for (RegisterId A : PRI.getAliasSet(RR.Reg)) {
- // Check that we don't push the same def twice.
- assert(A != RR.Reg);
- DefM[A].push(DA);
- }
- // Mark all the related defs as visited.
- for (NodeAddr<NodeBase*> T : Rel)
- Visited.insert(T.Id);
- }
-}
-
-// Return the list of all reference nodes related to RA, including RA itself.
-// See "getNextRelated" for the meaning of a "related reference".
-NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const {
- assert(IA.Id != 0 && RA.Id != 0);
-
- NodeList Refs;
- NodeId Start = RA.Id;
- do {
- Refs.push_back(RA);
- RA = getNextRelated(IA, RA);
- } while (RA.Id != 0 && RA.Id != Start);
- return Refs;
-}
-
-// Clear all information in the graph.
-void DataFlowGraph::reset() {
- Memory.clear();
- BlockNodes.clear();
- Func = NodeAddr<FuncNode*>();
-}
-
-// Return the next reference node in the instruction node IA that is related
-// to RA. Conceptually, two reference nodes are related if they refer to the
-// same instance of a register access, but differ in flags or other minor
-// characteristics. Specific examples of related nodes are shadow reference
-// nodes.
-// Return the equivalent of nullptr if there are no more related references.
-NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const {
- assert(IA.Id != 0 && RA.Id != 0);
-
- auto Related = [this,RA](NodeAddr<RefNode*> TA) -> bool {
- if (TA.Addr->getKind() != RA.Addr->getKind())
- return false;
- if (TA.Addr->getRegRef(*this) != RA.Addr->getRegRef(*this))
- return false;
- return true;
- };
- auto RelatedStmt = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
- return Related(TA) &&
- &RA.Addr->getOp() == &TA.Addr->getOp();
- };
- auto RelatedPhi = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
- if (!Related(TA))
- return false;
- if (TA.Addr->getKind() != NodeAttrs::Use)
- return true;
- // For phi uses, compare predecessor blocks.
- const NodeAddr<const PhiUseNode*> TUA = TA;
- const NodeAddr<const PhiUseNode*> RUA = RA;
- return TUA.Addr->getPredecessor() == RUA.Addr->getPredecessor();
- };
-
- RegisterRef RR = RA.Addr->getRegRef(*this);
- if (IA.Addr->getKind() == NodeAttrs::Stmt)
- return RA.Addr->getNextRef(RR, RelatedStmt, true, *this);
- return RA.Addr->getNextRef(RR, RelatedPhi, true, *this);
-}
-
-// Find the next node related to RA in IA that satisfies condition P.
-// If such a node was found, return a pair where the second element is the
-// located node. If such a node does not exist, return a pair where the
-// first element is the element after which such a node should be inserted,
-// and the second element is a null-address.
-template <typename Predicate>
-std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
-DataFlowGraph::locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
- Predicate P) const {
- assert(IA.Id != 0 && RA.Id != 0);
-
- NodeAddr<RefNode*> NA;
- NodeId Start = RA.Id;
- while (true) {
- NA = getNextRelated(IA, RA);
- if (NA.Id == 0 || NA.Id == Start)
- break;
- if (P(NA))
- break;
- RA = NA;
- }
-
- if (NA.Id != 0 && NA.Id != Start)
- return std::make_pair(RA, NA);
- return std::make_pair(RA, NodeAddr<RefNode*>());
-}
-
-// Get the next shadow node in IA corresponding to RA, and optionally create
-// such a node if it does not exist.
-NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA, bool Create) {
- assert(IA.Id != 0 && RA.Id != 0);
-
- uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
- auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
- return TA.Addr->getFlags() == Flags;
- };
- auto Loc = locateNextRef(IA, RA, IsShadow);
- if (Loc.second.Id != 0 || !Create)
- return Loc.second;
-
- // Create a copy of RA and mark is as shadow.
- NodeAddr<RefNode*> NA = cloneNode(RA);
- NA.Addr->setFlags(Flags | NodeAttrs::Shadow);
- IA.Addr->addMemberAfter(Loc.first, NA, *this);
- return NA;
-}
-
-// Get the next shadow node in IA corresponding to RA. Return null-address
-// if such a node does not exist.
-NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const {
- assert(IA.Id != 0 && RA.Id != 0);
- uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
- auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
- return TA.Addr->getFlags() == Flags;
- };
- return locateNextRef(IA, RA, IsShadow).second;
-}
-
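// Tying the shadow machinery together: when one ref must be linked to
// several reaching defs that each cover only part of the register,
// linkRefUp() further below marks the ref Shadow and chains extra shadow
// copies after it. A hedged sketch of visiting such a group (G, IA, RA
// are assumed in scope; visitShadow is a hypothetical callback):
for (rdf::NodeAddr<rdf::RefNode*> R : G.getRelatedRefs(IA, RA))
  if (R.Addr->getFlags() & rdf::NodeAttrs::Shadow)
    visitShadow(R);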
-// Create a new statement node in the block node BA that corresponds to
-// the machine instruction MI.
-void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
- NodeAddr<StmtNode*> SA = newStmt(BA, &In);
-
- auto isCall = [] (const MachineInstr &In) -> bool {
- if (In.isCall())
- return true;
- // Is tail call?
- if (In.isBranch()) {
- for (const MachineOperand &Op : In.operands())
- if (Op.isGlobal() || Op.isSymbol())
- return true;
- // Assume indirect branches are calls. This is for the purpose of
- // keeping implicit operands, and so it won't hurt on intra-function
- // indirect branches.
- if (In.isIndirectBranch())
- return true;
- }
- return false;
- };
-
- auto isDefUndef = [this] (const MachineInstr &In, RegisterRef DR) -> bool {
- // This instruction defines DR. Check if there is a use operand that
- // would make DR live on entry to the instruction.
- for (const MachineOperand &Op : In.operands()) {
- if (!Op.isReg() || Op.getReg() == 0 || !Op.isUse() || Op.isUndef())
- continue;
- RegisterRef UR = makeRegRef(Op);
- if (PRI.alias(DR, UR))
- return false;
- }
- return true;
- };
-
- bool IsCall = isCall(In);
- unsigned NumOps = In.getNumOperands();
-
- // Avoid duplicate implicit defs. This will not detect cases of implicit
- // defs that define registers that overlap, but it is not clear how to
- // interpret that in the absence of explicit defs. Overlapping explicit
- // defs are likely illegal already.
- BitVector DoneDefs(TRI.getNumRegs());
- // Process explicit defs first.
- for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
- MachineOperand &Op = In.getOperand(OpN);
- if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
- continue;
- Register R = Op.getReg();
- if (!R || !Register::isPhysicalRegister(R))
- continue;
- uint16_t Flags = NodeAttrs::None;
- if (TOI.isPreserving(In, OpN)) {
- Flags |= NodeAttrs::Preserving;
- // If the def is preserving, check if it is also undefined.
- if (isDefUndef(In, makeRegRef(Op)))
- Flags |= NodeAttrs::Undef;
- }
- if (TOI.isClobbering(In, OpN))
- Flags |= NodeAttrs::Clobbering;
- if (TOI.isFixedReg(In, OpN))
- Flags |= NodeAttrs::Fixed;
- if (IsCall && Op.isDead())
- Flags |= NodeAttrs::Dead;
- NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
- SA.Addr->addMember(DA, *this);
- assert(!DoneDefs.test(R));
- DoneDefs.set(R);
- }
-
- // Process reg-masks (as clobbers).
- BitVector DoneClobbers(TRI.getNumRegs());
- for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
- MachineOperand &Op = In.getOperand(OpN);
- if (!Op.isRegMask())
- continue;
- uint16_t Flags = NodeAttrs::Clobbering | NodeAttrs::Fixed |
- NodeAttrs::Dead;
- NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
- SA.Addr->addMember(DA, *this);
- // Record all clobbered registers in DoneDefs.
- const uint32_t *RM = Op.getRegMask();
- for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i)
- if (!(RM[i/32] & (1u << (i%32))))
- DoneClobbers.set(i);
- }
-
- // Process implicit defs, skipping those that have already been added
- // as explicit.
- for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
- MachineOperand &Op = In.getOperand(OpN);
- if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
- continue;
- Register R = Op.getReg();
- if (!R || !Register::isPhysicalRegister(R) || DoneDefs.test(R))
- continue;
- RegisterRef RR = makeRegRef(Op);
- uint16_t Flags = NodeAttrs::None;
- if (TOI.isPreserving(In, OpN)) {
- Flags |= NodeAttrs::Preserving;
- // If the def is preserving, check if it is also undefined.
- if (isDefUndef(In, RR))
- Flags |= NodeAttrs::Undef;
- }
- if (TOI.isClobbering(In, OpN))
- Flags |= NodeAttrs::Clobbering;
- if (TOI.isFixedReg(In, OpN))
- Flags |= NodeAttrs::Fixed;
- if (IsCall && Op.isDead()) {
- if (DoneClobbers.test(R))
- continue;
- Flags |= NodeAttrs::Dead;
- }
- NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
- SA.Addr->addMember(DA, *this);
- DoneDefs.set(R);
- }
-
- for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
- MachineOperand &Op = In.getOperand(OpN);
- if (!Op.isReg() || !Op.isUse())
- continue;
- Register R = Op.getReg();
- if (!R || !Register::isPhysicalRegister(R))
- continue;
- uint16_t Flags = NodeAttrs::None;
- if (Op.isUndef())
- Flags |= NodeAttrs::Undef;
- if (TOI.isFixedReg(In, OpN))
- Flags |= NodeAttrs::Fixed;
- NodeAddr<UseNode*> UA = newUse(SA, Op, Flags);
- SA.Addr->addMember(UA, *this);
- }
-}
-
-// Scan all defs in the block node BA and record in PhiM the locations of
-// phi nodes corresponding to these defs.
-void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM,
- NodeAddr<BlockNode*> BA) {
- // Check all defs from block BA and record them in each block in BA's
- // iterated dominance frontier. This information will later be used to
- // create phi nodes.
- MachineBasicBlock *BB = BA.Addr->getCode();
- assert(BB);
- auto DFLoc = MDF.find(BB);
- if (DFLoc == MDF.end() || DFLoc->second.empty())
- return;
-
- // Traverse all instructions in the block and collect the set of all
- // defined references. For each reference there will be a phi created
- // in the block's iterated dominance frontier.
- // This is done to make sure that each defined reference gets only one
- // phi node, even if it is defined multiple times.
- RegisterSet Defs;
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
- for (NodeAddr<RefNode*> RA : IA.Addr->members_if(IsDef, *this))
- Defs.insert(RA.Addr->getRegRef(*this));
-
- // Calculate the iterated dominance frontier of BB.
- const MachineDominanceFrontier::DomSetType &DF = DFLoc->second;
- SetVector<MachineBasicBlock*> IDF(DF.begin(), DF.end());
- for (unsigned i = 0; i < IDF.size(); ++i) {
- auto F = MDF.find(IDF[i]);
- if (F != MDF.end())
- IDF.insert(F->second.begin(), F->second.end());
- }
-
- // Finally, add the set of defs to each block in the iterated dominance
- // frontier.
- for (auto DB : IDF) {
- NodeAddr<BlockNode*> DBA = findBlock(DB);
- PhiM[DBA.Id].insert(Defs.begin(), Defs.end());
- }
-}
-
-// Given the locations of phi nodes in the map PhiM, create the phi nodes
-// that are located in the block node BA.
-void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
- NodeAddr<BlockNode*> BA) {
- // Check if this blocks has any DF defs, i.e. if there are any defs
- // that this block is in the iterated dominance frontier of.
- auto HasDF = PhiM.find(BA.Id);
- if (HasDF == PhiM.end() || HasDF->second.empty())
- return;
-
- // First, remove all R in Refs in such that there exists T in Refs
- // such that T covers R. In other words, only leave those refs that
- // are not covered by another ref (i.e. maximal with respect to covering).
-
- auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
- for (RegisterRef I : RRs)
- if (I != RR && RegisterAggr::isCoverOf(I, RR, PRI))
- RR = I;
- return RR;
- };
-
- RegisterSet MaxDF;
- for (RegisterRef I : HasDF->second)
- MaxDF.insert(MaxCoverIn(I, HasDF->second));
-
- std::vector<RegisterRef> MaxRefs;
- for (RegisterRef I : MaxDF)
- MaxRefs.push_back(MaxCoverIn(I, AllRefs));
-
- // Now, for each R in MaxRefs, get the alias closure of R. If the closure
- // only has R in it, create a phi a def for R. Otherwise, create a phi,
- // and add a def for each S in the closure.
-
- // Sort the refs so that the phis will be created in a deterministic order.
- llvm::sort(MaxRefs);
- // Remove duplicates.
- auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
- MaxRefs.erase(NewEnd, MaxRefs.end());
-
- auto Aliased = [this,&MaxRefs](RegisterRef RR,
- std::vector<unsigned> &Closure) -> bool {
- for (unsigned I : Closure)
- if (PRI.alias(RR, MaxRefs[I]))
- return true;
- return false;
- };
-
- // Prepare a list of NodeIds of the block's predecessors.
- NodeList Preds;
- const MachineBasicBlock *MBB = BA.Addr->getCode();
- for (MachineBasicBlock *PB : MBB->predecessors())
- Preds.push_back(findBlock(PB));
-
- while (!MaxRefs.empty()) {
- // Put the first element in the closure, and then add all subsequent
- // elements from MaxRefs to it, if they alias at least one element
- // already in the closure.
- // ClosureIdx: vector of indices in MaxRefs of members of the closure.
- std::vector<unsigned> ClosureIdx = { 0 };
- for (unsigned i = 1; i != MaxRefs.size(); ++i)
- if (Aliased(MaxRefs[i], ClosureIdx))
- ClosureIdx.push_back(i);
-
- // Build a phi for the closure.
- unsigned CS = ClosureIdx.size();
- NodeAddr<PhiNode*> PA = newPhi(BA);
-
- // Add defs.
- for (unsigned X = 0; X != CS; ++X) {
- RegisterRef RR = MaxRefs[ClosureIdx[X]];
- uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
- NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
- PA.Addr->addMember(DA, *this);
- }
- // Add phi uses.
- for (NodeAddr<BlockNode*> PBA : Preds) {
- for (unsigned X = 0; X != CS; ++X) {
- RegisterRef RR = MaxRefs[ClosureIdx[X]];
- NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
- PA.Addr->addMember(PUA, *this);
- }
- }
-
- // Erase from MaxRefs all elements in the closure.
- auto Begin = MaxRefs.begin();
- for (unsigned i = ClosureIdx.size(); i != 0; --i)
- MaxRefs.erase(Begin + ClosureIdx[i-1]);
- }
-}
-
-// Remove any unneeded phi nodes that were created during the build process.
-void DataFlowGraph::removeUnusedPhis() {
- // This will remove unused phis, i.e. phis where each def does not reach
- // any uses or other defs. This will not detect or remove circular phi
- // chains that are otherwise dead. Unused/dead phis are created during
-  // the build process, and this function is intended to remove the cases
-  // that are easily determined to be unnecessary.
-
- SetVector<NodeId> PhiQ;
- for (NodeAddr<BlockNode*> BA : Func.Addr->members(*this)) {
- for (auto P : BA.Addr->members_if(IsPhi, *this))
- PhiQ.insert(P.Id);
- }
-
- static auto HasUsedDef = [](NodeList &Ms) -> bool {
- for (NodeAddr<NodeBase*> M : Ms) {
- if (M.Addr->getKind() != NodeAttrs::Def)
- continue;
- NodeAddr<DefNode*> DA = M;
- if (DA.Addr->getReachedDef() != 0 || DA.Addr->getReachedUse() != 0)
- return true;
- }
- return false;
- };
-
- // Any phi, if it is removed, may affect other phis (make them dead).
- // For each removed phi, collect the potentially affected phis and add
- // them back to the queue.
- while (!PhiQ.empty()) {
- auto PA = addr<PhiNode*>(PhiQ[0]);
- PhiQ.remove(PA.Id);
- NodeList Refs = PA.Addr->members(*this);
- if (HasUsedDef(Refs))
- continue;
- for (NodeAddr<RefNode*> RA : Refs) {
- if (NodeId RD = RA.Addr->getReachingDef()) {
- auto RDA = addr<DefNode*>(RD);
- NodeAddr<InstrNode*> OA = RDA.Addr->getOwner(*this);
- if (IsPhi(OA))
- PhiQ.insert(OA.Id);
- }
- if (RA.Addr->isDef())
- unlinkDef(RA, true);
- else
- unlinkUse(RA, true);
- }
- NodeAddr<BlockNode*> BA = PA.Addr->getOwner(*this);
- BA.Addr->removeMember(PA, *this);
- }
-}
-
-// For a given reference node TA in an instruction node IA, connect the
-// reaching def of TA to the appropriate def node. Create any shadow nodes
-// as appropriate.
-template <typename T>
-void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
- DefStack &DS) {
- if (DS.empty())
- return;
- RegisterRef RR = TA.Addr->getRegRef(*this);
- NodeAddr<T> TAP;
-
- // References from the def stack that have been examined so far.
- RegisterAggr Defs(PRI);
-
- for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
- RegisterRef QR = I->Addr->getRegRef(*this);
-
- // Skip all defs that are aliased to any of the defs that we have already
- // seen. If this completes a cover of RR, stop the stack traversal.
- bool Alias = Defs.hasAliasOf(QR);
- bool Cover = Defs.insert(QR).hasCoverOf(RR);
- if (Alias) {
- if (Cover)
- break;
- continue;
- }
-
- // The reaching def.
- NodeAddr<DefNode*> RDA = *I;
-
- // Pick the reached node.
- if (TAP.Id == 0) {
- TAP = TA;
- } else {
- // Mark the existing ref as "shadow" and create a new shadow.
- TAP.Addr->setFlags(TAP.Addr->getFlags() | NodeAttrs::Shadow);
- TAP = getNextShadow(IA, TAP, true);
- }
-
- // Create the link.
- TAP.Addr->linkToDef(TAP.Id, RDA);
-
- if (Cover)
- break;
- }
-}
-
-// Create data-flow links for all reference nodes in the statement node SA.
-template <typename Predicate>
-void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA,
- Predicate P) {
-#ifndef NDEBUG
- RegisterSet Defs;
-#endif
-
- // Link all nodes (upwards in the data-flow) with their reaching defs.
- for (NodeAddr<RefNode*> RA : SA.Addr->members_if(P, *this)) {
- uint16_t Kind = RA.Addr->getKind();
- assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use);
- RegisterRef RR = RA.Addr->getRegRef(*this);
-#ifndef NDEBUG
- // Do not expect multiple defs of the same reference.
- assert(Kind != NodeAttrs::Def || !Defs.count(RR));
- Defs.insert(RR);
-#endif
-
- auto F = DefM.find(RR.Reg);
- if (F == DefM.end())
- continue;
- DefStack &DS = F->second;
- if (Kind == NodeAttrs::Use)
- linkRefUp<UseNode*>(SA, RA, DS);
- else if (Kind == NodeAttrs::Def)
- linkRefUp<DefNode*>(SA, RA, DS);
- else
- llvm_unreachable("Unexpected node in instruction");
- }
-}
-
-// Create data-flow links for all instructions in the block node BA. This
-// will include updating any phi nodes in BA.
-void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
- // Push block delimiters.
- markBlock(BA.Id, DefM);
-
- auto IsClobber = [] (NodeAddr<RefNode*> RA) -> bool {
- return IsDef(RA) && (RA.Addr->getFlags() & NodeAttrs::Clobbering);
- };
- auto IsNoClobber = [] (NodeAddr<RefNode*> RA) -> bool {
- return IsDef(RA) && !(RA.Addr->getFlags() & NodeAttrs::Clobbering);
- };
-
- assert(BA.Addr && "block node address is needed to create a data-flow link");
- // For each non-phi instruction in the block, link all the defs and uses
- // to their reaching defs. For any member of the block (including phis),
- // push the defs on the corresponding stacks.
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) {
- // Ignore phi nodes here. They will be linked part by part from the
- // predecessors.
- if (IA.Addr->getKind() == NodeAttrs::Stmt) {
- linkStmtRefs(DefM, IA, IsUse);
- linkStmtRefs(DefM, IA, IsClobber);
- }
-
- // Push the definitions on the stack.
- pushClobbers(IA, DefM);
-
- if (IA.Addr->getKind() == NodeAttrs::Stmt)
- linkStmtRefs(DefM, IA, IsNoClobber);
-
- pushDefs(IA, DefM);
- }
-
- // Recursively process all children in the dominator tree.
- MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
- for (auto I : *N) {
- MachineBasicBlock *SB = I->getBlock();
- NodeAddr<BlockNode*> SBA = findBlock(SB);
- linkBlockRefs(DefM, SBA);
- }
-
- // Link the phi uses from the successor blocks.
- auto IsUseForBA = [BA](NodeAddr<NodeBase*> NA) -> bool {
- if (NA.Addr->getKind() != NodeAttrs::Use)
- return false;
- assert(NA.Addr->getFlags() & NodeAttrs::PhiRef);
- NodeAddr<PhiUseNode*> PUA = NA;
- return PUA.Addr->getPredecessor() == BA.Id;
- };
-
- RegisterSet EHLiveIns = getLandingPadLiveIns();
- MachineBasicBlock *MBB = BA.Addr->getCode();
-
- for (MachineBasicBlock *SB : MBB->successors()) {
- bool IsEHPad = SB->isEHPad();
- NodeAddr<BlockNode*> SBA = findBlock(SB);
- for (NodeAddr<InstrNode*> IA : SBA.Addr->members_if(IsPhi, *this)) {
- // Do not link phi uses for landing pad live-ins.
- if (IsEHPad) {
- // Find what register this phi is for.
- NodeAddr<RefNode*> RA = IA.Addr->getFirstMember(*this);
- assert(RA.Id != 0);
- if (EHLiveIns.count(RA.Addr->getRegRef(*this)))
- continue;
- }
- // Go over each phi use associated with MBB, and link it.
- for (auto U : IA.Addr->members_if(IsUseForBA, *this)) {
- NodeAddr<PhiUseNode*> PUA = U;
- RegisterRef RR = PUA.Addr->getRegRef(*this);
- linkRefUp<UseNode*>(IA, PUA, DefM[RR.Reg]);
- }
- }
- }
-
- // Pop all defs from this block from the definition stacks.
- releaseBlock(BA.Id, DefM);
-}
-
-// Remove the use node UA from any data-flow and structural links.
-void DataFlowGraph::unlinkUseDF(NodeAddr<UseNode*> UA) {
- NodeId RD = UA.Addr->getReachingDef();
- NodeId Sib = UA.Addr->getSibling();
-
- if (RD == 0) {
- assert(Sib == 0);
- return;
- }
-
- auto RDA = addr<DefNode*>(RD);
- auto TA = addr<UseNode*>(RDA.Addr->getReachedUse());
- if (TA.Id == UA.Id) {
- RDA.Addr->setReachedUse(Sib);
- return;
- }
-
- while (TA.Id != 0) {
- NodeId S = TA.Addr->getSibling();
- if (S == UA.Id) {
- TA.Addr->setSibling(UA.Addr->getSibling());
- return;
- }
- TA = addr<UseNode*>(S);
- }
-}
-
-// Remove the def node DA from any data-flow and structural links.
-void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
- //
- // RD
- // | reached
- // | def
- // :
- // .
- // +----+
- // ... -- | DA | -- ... -- 0 : sibling chain of DA
- // +----+
- // | | reached
- // | : def
- // | .
- // | ... : Siblings (defs)
- // |
- // : reached
- // . use
- // ... : sibling chain of reached uses
-
- NodeId RD = DA.Addr->getReachingDef();
-
- // Visit all siblings of the reached def and reset their reaching defs.
- // Also, defs reached by DA are now "promoted" to being reached by RD,
- // so all of them will need to be spliced into the sibling chain where
- // DA belongs.
- auto getAllNodes = [this] (NodeId N) -> NodeList {
- NodeList Res;
- while (N) {
- auto RA = addr<RefNode*>(N);
- // Keep the nodes in the exact sibling order.
- Res.push_back(RA);
- N = RA.Addr->getSibling();
- }
- return Res;
- };
- NodeList ReachedDefs = getAllNodes(DA.Addr->getReachedDef());
- NodeList ReachedUses = getAllNodes(DA.Addr->getReachedUse());
-
- if (RD == 0) {
- for (NodeAddr<RefNode*> I : ReachedDefs)
- I.Addr->setSibling(0);
- for (NodeAddr<RefNode*> I : ReachedUses)
- I.Addr->setSibling(0);
- }
- for (NodeAddr<DefNode*> I : ReachedDefs)
- I.Addr->setReachingDef(RD);
- for (NodeAddr<UseNode*> I : ReachedUses)
- I.Addr->setReachingDef(RD);
-
- NodeId Sib = DA.Addr->getSibling();
- if (RD == 0) {
- assert(Sib == 0);
- return;
- }
-
- // Update the reaching def node and remove DA from the sibling list.
- auto RDA = addr<DefNode*>(RD);
- auto TA = addr<DefNode*>(RDA.Addr->getReachedDef());
- if (TA.Id == DA.Id) {
- // If DA is the first reached def, just update the RD's reached def
- // to the DA's sibling.
- RDA.Addr->setReachedDef(Sib);
- } else {
- // Otherwise, traverse the sibling list of the reached defs and remove
- // DA from it.
- while (TA.Id != 0) {
- NodeId S = TA.Addr->getSibling();
- if (S == DA.Id) {
- TA.Addr->setSibling(Sib);
- break;
- }
- TA = addr<DefNode*>(S);
- }
- }
-
- // Splice the DA's reached defs into the RDA's reached def chain.
- if (!ReachedDefs.empty()) {
- auto Last = NodeAddr<DefNode*>(ReachedDefs.back());
- Last.Addr->setSibling(RDA.Addr->getReachedDef());
- RDA.Addr->setReachedDef(ReachedDefs.front().Id);
- }
- // Splice the DA's reached uses into the RDA's reached use chain.
- if (!ReachedUses.empty()) {
- auto Last = NodeAddr<UseNode*>(ReachedUses.back());
- Last.Addr->setSibling(RDA.Addr->getReachedUse());
- RDA.Addr->setReachedUse(ReachedUses.front().Id);
- }
-}
diff --git a/llvm/lib/Target/Hexagon/RDFGraph.h b/llvm/lib/Target/Hexagon/RDFGraph.h
deleted file mode 100644
index 585f43e116f9..000000000000
--- a/llvm/lib/Target/Hexagon/RDFGraph.h
+++ /dev/null
@@ -1,968 +0,0 @@
-//===- RDFGraph.h -----------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Target-independent, SSA-based data flow graph for register data flow (RDF)
-// for a non-SSA program representation (e.g. post-RA machine code).
-//
-//
-// *** Introduction
-//
-// The RDF graph is a collection of nodes, each of which denotes some element
-// of the program. There are two main types of such elements: code and
-// references. Conceptually, "code" is something that represents the structure
-// of the program, e.g. a basic block or a statement, while "reference" is an
-// instance of accessing a register, e.g. a definition or a use. Nodes are
-// connected with each other based on the structure of the program (such as
-// blocks, instructions, etc.), and based on the data flow (e.g. reaching
-// definitions, reached uses, etc.). The single-reaching-definition principle
-// of SSA is generally observed, although, due to the non-SSA representation
-// of the program, there are some differences between the graph and a "pure"
-// SSA representation.
-//
-//
-// *** Implementation remarks
-//
-// Since the graph can contain a large number of nodes, memory consumption
-// was one of the major design considerations. As a result, there is a single
-// base class NodeBase which defines all members used by all possible derived
-// classes. The members are arranged in a union, and a derived class cannot
-// add any data members of its own. Each derived class only defines the
-// functional interface, i.e. member functions. NodeBase must be a POD,
-// which implies that all of its members must also be PODs.
-// Since nodes need to be connected with other nodes, pointers have been
-// replaced with 32-bit identifiers: each node has an id of type NodeId.
-// There are mapping functions in the graph that translate between actual
-// memory addresses and the corresponding identifiers.
-// A node id of 0 is equivalent to nullptr.
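-// As a small usage sketch (a reading aid; the interface is declared below):
-// given a graph G and an id N, G.addr<DefNode*>(N) pairs the node pointer
-// with N in a NodeAddr, and G.id(P) performs the reverse pointer-to-id
-// translation.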
-//
-//
-// *** Structure of the graph
-//
-// A code node is always a collection of other nodes. For example, a code
-// node corresponding to a basic block will contain code nodes corresponding
-// to instructions. In turn, a code node corresponding to an instruction will
-// contain a list of reference nodes that correspond to the definitions and
-// uses of registers in that instruction. The members are arranged into a
-// circular list, which is yet another consequence of the effort to save
-// memory: for each member node it should be possible to obtain its owner,
-// and it should be possible to access all other members. There are other
-// ways to accomplish that, but the circular list seemed the most natural.
-//
-// +- CodeNode -+
-// | | <---------------------------------------------------+
-// +-+--------+-+ |
-// |FirstM |LastM |
-// | +-------------------------------------+ |
-// | | |
-// V V |
-// +----------+ Next +----------+ Next Next +----------+ Next |
-// | |----->| |-----> ... ----->| |----->-+
-// +- Member -+ +- Member -+ +- Member -+
-//
-// The order of members is such that related reference nodes (see below)
-// should be contiguous on the member list.
-//
-// A reference node is a node that encapsulates an access to a register,
-// in other words, data flowing into or out of a register. There are two
-// major kinds of reference nodes: defs and uses. A def node will contain
-// the id of the first reached use, and the id of the first reached def.
-// Each def and use will contain the id of the reaching def, and also the
-// id of the next reached def (for def nodes) or use (for use nodes).
-// The "next node sharing the same reaching def" is denoted as "sibling".
-// In summary:
-// - Def node contains: reaching def, sibling, first reached def, and first
-// reached use.
-// - Use node contains: reaching def and sibling.
-//
-// +-- DefNode --+
-// | R2 = ... | <---+--------------------+
-// ++---------+--+ | |
-// |Reached |Reached | |
-// |Def |Use | |
-// | | |Reaching |Reaching
-// | V |Def |Def
-// | +-- UseNode --+ Sib +-- UseNode --+ Sib Sib
-// | | ... = R2 |----->| ... = R2 |----> ... ----> 0
-// | +-------------+ +-------------+
-// V
-// +-- DefNode --+ Sib
-// | R2 = ... |----> ...
-// ++---------+--+
-// | |
-// | |
-// ... ...
-//
-// To get a full picture, the circular lists connecting blocks within a
-// function, instructions within a block, etc. should be superimposed with
-// the def-def, def-use links shown above.
-// To illustrate this, consider a small example in a pseudo-assembly:
-// foo:
-// add r2, r0, r1 ; r2 = r0+r1
-// addi r0, r2, 1 ; r0 = r2+1
-// ret r0 ; return value in r0
-//
-// The graph (in a format used by the debugging functions) would look like:
-//
-// DFG dump:[
-// f1: Function foo
-// b2: === %bb.0 === preds(0), succs(0):
-// p3: phi [d4<r0>(,d12,u9):]
-// p5: phi [d6<r1>(,,u10):]
-// s7: add [d8<r2>(,,u13):, u9<r0>(d4):, u10<r1>(d6):]
-// s11: addi [d12<r0>(d4,,u15):, u13<r2>(d8):]
-// s14: ret [u15<r0>(d12):]
-// ]
-//
-// The f1, b2, p3, etc. are node ids. The letter is prepended to indicate the
-// kind of the node (i.e. f - function, b - basic block, p - phi,
-// s - statement, d - def, u - use).
-// The format of a def node is:
-// dN<R>(rd,d,u):sib,
-// where
-// N - numeric node id,
-// R - register being defined
-// rd - reaching def,
-// d - reached def,
-// u - reached use,
-// sib - sibling.
-// The format of a use node is:
-// uN<R>[!](rd):sib,
-// where
-// N - numeric node id,
-// R - register being used,
-// rd - reaching def,
-// sib - sibling.
-// Possible annotations (usually preceding the node id):
-// + - preserving def,
-// ~ - clobbering def,
-// " - shadow ref (follows the node id),
-// ! - fixed register (appears after register name).
-//
-// The circular lists are not explicit in the dump.
-//
-//
-// *** Node attributes
-//
-// NodeBase has a member "Attrs", which is the primary way of determining
-// the node's characteristics. The fields in this member decide whether
-// the node is a code node or a reference node (i.e. node's "type"), then
-// within each type, the "kind" determines what specifically this node
-// represents. The remaining bits, "flags", contain additional information
-// that is even more detailed than the "kind".
-// CodeNode's kinds are:
-// - Phi: Phi node, members are reference nodes.
-// - Stmt: Statement, members are reference nodes.
-// - Block: Basic block, members are instruction nodes (i.e. Phi or Stmt).
-// - Func: The whole function. The members are basic block nodes.
-// RefNode's kinds are:
-// - Use.
-// - Def.
-//
-// Meaning of flags:
-// - Preserving: applies only to defs. A preserving def is one that can
-// preserve some of the original bits among those that are included in
-// the register associated with that def. For example, if R0 is a 32-bit
-// register, but a def can only change the lower 16 bits, then it will
-// be marked as preserving.
-// - Shadow: a reference that has duplicates holding additional reaching
-// defs (see more below).
-// - Clobbering: applies only to defs; indicates that the value generated
-// by this def is unspecified. A typical example would be volatile registers
-// after function calls.
-// - Fixed: the register in this def/use cannot be replaced with any other
-// register. A typical case would be a parameter register to a call, or
-// the register with the return value from a function.
-// - Undef: the register in this reference is assumed to have
-// no pre-existing value, even if it appears to be reached by some def.
-// This is typically used to prevent keeping registers artificially live
-// in cases when they are defined via predicated instructions. For example:
-// r0 = add-if-true cond, r10, r11 (1)
-// r0 = add-if-false cond, r12, r13, implicit r0 (2)
-// ... = r0 (3)
-// Before (1), r0 is not intended to be live, and the use of r0 in (3) is
-// not meant to be reached by any def preceding (1). However, since the
-// defs in (1) and (2) are both preserving, these properties alone would
-// imply that the use in (3) may indeed be reached by some prior def.
-// Adding Undef flag to the def in (1) prevents that. The Undef flag
-// may be applied to both defs and uses.
-// - Dead: applies only to defs. The value coming out of a "dead" def is
-// assumed to be unused, even if the def appears to be reaching other defs
-// or uses. The motivation for this flag comes from dead defs on function
-// calls: there is no way to determine if such a def is dead without
-// analyzing the target's ABI. Hence the graph should contain this info,
-// as it is unavailable otherwise. On the other hand, a def without any
-// uses on a typical instruction is not the intended target for this flag.
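-//
-// As a reading aid derived from the NodeAttrs definition below: the type,
-// kind, and flag fields occupy disjoint bits of a single uint16_t, so, for
-// example, a clobbering def that is a member of a phi node carries
-//   Attrs == NodeAttrs::Ref | NodeAttrs::Def |
-//            NodeAttrs::Clobbering | NodeAttrs::PhiRef.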
-//
-// *** Shadow references
-//
-// It may happen that a super-register can have two (or more) non-overlapping
-// sub-registers. When both of these sub-registers are defined and followed
-// by a use of the super-register, the use of the super-register will not
-// have a unique reaching def: both defs of the sub-registers need to be
-// accounted for. In such cases, a duplicate use of the super-register is
-// added and it points to the extra reaching def. Both uses are marked with
-// a flag "shadow". Example:
-// Assume t0 is a super-register of r0 and r1, r0 and r1 do not overlap:
-// set r0, 1 ; r0 = 1
-// set r1, 1 ; r1 = 1
-// addi t1, t0, 1 ; t1 = t0+1
-//
-// The DFG:
-// s1: set [d2<r0>(,,u9):]
-// s3: set [d4<r1>(,,u10):]
-// s5: addi [d6<t1>(,,):, u7"<t0>(d2):, u8"<t0>(d4):]
-//
-// The statement s5 has two use nodes for t0: u7" and u8". The quotation
-// mark " indicates that the node is a shadow.
-//
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
-#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
-
-#include "RDFRegisters.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/LaneBitmask.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/MathExtras.h"
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <map>
-#include <set>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-// RDF uses uint32_t to refer to registers, so that the type has an explicit,
-// fixed size. In other places, registers are often stored using unsigned.
-static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal");
-
-namespace llvm {
-
-class MachineBasicBlock;
-class MachineDominanceFrontier;
-class MachineDominatorTree;
-class MachineFunction;
-class MachineInstr;
-class MachineOperand;
-class raw_ostream;
-class TargetInstrInfo;
-class TargetRegisterInfo;
-
-namespace rdf {
-
- using NodeId = uint32_t;
-
- struct DataFlowGraph;
-
- struct NodeAttrs {
- enum : uint16_t {
- None = 0x0000, // Nothing
-
- // Types: 2 bits
- TypeMask = 0x0003,
- Code = 0x0001, // 01, Container
- Ref = 0x0002, // 10, Reference
-
- // Kind: 3 bits
- KindMask = 0x0007 << 2,
- Def = 0x0001 << 2, // 001
- Use = 0x0002 << 2, // 010
- Phi = 0x0003 << 2, // 011
- Stmt = 0x0004 << 2, // 100
- Block = 0x0005 << 2, // 101
- Func = 0x0006 << 2, // 110
-
- // Flags: 7 bits for now
- FlagMask = 0x007F << 5,
- Shadow = 0x0001 << 5, // 0000001, Has extra reaching defs.
- Clobbering = 0x0002 << 5, // 0000010, Produces unspecified values.
- PhiRef = 0x0004 << 5, // 0000100, Member of PhiNode.
- Preserving = 0x0008 << 5, // 0001000, Def can keep original bits.
- Fixed = 0x0010 << 5, // 0010000, Fixed register.
- Undef = 0x0020 << 5, // 0100000, Has no pre-existing value.
- Dead = 0x0040 << 5, // 1000000, Does not define a value.
- };
-
- static uint16_t type(uint16_t T) { return T & TypeMask; }
- static uint16_t kind(uint16_t T) { return T & KindMask; }
- static uint16_t flags(uint16_t T) { return T & FlagMask; }
-
- static uint16_t set_type(uint16_t A, uint16_t T) {
- return (A & ~TypeMask) | T;
- }
-
- static uint16_t set_kind(uint16_t A, uint16_t K) {
- return (A & ~KindMask) | K;
- }
-
- static uint16_t set_flags(uint16_t A, uint16_t F) {
- return (A & ~FlagMask) | F;
- }
-
- // Test if A contains B.
- static bool contains(uint16_t A, uint16_t B) {
- if (type(A) != Code)
- return false;
- uint16_t KB = kind(B);
- switch (kind(A)) {
- case Func:
- return KB == Block;
- case Block:
- return KB == Phi || KB == Stmt;
- case Phi:
- case Stmt:
- return type(B) == Ref;
- }
- return false;
- }
- };
-
- struct BuildOptions {
- enum : unsigned {
- None = 0x00,
- KeepDeadPhis = 0x01, // Do not remove dead phis during build.
- };
- };
-
- template <typename T> struct NodeAddr {
- NodeAddr() = default;
- NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
-
-    // Type cast (casting constructor). This conversion is the reason for
-    // having this class instead of std::pair.
- template <typename S> NodeAddr(const NodeAddr<S> &NA)
- : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
-
- bool operator== (const NodeAddr<T> &NA) const {
- assert((Addr == NA.Addr) == (Id == NA.Id));
- return Addr == NA.Addr;
- }
- bool operator!= (const NodeAddr<T> &NA) const {
- return !operator==(NA);
- }
-
- T Addr = nullptr;
- NodeId Id = 0;
- };
-
- struct NodeBase;
-
- // Fast memory allocation and translation between node id and node address.
- // This is really the same idea as the one underlying the "bump pointer
- // allocator", the difference being in the translation. A node id is
- // composed of two components: the index of the block in which it was
- // allocated, and the index within the block. With the default settings,
- // where the number of nodes per block is 4096, the node id (minus 1) is:
- //
- // bit position: 11 0
- // +----------------------------+--------------+
- // | Index of the block |Index in block|
- // +----------------------------+--------------+
- //
- // The actual node id is the above plus 1, to avoid creating a node id of 0.
- //
- // This method significantly improved the build time, compared to using maps
- // (std::unordered_map or DenseMap) to translate between pointers and ids.
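-  //
-  // A worked example of the scheme above: with the default 4096 nodes per
-  // block, BitsPerIndex is 12, so the node at block 2, index 5 gets the id
-  // ((2 << 12) | 5) + 1 == 8198, and ptr(8198) maps back to block 2 at
-  // byte offset 5 * NodeMemSize.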
- struct NodeAllocator {
- // Amount of storage for a single node.
- enum { NodeMemSize = 32 };
-
- NodeAllocator(uint32_t NPB = 4096)
- : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
- IndexMask((1 << BitsPerIndex)-1) {
- assert(isPowerOf2_32(NPB));
- }
-
- NodeBase *ptr(NodeId N) const {
- uint32_t N1 = N-1;
- uint32_t BlockN = N1 >> BitsPerIndex;
- uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
- return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
- }
-
- NodeId id(const NodeBase *P) const;
- NodeAddr<NodeBase*> New();
- void clear();
-
- private:
- void startNewBlock();
- bool needNewBlock();
-
- uint32_t makeId(uint32_t Block, uint32_t Index) const {
- // Add 1 to the id, to avoid the id of 0, which is treated as "null".
- return ((Block << BitsPerIndex) | Index) + 1;
- }
-
- const uint32_t NodesPerBlock;
- const uint32_t BitsPerIndex;
- const uint32_t IndexMask;
- char *ActiveEnd = nullptr;
- std::vector<char*> Blocks;
- using AllocatorTy = BumpPtrAllocatorImpl<MallocAllocator, 65536>;
- AllocatorTy MemPool;
- };
-
- using RegisterSet = std::set<RegisterRef>;
-
- struct TargetOperandInfo {
- TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
- virtual ~TargetOperandInfo() = default;
-
- virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
- virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
- virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
-
- const TargetInstrInfo &TII;
- };
-
- // Packed register reference. Only used for storage.
- struct PackedRegisterRef {
- RegisterId Reg;
- uint32_t MaskId;
- };
-
- struct LaneMaskIndex : private IndexedSet<LaneBitmask> {
- LaneMaskIndex() = default;
-
- LaneBitmask getLaneMaskForIndex(uint32_t K) const {
- return K == 0 ? LaneBitmask::getAll() : get(K);
- }
-
- uint32_t getIndexForLaneMask(LaneBitmask LM) {
- assert(LM.any());
- return LM.all() ? 0 : insert(LM);
- }
-
- uint32_t getIndexForLaneMask(LaneBitmask LM) const {
- assert(LM.any());
- return LM.all() ? 0 : find(LM);
- }
- };
-
- struct NodeBase {
- public:
- // Make sure this is a POD.
- NodeBase() = default;
-
- uint16_t getType() const { return NodeAttrs::type(Attrs); }
- uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
- uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
- NodeId getNext() const { return Next; }
-
- uint16_t getAttrs() const { return Attrs; }
- void setAttrs(uint16_t A) { Attrs = A; }
- void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); }
-
- // Insert node NA after "this" in the circular chain.
- void append(NodeAddr<NodeBase*> NA);
-
- // Initialize all members to 0.
- void init() { memset(this, 0, sizeof *this); }
-
- void setNext(NodeId N) { Next = N; }
-
- protected:
- uint16_t Attrs;
- uint16_t Reserved;
- NodeId Next; // Id of the next node in the circular chain.
- // Definitions of nested types. Using anonymous nested structs would make
- // this class definition clearer, but unnamed structs are not a part of
- // the standard.
- struct Def_struct {
- NodeId DD, DU; // Ids of the first reached def and use.
- };
- struct PhiU_struct {
- NodeId PredB; // Id of the predecessor block for a phi use.
- };
- struct Code_struct {
- void *CP; // Pointer to the actual code.
-      NodeId FirstM, LastM; // Ids of the first and last members.
- };
- struct Ref_struct {
- NodeId RD, Sib; // Ids of the reaching def and the sibling.
- union {
- Def_struct Def;
- PhiU_struct PhiU;
- };
- union {
- MachineOperand *Op; // Non-phi refs point to a machine operand.
- PackedRegisterRef PR; // Phi refs store register info directly.
- };
- };
-
- // The actual payload.
- union {
- Ref_struct Ref;
- Code_struct Code;
- };
- };
- // The allocator allocates chunks of 32 bytes for each node. The fact that
- // each node takes 32 bytes in memory is used for fast translation between
- // the node id and the node address.
- static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize,
- "NodeBase must be at most NodeAllocator::NodeMemSize bytes");
-
- using NodeList = SmallVector<NodeAddr<NodeBase *>, 4>;
- using NodeSet = std::set<NodeId>;
-
- struct RefNode : public NodeBase {
- RefNode() = default;
-
- RegisterRef getRegRef(const DataFlowGraph &G) const;
-
- MachineOperand &getOp() {
- assert(!(getFlags() & NodeAttrs::PhiRef));
- return *Ref.Op;
- }
-
- void setRegRef(RegisterRef RR, DataFlowGraph &G);
- void setRegRef(MachineOperand *Op, DataFlowGraph &G);
-
- NodeId getReachingDef() const {
- return Ref.RD;
- }
- void setReachingDef(NodeId RD) {
- Ref.RD = RD;
- }
-
- NodeId getSibling() const {
- return Ref.Sib;
- }
- void setSibling(NodeId Sib) {
- Ref.Sib = Sib;
- }
-
- bool isUse() const {
- assert(getType() == NodeAttrs::Ref);
- return getKind() == NodeAttrs::Use;
- }
-
- bool isDef() const {
- assert(getType() == NodeAttrs::Ref);
- return getKind() == NodeAttrs::Def;
- }
-
- template <typename Predicate>
- NodeAddr<RefNode*> getNextRef(RegisterRef RR, Predicate P, bool NextOnly,
- const DataFlowGraph &G);
- NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
- };
-
- struct DefNode : public RefNode {
- NodeId getReachedDef() const {
- return Ref.Def.DD;
- }
- void setReachedDef(NodeId D) {
- Ref.Def.DD = D;
- }
- NodeId getReachedUse() const {
- return Ref.Def.DU;
- }
- void setReachedUse(NodeId U) {
- Ref.Def.DU = U;
- }
-
- void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
- };
-
- struct UseNode : public RefNode {
- void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
- };
-
- struct PhiUseNode : public UseNode {
- NodeId getPredecessor() const {
- assert(getFlags() & NodeAttrs::PhiRef);
- return Ref.PhiU.PredB;
- }
- void setPredecessor(NodeId B) {
- assert(getFlags() & NodeAttrs::PhiRef);
- Ref.PhiU.PredB = B;
- }
- };
-
- struct CodeNode : public NodeBase {
- template <typename T> T getCode() const {
- return static_cast<T>(Code.CP);
- }
- void setCode(void *C) {
- Code.CP = C;
- }
-
- NodeAddr<NodeBase*> getFirstMember(const DataFlowGraph &G) const;
- NodeAddr<NodeBase*> getLastMember(const DataFlowGraph &G) const;
- void addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
- void addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
- const DataFlowGraph &G);
- void removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
-
- NodeList members(const DataFlowGraph &G) const;
- template <typename Predicate>
- NodeList members_if(Predicate P, const DataFlowGraph &G) const;
- };
-
- struct InstrNode : public CodeNode {
- NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
- };
-
- struct PhiNode : public InstrNode {
- MachineInstr *getCode() const {
- return nullptr;
- }
- };
-
- struct StmtNode : public InstrNode {
- MachineInstr *getCode() const {
- return CodeNode::getCode<MachineInstr*>();
- }
- };
-
- struct BlockNode : public CodeNode {
- MachineBasicBlock *getCode() const {
- return CodeNode::getCode<MachineBasicBlock*>();
- }
-
- void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
- };
-
- struct FuncNode : public CodeNode {
- MachineFunction *getCode() const {
- return CodeNode::getCode<MachineFunction*>();
- }
-
- NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
- const DataFlowGraph &G) const;
- NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
- };
-
- struct DataFlowGraph {
- DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
- const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi);
-
- NodeBase *ptr(NodeId N) const;
- template <typename T> T ptr(NodeId N) const {
- return static_cast<T>(ptr(N));
- }
-
- NodeId id(const NodeBase *P) const;
-
- template <typename T> NodeAddr<T> addr(NodeId N) const {
- return { ptr<T>(N), N };
- }
-
- NodeAddr<FuncNode*> getFunc() const { return Func; }
- MachineFunction &getMF() const { return MF; }
- const TargetInstrInfo &getTII() const { return TII; }
- const TargetRegisterInfo &getTRI() const { return TRI; }
- const PhysicalRegisterInfo &getPRI() const { return PRI; }
- const MachineDominatorTree &getDT() const { return MDT; }
- const MachineDominanceFrontier &getDF() const { return MDF; }
- const RegisterAggr &getLiveIns() const { return LiveIns; }
-
- struct DefStack {
- DefStack() = default;
-
- bool empty() const { return Stack.empty() || top() == bottom(); }
-
- private:
- using value_type = NodeAddr<DefNode *>;
- struct Iterator {
- using value_type = DefStack::value_type;
-
- Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
- Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
-
- value_type operator*() const {
- assert(Pos >= 1);
- return DS.Stack[Pos-1];
- }
- const value_type *operator->() const {
- assert(Pos >= 1);
- return &DS.Stack[Pos-1];
- }
- bool operator==(const Iterator &It) const { return Pos == It.Pos; }
- bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
-
- private:
- friend struct DefStack;
-
- Iterator(const DefStack &S, bool Top);
-
- // Pos-1 is the index in the StorageType object that corresponds to
- // the top of the DefStack.
- const DefStack &DS;
- unsigned Pos;
- };
-
- public:
- using iterator = Iterator;
-
- iterator top() const { return Iterator(*this, true); }
- iterator bottom() const { return Iterator(*this, false); }
- unsigned size() const;
-
- void push(NodeAddr<DefNode*> DA) { Stack.push_back(DA); }
- void pop();
- void start_block(NodeId N);
- void clear_block(NodeId N);
-
- private:
- friend struct Iterator;
-
- using StorageType = std::vector<value_type>;
-
- bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
- return (P.Addr == nullptr) && (N == 0 || P.Id == N);
- }
-
- unsigned nextUp(unsigned P) const;
- unsigned nextDown(unsigned P) const;
-
- StorageType Stack;
- };
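-
-    // A summary of the delimiter protocol above (see also markBlock and
-    // releaseBlock): start_block(N) pushes a delimiter entry for block N
-    // (a null Addr tagged with N, per isDelimiter) onto the stack, and
-    // clear_block(N) pops everything up to and including the most recent
-    // delimiter for block N.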
-
-    // Implemented as std::unordered_map for speed of element access.
-    // Map: Register (physical or virtual) -> DefStack
- using DefStackMap = std::unordered_map<RegisterId, DefStack>;
-
- void build(unsigned Options = BuildOptions::None);
- void pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
- void markBlock(NodeId B, DefStackMap &DefM);
- void releaseBlock(NodeId B, DefStackMap &DefM);
-
- PackedRegisterRef pack(RegisterRef RR) {
- return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
- }
- PackedRegisterRef pack(RegisterRef RR) const {
- return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
- }
- RegisterRef unpack(PackedRegisterRef PR) const {
- return RegisterRef(PR.Reg, LMI.getLaneMaskForIndex(PR.MaskId));
- }
-
- RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const;
- RegisterRef makeRegRef(const MachineOperand &Op) const;
- RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const;
-
- NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
- NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA, bool Create);
- NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
- NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA, bool Create);
- NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
-
- NodeList getRelatedRefs(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
-
- NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) const {
- return BlockNodes.at(BB);
- }
-
- void unlinkUse(NodeAddr<UseNode*> UA, bool RemoveFromOwner) {
- unlinkUseDF(UA);
- if (RemoveFromOwner)
- removeFromOwner(UA);
- }
-
- void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) {
- unlinkDefDF(DA);
- if (RemoveFromOwner)
- removeFromOwner(DA);
- }
-
- // Some useful filters.
- template <uint16_t Kind>
- static bool IsRef(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Ref &&
- BA.Addr->getKind() == Kind;
- }
-
- template <uint16_t Kind>
- static bool IsCode(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Code &&
- BA.Addr->getKind() == Kind;
- }
-
- static bool IsDef(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Ref &&
- BA.Addr->getKind() == NodeAttrs::Def;
- }
-
- static bool IsUse(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Ref &&
- BA.Addr->getKind() == NodeAttrs::Use;
- }
-
- static bool IsPhi(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Code &&
- BA.Addr->getKind() == NodeAttrs::Phi;
- }
-
- static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
- uint16_t Flags = DA.Addr->getFlags();
- return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
- }
-
- private:
- void reset();
-
- RegisterSet getLandingPadLiveIns() const;
-
- NodeAddr<NodeBase*> newNode(uint16_t Attrs);
- NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B);
- NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
- NodeAddr<PhiUseNode*> newPhiUse(NodeAddr<PhiNode*> Owner,
- RegisterRef RR, NodeAddr<BlockNode*> PredB,
- uint16_t Flags = NodeAttrs::PhiRef);
- NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
- NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
- RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef);
- NodeAddr<PhiNode*> newPhi(NodeAddr<BlockNode*> Owner);
- NodeAddr<StmtNode*> newStmt(NodeAddr<BlockNode*> Owner,
- MachineInstr *MI);
- NodeAddr<BlockNode*> newBlock(NodeAddr<FuncNode*> Owner,
- MachineBasicBlock *BB);
- NodeAddr<FuncNode*> newFunc(MachineFunction *MF);
-
- template <typename Predicate>
- std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
- locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
- Predicate P) const;
-
- using BlockRefsMap = std::map<NodeId, RegisterSet>;
-
- void buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In);
- void recordDefsForDF(BlockRefsMap &PhiM, NodeAddr<BlockNode*> BA);
- void buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
- NodeAddr<BlockNode*> BA);
- void removeUnusedPhis();
-
- void pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DM);
- void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
- template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA,
- NodeAddr<T> TA, DefStack &DS);
- template <typename Predicate> void linkStmtRefs(DefStackMap &DefM,
- NodeAddr<StmtNode*> SA, Predicate P);
- void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA);
-
- void unlinkUseDF(NodeAddr<UseNode*> UA);
- void unlinkDefDF(NodeAddr<DefNode*> DA);
-
- void removeFromOwner(NodeAddr<RefNode*> RA) {
- NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this);
- IA.Addr->removeMember(RA, *this);
- }
-
- MachineFunction &MF;
- const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
- const PhysicalRegisterInfo PRI;
- const MachineDominatorTree &MDT;
- const MachineDominanceFrontier &MDF;
- const TargetOperandInfo &TOI;
-
- RegisterAggr LiveIns;
- NodeAddr<FuncNode*> Func;
- NodeAllocator Memory;
- // Local map: MachineBasicBlock -> NodeAddr<BlockNode*>
- std::map<MachineBasicBlock*,NodeAddr<BlockNode*>> BlockNodes;
- // Lane mask map.
- LaneMaskIndex LMI;
- }; // struct DataFlowGraph
-
- template <typename Predicate>
- NodeAddr<RefNode*> RefNode::getNextRef(RegisterRef RR, Predicate P,
- bool NextOnly, const DataFlowGraph &G) {
- // Get the "Next" reference in the circular list that references RR and
-    // satisfies the predicate P.
- auto NA = G.addr<NodeBase*>(getNext());
-
- while (NA.Addr != this) {
- if (NA.Addr->getType() == NodeAttrs::Ref) {
- NodeAddr<RefNode*> RA = NA;
- if (RA.Addr->getRegRef(G) == RR && P(NA))
- return NA;
- if (NextOnly)
- break;
- NA = G.addr<NodeBase*>(NA.Addr->getNext());
- } else {
- // We've hit the beginning of the chain.
- assert(NA.Addr->getType() == NodeAttrs::Code);
- NodeAddr<CodeNode*> CA = NA;
- NA = CA.Addr->getFirstMember(G);
- }
- }
- // Return the equivalent of "nullptr" if such a node was not found.
- return NodeAddr<RefNode*>();
- }
-
- template <typename Predicate>
- NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const {
- NodeList MM;
- auto M = getFirstMember(G);
- if (M.Id == 0)
- return MM;
-
- while (M.Addr != this) {
- if (P(M))
- MM.push_back(M);
- M = G.addr<NodeBase*>(M.Addr->getNext());
- }
- return MM;
- }
-
- template <typename T>
- struct Print {
- Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {}
-
- const T &Obj;
- const DataFlowGraph &G;
- };
-
- template <typename T>
- struct PrintNode : Print<NodeAddr<T>> {
- PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
- : Print<NodeAddr<T>>(x, g) {}
- };
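-
-  // A usage sketch for the printing helpers declared below (for debugging
-  // dumps): wrap the object together with the graph, e.g.
-  //   dbgs() << Print<NodeId>(N, G) << ' ' << PrintNode<DefNode*>(DA, G);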
-
- raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<DefNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<UseNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<PhiUseNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<RefNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<PhiNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<StmtNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<InstrNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<BlockNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<FuncNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<DataFlowGraph::DefStack> &P);
-
-} // end namespace rdf
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/llvm/lib/Target/Hexagon/RDFLiveness.cpp
deleted file mode 100644
index e2c007c9d01a..000000000000
--- a/llvm/lib/Target/Hexagon/RDFLiveness.cpp
+++ /dev/null
@@ -1,1118 +0,0 @@
-//===- RDFLiveness.cpp ----------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Computation of the liveness information from the data-flow graph.
-//
-// The main functionality of this code is to compute block live-in
-// information. With the live-in information in place, the placement
-// of kill flags can also be recalculated.
-//
-// The block live-in calculation is based on the ideas from the following
-// publication:
-//
-// Dibyendu Das, Ramakrishna Upadrasta, Benoit Dupont de Dinechin.
-// "Efficient Liveness Computation Using Merge Sets and DJ-Graphs."
-// ACM Transactions on Architecture and Code Optimization, Association for
-// Computing Machinery, 2012, ACM TACO Special Issue on "High-Performance
-// and Embedded Architectures and Compilers", 8 (4),
-// <10.1145/2086696.2086706>. <hal-00647369>
-//
-#include "RDFLiveness.h"
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominanceFrontier.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/LaneBitmask.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-#include <map>
-#include <utility>
-#include <vector>
-
-using namespace llvm;
-using namespace rdf;
-
-static cl::opt<unsigned> MaxRecNest("rdf-liveness-max-rec", cl::init(25),
- cl::Hidden, cl::desc("Maximum recursion level"));
-
-namespace llvm {
-namespace rdf {
-
- raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) {
- OS << '{';
- for (auto &I : P.Obj) {
- OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{';
- for (auto J = I.second.begin(), E = I.second.end(); J != E; ) {
- OS << Print<NodeId>(J->first, P.G) << PrintLaneMaskOpt(J->second);
- if (++J != E)
- OS << ',';
- }
- OS << '}';
- }
- OS << " }";
- return OS;
- }
-
-} // end namespace rdf
-} // end namespace llvm
-
-// The order in the returned sequence is the order of reaching defs in the
-// upward traversal: the first def is the closest to the given reference RefA,
-// the next one is further up, and so on.
-// The list ends at a reaching phi def, or when the reference from RefA is
-// covered by the defs in the list (see FullChain).
-// This function provides two modes of operation:
-// (1) Returning the sequence of reaching defs for a particular reference
-// node. This sequence will terminate at the first phi node [1].
-// (2) Returning a partial sequence of reaching defs, where the final goal
-// is to traverse past phi nodes to the actual defs arising from the code
-// itself.
-// In mode (2), the register reference for which the search was started
-// may be different from the reference node RefA, for which this call was
-// made, hence the argument RefRR, which holds the original register.
-// Also, some definitions may have already been encountered in a previous
-// call that will influence register covering. The register references
-// already defined are passed in through DefRRs.
-// In mode (1), the "continuation" considerations do not apply, and the
-// RefRR is the same as the register in RefA, and the set DefRRs is empty.
-//
-// [1] It is possible for multiple phi nodes to be included in the returned
-// sequence:
-// SubA = phi ...
-// SubB = phi ...
-// ... = SuperAB(rdef:SubA), SuperAB"(rdef:SubB)
-// However, these phi nodes are independent from one another in terms of
-// the data-flow.
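-//
-// A minimal mode-(1) query could look like this (a sketch; the graph DFG
-// and the Liveness object LV are assumed to have been built already):
-//   NodeAddr<UseNode*> UA = ...;
-//   NodeList RDs = LV.getAllReachingDefs(UA.Addr->getRegRef(DFG), UA,
-//       /*TopShadows=*/false, /*FullChain=*/false,
-//       RegisterAggr(DFG.getPRI()));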
-
-NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, bool TopShadows, bool FullChain,
- const RegisterAggr &DefRRs) {
- NodeList RDefs; // Return value.
- SetVector<NodeId> DefQ;
- SetVector<NodeId> Owners;
-
- // Dead defs will be treated as if they were live, since they are actually
- // on the data-flow path. They cannot be ignored because even though they
- // do not generate meaningful values, they still modify registers.
-
- // If the reference is undefined, there is nothing to do.
- if (RefA.Addr->getFlags() & NodeAttrs::Undef)
- return RDefs;
-
- // The initial queue should not have reaching defs for shadows. The
- // whole point of a shadow is that it will have a reaching def that
- // is not aliased to the reaching defs of the related shadows.
- NodeId Start = RefA.Id;
- auto SNA = DFG.addr<RefNode*>(Start);
- if (NodeId RD = SNA.Addr->getReachingDef())
- DefQ.insert(RD);
- if (TopShadows) {
- for (auto S : DFG.getRelatedRefs(RefA.Addr->getOwner(DFG), RefA))
- if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
- DefQ.insert(RD);
- }
-
- // Collect all the reaching defs, going up until a phi node is encountered,
- // or there are no more reaching defs. From this set, the actual set of
- // reaching defs will be selected.
- // The traversal upwards must go on until a covering def is encountered.
-  // It is possible that a collection of individually non-covering defs
- // will be sufficient, but keep going until a covering one is found.
- for (unsigned i = 0; i < DefQ.size(); ++i) {
- auto TA = DFG.addr<DefNode*>(DefQ[i]);
- if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
- continue;
- // Stop at the covering/overwriting def of the initial register reference.
- RegisterRef RR = TA.Addr->getRegRef(DFG);
- if (!DFG.IsPreservingDef(TA))
- if (RegisterAggr::isCoverOf(RR, RefRR, PRI))
- continue;
- // Get the next level of reaching defs. This will include multiple
- // reaching defs for shadows.
- for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
- if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
- DefQ.insert(RD);
- }
-
- // Remove all non-phi defs that are not aliased to RefRR, and collect
- // the owners of the remaining defs.
- SetVector<NodeId> Defs;
- for (NodeId N : DefQ) {
- auto TA = DFG.addr<DefNode*>(N);
- bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
- if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
- continue;
- Defs.insert(TA.Id);
- Owners.insert(TA.Addr->getOwner(DFG).Id);
- }
-
- // Return the MachineBasicBlock containing a given instruction.
- auto Block = [this] (NodeAddr<InstrNode*> IA) -> MachineBasicBlock* {
- if (IA.Addr->getKind() == NodeAttrs::Stmt)
- return NodeAddr<StmtNode*>(IA).Addr->getCode()->getParent();
- assert(IA.Addr->getKind() == NodeAttrs::Phi);
- NodeAddr<PhiNode*> PA = IA;
- NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG);
- return BA.Addr->getCode();
- };
- // Less(A,B) iff instruction A is further down in the dominator tree than B.
- auto Less = [&Block,this] (NodeId A, NodeId B) -> bool {
- if (A == B)
- return false;
- auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B);
- MachineBasicBlock *BA = Block(OA), *BB = Block(OB);
- if (BA != BB)
- return MDT.dominates(BB, BA);
- // They are in the same block.
- bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
- bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
- if (StmtA) {
- if (!StmtB) // OB is a phi and phis dominate statements.
- return true;
- MachineInstr *CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
- MachineInstr *CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
- // The order must be linear, so tie-break such equalities.
- if (CA == CB)
- return A < B;
- return MDT.dominates(CB, CA);
- } else {
- // OA is a phi.
- if (StmtB)
- return false;
- // Both are phis. There is no ordering between phis (in terms of
- // the data-flow), so tie-break this via node id comparison.
- return A < B;
- }
- };
-
- std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
- llvm::sort(Tmp, Less);
-
- // The vector is a list of instructions, so that defs coming from
- // the same instruction don't need to be artificially ordered.
-  // Then, when computing the initial segment and iterating over an
-  // instruction, pick the defs that contribute to the covering (i.e. are
-  // not covered by previously added defs). Check the defs individually,
-  // i.e. first check whether each def is covered (without adding it to
-  // the tracking set), and then add all the selected ones.
-
-  // The reason for this is illustrated by this example:
- // *d1<A>, *d2<B>, ... Assume A and B are aliased (can happen in phi nodes).
- // *d3<C> If A \incl BuC, and B \incl AuC, then *d2 would be
- // covered if we added A first, and A would be covered
- // if we added B first.
-
- RegisterAggr RRs(DefRRs);
-
- auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool {
- return TA.Addr->getKind() == NodeAttrs::Def &&
- Defs.count(TA.Id);
- };
- for (NodeId T : Tmp) {
- if (!FullChain && RRs.hasCoverOf(RefRR))
- break;
- auto TA = DFG.addr<InstrNode*>(T);
- bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA);
- NodeList Ds;
- for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) {
- RegisterRef QR = DA.Addr->getRegRef(DFG);
- // Add phi defs even if they are covered by subsequent defs. This is
- // for cases where the reached use is not covered by any of the defs
- // encountered so far: the phi def is needed to expose the liveness
- // of that use to the entry of the block.
- // Example:
- // phi d1<R3>(,d2,), ... Phi def d1 is covered by d2.
- // d2<R3>(d1,,u3), ...
- // ..., u3<D1>(d2) This use needs to be live on entry.
- if (FullChain || IsPhi || !RRs.hasCoverOf(QR))
- Ds.push_back(DA);
- }
- RDefs.insert(RDefs.end(), Ds.begin(), Ds.end());
- for (NodeAddr<DefNode*> DA : Ds) {
- // When collecting a full chain of definitions, do not consider phi
- // defs to actually define a register.
- uint16_t Flags = DA.Addr->getFlags();
- if (!FullChain || !(Flags & NodeAttrs::PhiRef))
- if (!(Flags & NodeAttrs::Preserving)) // Don't care about Undef here.
- RRs.insert(DA.Addr->getRegRef(DFG));
- }
- }
-
- auto DeadP = [](const NodeAddr<DefNode*> DA) -> bool {
- return DA.Addr->getFlags() & NodeAttrs::Dead;
- };
- RDefs.resize(std::distance(RDefs.begin(), llvm::remove_if(RDefs, DeadP)));
-
- return RDefs;
-}
-
-std::pair<NodeSet,bool>
-Liveness::getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- NodeSet &Visited, const NodeSet &Defs) {
- return getAllReachingDefsRecImpl(RefRR, RefA, Visited, Defs, 0, MaxRecNest);
-}
-
-std::pair<NodeSet,bool>
-Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- NodeSet &Visited, const NodeSet &Defs, unsigned Nest, unsigned MaxNest) {
- if (Nest > MaxNest)
- return { NodeSet(), false };
- // Collect all defined registers. Do not consider phis to be defining
- // anything, only collect "real" definitions.
- RegisterAggr DefRRs(PRI);
- for (NodeId D : Defs) {
- const auto DA = DFG.addr<const DefNode*>(D);
- if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
- DefRRs.insert(DA.Addr->getRegRef(DFG));
- }
-
- NodeList RDs = getAllReachingDefs(RefRR, RefA, false, true, DefRRs);
- if (RDs.empty())
- return { Defs, true };
-
- // Make a copy of the preexisting definitions and add the newly found ones.
- NodeSet TmpDefs = Defs;
- for (NodeAddr<NodeBase*> R : RDs)
- TmpDefs.insert(R.Id);
-
- NodeSet Result = Defs;
-
- for (NodeAddr<DefNode*> DA : RDs) {
- Result.insert(DA.Id);
- if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
- continue;
- NodeAddr<PhiNode*> PA = DA.Addr->getOwner(DFG);
- if (Visited.count(PA.Id))
- continue;
- Visited.insert(PA.Id);
- // Go over all phi uses and get the reaching defs for each use.
- for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
- const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs,
- Nest+1, MaxNest);
- if (!T.second)
- return { T.first, false };
- Result.insert(T.first.begin(), T.first.end());
- }
- }
-
- return { Result, true };
-}
-
-/// Find the nearest ref node aliased to RefRR, going upwards in the data
-/// flow, starting from the instruction immediately preceding IA.
-NodeAddr<RefNode*> Liveness::getNearestAliasedRef(RegisterRef RefRR,
- NodeAddr<InstrNode*> IA) {
- NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
- NodeList Ins = BA.Addr->members(DFG);
- NodeId FindId = IA.Id;
- auto E = Ins.rend();
- auto B = std::find_if(Ins.rbegin(), E,
- [FindId] (const NodeAddr<InstrNode*> T) {
- return T.Id == FindId;
- });
- // Do not scan IA (which is what B would point to).
- if (B != E)
- ++B;
-
- do {
- // Process the range of instructions from B to E.
- for (NodeAddr<InstrNode*> I : make_range(B, E)) {
- NodeList Refs = I.Addr->members(DFG);
- NodeAddr<RefNode*> Clob, Use;
- // Scan all the refs in I aliased to RefRR, and return the one that
- // is the closest to the output of I, i.e. def > clobber > use.
- for (NodeAddr<RefNode*> R : Refs) {
- if (!PRI.alias(R.Addr->getRegRef(DFG), RefRR))
- continue;
- if (DFG.IsDef(R)) {
- // If it's a non-clobbering def, just return it.
- if (!(R.Addr->getFlags() & NodeAttrs::Clobbering))
- return R;
- Clob = R;
- } else {
- Use = R;
- }
- }
- if (Clob.Id != 0)
- return Clob;
- if (Use.Id != 0)
- return Use;
- }
-
- // Go up to the immediate dominator, if any.
- MachineBasicBlock *BB = BA.Addr->getCode();
- BA = NodeAddr<BlockNode*>();
- if (MachineDomTreeNode *N = MDT.getNode(BB)) {
- if ((N = N->getIDom()))
- BA = DFG.findBlock(N->getBlock());
- }
- if (!BA.Id)
- break;
-
- Ins = BA.Addr->members(DFG);
- B = Ins.rbegin();
- E = Ins.rend();
- } while (true);
-
- return NodeAddr<RefNode*>();
-}
-
-NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
- NodeAddr<DefNode*> DefA, const RegisterAggr &DefRRs) {
- NodeSet Uses;
-
- // If the original register is already covered by all the intervening
- // defs, no more uses can be reached.
- if (DefRRs.hasCoverOf(RefRR))
- return Uses;
-
- // Add all directly reached uses.
- // If the def is dead, it does not provide a value for any use.
- bool IsDead = DefA.Addr->getFlags() & NodeAttrs::Dead;
- NodeId U = !IsDead ? DefA.Addr->getReachedUse() : 0;
- while (U != 0) {
- auto UA = DFG.addr<UseNode*>(U);
- if (!(UA.Addr->getFlags() & NodeAttrs::Undef)) {
- RegisterRef UR = UA.Addr->getRegRef(DFG);
- if (PRI.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR))
- Uses.insert(U);
- }
- U = UA.Addr->getSibling();
- }
-
- // Traverse all reached defs. This time dead defs cannot be ignored.
- for (NodeId D = DefA.Addr->getReachedDef(), NextD; D != 0; D = NextD) {
- auto DA = DFG.addr<DefNode*>(D);
- NextD = DA.Addr->getSibling();
- RegisterRef DR = DA.Addr->getRegRef(DFG);
- // If this def is already covered, it cannot reach anything new.
- // Similarly, skip it if it is not aliased to the interesting register.
- if (DefRRs.hasCoverOf(DR) || !PRI.alias(RefRR, DR))
- continue;
- NodeSet T;
- if (DFG.IsPreservingDef(DA)) {
- // If it is a preserving def, do not update the set of intervening defs.
- T = getAllReachedUses(RefRR, DA, DefRRs);
- } else {
- RegisterAggr NewDefRRs = DefRRs;
- NewDefRRs.insert(DR);
- T = getAllReachedUses(RefRR, DA, NewDefRRs);
- }
- Uses.insert(T.begin(), T.end());
- }
- return Uses;
-}
-
-void Liveness::computePhiInfo() {
- RealUseMap.clear();
-
- NodeList Phis;
- NodeAddr<FuncNode*> FA = DFG.getFunc();
- NodeList Blocks = FA.Addr->members(DFG);
- for (NodeAddr<BlockNode*> BA : Blocks) {
- auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
- Phis.insert(Phis.end(), Ps.begin(), Ps.end());
- }
-
- // phi use -> (map: reaching phi -> set of registers defined in between)
- std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp;
- std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
- std::map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it.
-
- // Go over all phis.
- for (NodeAddr<PhiNode*> PhiA : Phis) {
- // Go over all defs and collect the reached uses that are non-phi uses
- // (i.e. the "real uses").
- RefMap &RealUses = RealUseMap[PhiA.Id];
- NodeList PhiRefs = PhiA.Addr->members(DFG);
-
- // Have a work queue of defs whose reached uses need to be found.
- // For each def, add to the queue all reached (non-phi) defs.
- SetVector<NodeId> DefQ;
- NodeSet PhiDefs;
- RegisterAggr DRs(PRI);
- for (NodeAddr<RefNode*> R : PhiRefs) {
- if (!DFG.IsRef<NodeAttrs::Def>(R))
- continue;
- DRs.insert(R.Addr->getRegRef(DFG));
- DefQ.insert(R.Id);
- PhiDefs.insert(R.Id);
- }
- PhiDRs.insert(std::make_pair(PhiA.Id, DRs));
-
- // Collect the super-set of all possible reached uses. This set will
- // contain all uses reached from this phi, either directly from the
- // phi defs, or (recursively) via non-phi defs reached by the phi defs.
-    // This set of uses will later be trimmed to contain only those uses that
- // are actually reached by the phi defs.
- for (unsigned i = 0; i < DefQ.size(); ++i) {
- NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]);
- // Visit all reached uses. Phi defs should not really have the "dead"
- // flag set, but check it anyway for consistency.
- bool IsDead = DA.Addr->getFlags() & NodeAttrs::Dead;
- NodeId UN = !IsDead ? DA.Addr->getReachedUse() : 0;
- while (UN != 0) {
- NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
- uint16_t F = A.Addr->getFlags();
- if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
- RegisterRef R = PRI.normalize(A.Addr->getRegRef(DFG));
- RealUses[R.Reg].insert({A.Id,R.Mask});
- }
- UN = A.Addr->getSibling();
- }
- // Visit all reached defs, and add them to the queue. These defs may
- // override some of the uses collected here, but that will be handled
- // later.
- NodeId DN = DA.Addr->getReachedDef();
- while (DN != 0) {
- NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN);
- for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) {
- uint16_t Flags = NodeAddr<DefNode*>(T).Addr->getFlags();
- // Must traverse the reached-def chain. Consider:
- // def(D0) -> def(R0) -> def(R0) -> use(D0)
- // The reachable use of D0 passes through a def of R0.
- if (!(Flags & NodeAttrs::PhiRef))
- DefQ.insert(T.Id);
- }
- DN = A.Addr->getSibling();
- }
- }
-    // Filter out those uses that appear to be reachable, but really
- // are not. For example:
- //
- // R1:0 = d1
- // = R1:0 u2 Reached by d1.
- // R0 = d3
- // = R1:0 u4 Still reached by d1: indirectly through
- // the def d3.
- // R1 = d5
- // = R1:0 u6 Not reached by d1 (covered collectively
- // by d3 and d5), but following reached
- // defs and uses from d1 will lead here.
- for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) {
-      // For each reached register UI->first, there is a set UI->second of
-      // uses of it. For each such use, check if it is reached by this phi,
-      // i.e. check if the set of its reaching defs intersects the set of
-      // this phi's defs.
- NodeRefSet Uses = UI->second;
- UI->second.clear();
- for (std::pair<NodeId,LaneBitmask> I : Uses) {
- auto UA = DFG.addr<UseNode*>(I.first);
- // Undef flag is checked above.
- assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0);
- RegisterRef R(UI->first, I.second);
- // Calculate the exposed part of the reached use.
- RegisterAggr Covered(PRI);
- for (NodeAddr<DefNode*> DA : getAllReachingDefs(R, UA)) {
- if (PhiDefs.count(DA.Id))
- break;
- Covered.insert(DA.Addr->getRegRef(DFG));
- }
- if (RegisterRef RC = Covered.clearIn(R)) {
- // We are updating the map for register UI->first, so we need
- // to map RC to be expressed in terms of that register.
- RegisterRef S = PRI.mapTo(RC, UI->first);
- UI->second.insert({I.first, S.Mask});
- }
- }
- UI = UI->second.empty() ? RealUses.erase(UI) : std::next(UI);
- }
-
- // If this phi reaches some "real" uses, add it to the queue for upward
- // propagation.
- if (!RealUses.empty())
- PhiUQ.push_back(PhiA.Id);
-
- // Go over all phi uses and check if the reaching def is another phi.
- // Collect the phis that are among the reaching defs of these uses.
- // While traversing the list of reaching defs for each phi use, accumulate
- // the set of registers defined between this phi (PhiA) and the owner phi
- // of the reaching def.
- NodeSet SeenUses;
-
- for (auto I : PhiRefs) {
- if (!DFG.IsRef<NodeAttrs::Use>(I) || SeenUses.count(I.Id))
- continue;
- NodeAddr<PhiUseNode*> PUA = I;
- if (PUA.Addr->getReachingDef() == 0)
- continue;
-
- RegisterRef UR = PUA.Addr->getRegRef(DFG);
- NodeList Ds = getAllReachingDefs(UR, PUA, true, false, NoRegs);
- RegisterAggr DefRRs(PRI);
-
- for (NodeAddr<DefNode*> D : Ds) {
- if (D.Addr->getFlags() & NodeAttrs::PhiRef) {
- NodeId RP = D.Addr->getOwner(DFG).Id;
- std::map<NodeId,RegisterAggr> &M = PhiUp[PUA.Id];
- auto F = M.find(RP);
- if (F == M.end())
- M.insert(std::make_pair(RP, DefRRs));
- else
- F->second.insert(DefRRs);
- }
- DefRRs.insert(D.Addr->getRegRef(DFG));
- }
-
- for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PhiA, PUA))
- SeenUses.insert(T.Id);
- }
- }
-
- if (Trace) {
- dbgs() << "Phi-up-to-phi map with intervening defs:\n";
- for (auto I : PhiUp) {
- dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {";
- for (auto R : I.second)
- dbgs() << ' ' << Print<NodeId>(R.first, DFG)
- << Print<RegisterAggr>(R.second, DFG);
- dbgs() << " }\n";
- }
- }
-
- // Propagate the reached registers up in the phi chain.
- //
- // The following type of situation needs careful handling:
- //
- // phi d1<R1:0> (1)
- // |
- // ... d2<R1>
- // |
- // phi u3<R1:0> (2)
- // |
- // ... u4<R1>
- //
- // The phi node (2) defines a register pair R1:0, and reaches a "real"
- // use u4 of just R1. The same phi node is also known to reach (upwards)
- // the phi node (1). However, the use u4 is not reached by phi (1),
- // because of the intervening definition d2 of R1. The data flow between
- // phis (1) and (2) is restricted to R1:0 minus R1, i.e. R0.
- //
-  // When propagating uses up the phi chains, get all the reaching defs
- // for a given phi use, and traverse the list until the propagated ref
- // is covered, or until reaching the final phi. Only assume that the
- // reference reaches the phi in the latter case.
-
- for (unsigned i = 0; i < PhiUQ.size(); ++i) {
- auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
- NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG);
- RefMap &RUM = RealUseMap[PA.Id];
-
- for (NodeAddr<UseNode*> UA : PUs) {
- std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
- RegisterRef UR = PRI.normalize(UA.Addr->getRegRef(DFG));
- for (const std::pair<const NodeId, RegisterAggr> &P : PUM) {
- bool Changed = false;
- const RegisterAggr &MidDefs = P.second;
-
- // Collect the set PropUp of uses that are reached by the current
- // phi PA, and are not covered by any intervening def between the
- // currently visited use UA and the upward phi P.
-
- if (MidDefs.hasCoverOf(UR))
- continue;
-
- // General algorithm:
- // for each (R,U) : U is use node of R, U is reached by PA
- // if MidDefs does not cover (R,U)
- // then add (R-MidDefs,U) to RealUseMap[P]
- //
- for (const std::pair<const RegisterId, NodeRefSet> &T : RUM) {
- RegisterRef R(T.first);
- // The current phi (PA) could be a phi for a regmask. It could
- // reach a whole variety of uses that are not related to the
- // specific upward phi (P.first).
- const RegisterAggr &DRs = PhiDRs.at(P.first);
- if (!DRs.hasAliasOf(R))
- continue;
- R = PRI.mapTo(DRs.intersectWith(R), T.first);
- for (std::pair<NodeId,LaneBitmask> V : T.second) {
- LaneBitmask M = R.Mask & V.second;
- if (M.none())
- continue;
- if (RegisterRef SS = MidDefs.clearIn(RegisterRef(R.Reg, M))) {
- NodeRefSet &RS = RealUseMap[P.first][SS.Reg];
- Changed |= RS.insert({V.first,SS.Mask}).second;
- }
- }
- }
-
- if (Changed)
- PhiUQ.push_back(P.first);
- }
- }
- }
-
- if (Trace) {
- dbgs() << "Real use map:\n";
- for (auto I : RealUseMap) {
- dbgs() << "phi " << Print<NodeId>(I.first, DFG);
- NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first);
- NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG);
- if (!Ds.empty()) {
- RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(DFG);
- dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>';
- } else {
- dbgs() << "<noreg>";
- }
- dbgs() << " -> " << Print<RefMap>(I.second, DFG) << '\n';
- }
- }
-}
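
computePhiInfo drains PhiUQ by index rather than popping from the front, which is the usual way to run a monotone fixpoint over a worklist that grows during iteration. The shape of that loop in isolation (Step is a hypothetical callback, not an RDF API):

#include <functional>
#include <vector>

// Runs Step on each queued item; Step returns the items it newly affected,
// which are appended and eventually processed too. This terminates as long
// as Step only reports an item when some monotone state actually grew.
void runToFixpoint(std::vector<int> &Work,
                   const std::function<std::vector<int>(int)> &Step) {
  for (unsigned i = 0; i < Work.size(); ++i) // Work grows while we scan it
    for (int Next : Step(Work[i]))
      Work.push_back(Next);
}
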
-
-void Liveness::computeLiveIns() {
- // Populate the node-to-block map. This speeds up the calculations
- // significantly.
- NBMap.clear();
- for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) {
- MachineBasicBlock *BB = BA.Addr->getCode();
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
- for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
- NBMap.insert(std::make_pair(RA.Id, BB));
- NBMap.insert(std::make_pair(IA.Id, BB));
- }
- }
-
- MachineFunction &MF = DFG.getMF();
-
- // Compute IDF first, then the inverse.
- decltype(IIDF) IDF;
- for (MachineBasicBlock &B : MF) {
- auto F1 = MDF.find(&B);
- if (F1 == MDF.end())
- continue;
- SetVector<MachineBasicBlock*> IDFB(F1->second.begin(), F1->second.end());
- for (unsigned i = 0; i < IDFB.size(); ++i) {
- auto F2 = MDF.find(IDFB[i]);
- if (F2 != MDF.end())
- IDFB.insert(F2->second.begin(), F2->second.end());
- }
- // Add B to the IDF(B). This will put B in the IIDF(B).
- IDFB.insert(&B);
- IDF[&B].insert(IDFB.begin(), IDFB.end());
- }
-
- for (auto I : IDF)
- for (auto S : I.second)
- IIDF[S].insert(I.first);
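
The loop above computes the iterated dominance frontier by re-scanning a SetVector that grows as new frontier blocks are discovered, then inverts the result into IIDF. The same fixpoint as a standalone sketch over plain ints, where DF is a hypothetical precomputed dominance-frontier map:

#include <map>
#include <set>
#include <vector>

std::set<int> iteratedDF(int B, const std::map<int, std::set<int>> &DF) {
  std::vector<int> Order;   // insertion order, scanned by index
  std::set<int> Seen;       // membership test
  auto Insert = [&](int X) {
    if (Seen.insert(X).second)
      Order.push_back(X);
  };

  auto F = DF.find(B);
  if (F != DF.end())
    for (int X : F->second)
      Insert(X);
  for (unsigned i = 0; i < Order.size(); ++i) {
    auto G = DF.find(Order[i]);
    if (G == DF.end())
      continue;
    for (int X : G->second)
      Insert(X);              // newly found blocks are scanned later
  }
  return Seen;
}
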
-
- computePhiInfo();
-
- NodeAddr<FuncNode*> FA = DFG.getFunc();
- NodeList Blocks = FA.Addr->members(DFG);
-
- // Build the phi live-on-entry map.
- for (NodeAddr<BlockNode*> BA : Blocks) {
- MachineBasicBlock *MB = BA.Addr->getCode();
- RefMap &LON = PhiLON[MB];
- for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG))
- for (const RefMap::value_type &S : RealUseMap[P.Id])
- LON[S.first].insert(S.second.begin(), S.second.end());
- }
-
- if (Trace) {
- dbgs() << "Phi live-on-entry map:\n";
- for (auto &I : PhiLON)
- dbgs() << "block #" << I.first->getNumber() << " -> "
- << Print<RefMap>(I.second, DFG) << '\n';
- }
-
- // Build the phi live-on-exit map. Each phi node has some set of reached
- // "real" uses. Propagate this set backwards into the block predecessors
- // through the reaching defs of the corresponding phi uses.
- for (NodeAddr<BlockNode*> BA : Blocks) {
- NodeList Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
- for (NodeAddr<PhiNode*> PA : Phis) {
- RefMap &RUs = RealUseMap[PA.Id];
- if (RUs.empty())
- continue;
-
- NodeSet SeenUses;
- for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
- if (!SeenUses.insert(U.Id).second)
- continue;
- NodeAddr<PhiUseNode*> PUA = U;
- if (PUA.Addr->getReachingDef() == 0)
- continue;
-
- // Each phi has some set (possibly empty) of reached "real" uses,
- // that is, uses that are part of the compiled program. Such a use
- // may be located in some farther block, but following a chain of
- // reaching defs will eventually lead to this phi.
- // Any chain of reaching defs may fork at a phi node, but there
- // will be a path upwards that will lead to this phi. Now, this
- // chain will need to fork at this phi, since some of the reached
- // uses may have definitions joining in from multiple predecessors.
- // For each reached "real" use, identify the set of reaching defs
- // coming from each predecessor P, and add them to PhiLOX[P].
- //
- auto PrA = DFG.addr<BlockNode*>(PUA.Addr->getPredecessor());
- RefMap &LOX = PhiLOX[PrA.Addr->getCode()];
-
- for (const std::pair<const RegisterId, NodeRefSet> &RS : RUs) {
- // We need to visit each individual use.
- for (std::pair<NodeId,LaneBitmask> P : RS.second) {
- // Create a register ref corresponding to the use, and find
- // all reaching defs starting from the phi use, and treating
- // all related shadows as a single use cluster.
- RegisterRef S(RS.first, P.second);
- NodeList Ds = getAllReachingDefs(S, PUA, true, false, NoRegs);
- for (NodeAddr<DefNode*> D : Ds) {
- // Calculate the mask corresponding to the visited def.
- RegisterAggr TA(PRI);
- TA.insert(D.Addr->getRegRef(DFG)).intersect(S);
- LaneBitmask TM = TA.makeRegRef().Mask;
- LOX[S.Reg].insert({D.Id, TM});
- }
- }
- }
-
- for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PA, PUA))
- SeenUses.insert(T.Id);
- } // for U : phi uses
- } // for P : Phis
- } // for B : Blocks
-
- if (Trace) {
- dbgs() << "Phi live-on-exit map:\n";
- for (auto &I : PhiLOX)
- dbgs() << "block #" << I.first->getNumber() << " -> "
- << Print<RefMap>(I.second, DFG) << '\n';
- }
-
- RefMap LiveIn;
- traverse(&MF.front(), LiveIn);
-
- // Add function live-ins to the live-in set of the function entry block.
- LiveMap[&MF.front()].insert(DFG.getLiveIns());
-
- if (Trace) {
- // Dump the liveness map
- for (MachineBasicBlock &B : MF) {
- std::vector<RegisterRef> LV;
- for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
- LV.push_back(RegisterRef(I->PhysReg, I->LaneMask));
- llvm::sort(LV);
- dbgs() << printMBBReference(B) << "\t rec = {";
- for (auto I : LV)
- dbgs() << ' ' << Print<RegisterRef>(I, DFG);
- dbgs() << " }\n";
- //dbgs() << "\tcomp = " << Print<RegisterAggr>(LiveMap[&B], DFG) << '\n';
-
- LV.clear();
- const RegisterAggr &LG = LiveMap[&B];
- for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I)
- LV.push_back(*I);
- llvm::sort(LV);
- dbgs() << "\tcomp = {";
- for (auto I : LV)
- dbgs() << ' ' << Print<RegisterRef>(I, DFG);
- dbgs() << " }\n";
-
- }
- }
-}
-
-void Liveness::resetLiveIns() {
- for (auto &B : DFG.getMF()) {
- // Remove all live-ins.
- std::vector<unsigned> T;
- for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
- T.push_back(I->PhysReg);
- for (auto I : T)
- B.removeLiveIn(I);
- // Add the newly computed live-ins.
- const RegisterAggr &LiveIns = LiveMap[&B];
- for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) {
- RegisterRef R = *I;
- B.addLiveIn({MCPhysReg(R.Reg), R.Mask});
- }
- }
-}
-
-void Liveness::resetKills() {
- for (auto &B : DFG.getMF())
- resetKills(&B);
-}
-
-void Liveness::resetKills(MachineBasicBlock *B) {
- auto CopyLiveIns = [this] (MachineBasicBlock *B, BitVector &LV) -> void {
- for (auto I : B->liveins()) {
- MCSubRegIndexIterator S(I.PhysReg, &TRI);
- if (!S.isValid()) {
- LV.set(I.PhysReg);
- continue;
- }
- do {
- LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex());
- if ((M & I.LaneMask).any())
- LV.set(S.getSubReg());
- ++S;
- } while (S.isValid());
- }
- };
-
- BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs());
- CopyLiveIns(B, LiveIn);
- for (auto SI : B->successors())
- CopyLiveIns(SI, Live);
-
- for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) {
- MachineInstr *MI = &*I;
- if (MI->isDebugInstr())
- continue;
-
- MI->clearKillInfo();
- for (auto &Op : MI->operands()) {
- // An implicit def of a super-register may not necessarily start a
- // live range of it, since an implicit use could be used to keep parts
- // of it live. Instead of analyzing the implicit operands, ignore
- // implicit defs.
- if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
- continue;
- Register R = Op.getReg();
- if (!Register::isPhysicalRegister(R))
- continue;
- for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
- Live.reset(*SR);
- }
- for (auto &Op : MI->operands()) {
- if (!Op.isReg() || !Op.isUse() || Op.isUndef())
- continue;
- Register R = Op.getReg();
- if (!Register::isPhysicalRegister(R))
- continue;
- bool IsLive = false;
- for (MCRegAliasIterator AR(R, &TRI, true); AR.isValid(); ++AR) {
- if (!Live[*AR])
- continue;
- IsLive = true;
- break;
- }
- if (!IsLive)
- Op.setIsKill(true);
- for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
- Live.set(*SR);
- }
- }
-}
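
resetKills is a standard backward kill-flag recomputation: seed liveness from the successors' live-ins, walk the instructions bottom-up, let defs end live ranges, and mark a use as a kill when its register is not live below it. A register-unit-free sketch under those assumptions (Inst and the int register ids are hypothetical stand-ins):

#include <set>
#include <vector>

struct Inst {
  std::vector<int> Defs, Uses;
  std::vector<bool> KillFlags;   // parallel to Uses
};

void recomputeKills(std::vector<Inst> &Block, std::set<int> LiveOut) {
  for (auto I = Block.rbegin(), E = Block.rend(); I != E; ++I) {
    for (int D : I->Defs)
      LiveOut.erase(D);                  // a def ends the live range below
    I->KillFlags.assign(I->Uses.size(), false);
    for (unsigned u = 0; u < I->Uses.size(); ++u) {
      if (!LiveOut.count(I->Uses[u]))
        I->KillFlags[u] = true;          // last use seen from below
      LiveOut.insert(I->Uses[u]);        // the register is live above here
    }
  }
}
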
-
-// Helper function to obtain the basic block containing the given
-// reference node.
-MachineBasicBlock *Liveness::getBlockWithRef(NodeId RN) const {
- auto F = NBMap.find(RN);
- if (F != NBMap.end())
- return F->second;
- llvm_unreachable("Node id not in map");
-}
-
-void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
- // The LiveIn map, for each (physical) register, contains the set of live
- // reaching defs of that register that are live on entry to the associated
- // block.
-
- // The summary of the traversal algorithm:
- //
-  //   R is live-in in B if there exists a U(R) such that rdef(R) dom B
-  //   and (U \in IDF(B) or B dom U).
- //
- // for (C : children) {
- // LU = {}
- // traverse(C, LU)
- // LiveUses += LU
- // }
- //
- // LiveUses -= Defs(B);
- // LiveUses += UpwardExposedUses(B);
- // for (C : IIDF[B])
- // for (U : LiveUses)
- // if (Rdef(U) dom C)
- // C.addLiveIn(U)
- //
-
- // Go up the dominator tree (depth-first).
- MachineDomTreeNode *N = MDT.getNode(B);
- for (auto I : *N) {
- RefMap L;
- MachineBasicBlock *SB = I->getBlock();
- traverse(SB, L);
-
- for (auto S : L)
- LiveIn[S.first].insert(S.second.begin(), S.second.end());
- }
-
- if (Trace) {
- dbgs() << "\n-- " << printMBBReference(*B) << ": " << __func__
- << " after recursion into: {";
- for (auto I : *N)
- dbgs() << ' ' << I->getBlock()->getNumber();
- dbgs() << " }\n";
- dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
- }
-
- // Add reaching defs of phi uses that are live on exit from this block.
- RefMap &PUs = PhiLOX[B];
- for (auto &S : PUs)
- LiveIn[S.first].insert(S.second.begin(), S.second.end());
-
- if (Trace) {
- dbgs() << "after LOX\n";
- dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
- }
-
- // The LiveIn map at this point has all defs that are live-on-exit from B,
- // as if they were live-on-entry to B. First, we need to filter out all
- // defs that are present in this block. Then we will add reaching defs of
- // all upward-exposed uses.
-
- // To filter out the defs, first make a copy of LiveIn, and then re-populate
- // LiveIn with the defs that should remain.
- RefMap LiveInCopy = LiveIn;
- LiveIn.clear();
-
- for (const std::pair<const RegisterId, NodeRefSet> &LE : LiveInCopy) {
- RegisterRef LRef(LE.first);
- NodeRefSet &NewDefs = LiveIn[LRef.Reg]; // To be filled.
- const NodeRefSet &OldDefs = LE.second;
- for (NodeRef OR : OldDefs) {
-      // OR refers to a def node that was live-on-exit.
- auto DA = DFG.addr<DefNode*>(OR.first);
- NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG);
- NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
- if (B != BA.Addr->getCode()) {
- // Defs from a different block need to be preserved. Defs from this
- // block will need to be processed further, except for phi defs, the
- // liveness of which is handled through the PhiLON/PhiLOX maps.
- NewDefs.insert(OR);
- continue;
- }
-
- // Defs from this block need to stop the liveness from being
- // propagated upwards. This only applies to non-preserving defs,
- // and to the parts of the register actually covered by those defs.
- // (Note that phi defs should always be preserving.)
- RegisterAggr RRs(PRI);
- LRef.Mask = OR.second;
-
- if (!DFG.IsPreservingDef(DA)) {
- assert(!(IA.Addr->getFlags() & NodeAttrs::Phi));
- // DA is a non-phi def that is live-on-exit from this block, and
- // that is also located in this block. LRef is a register ref
- // whose use this def reaches. If DA covers LRef, then no part
-      // of LRef is exposed upwards.
- if (RRs.insert(DA.Addr->getRegRef(DFG)).hasCoverOf(LRef))
- continue;
- }
-
- // DA itself was not sufficient to cover LRef. In general, it is
- // the last in a chain of aliased defs before the exit from this block.
- // There could be other defs in this block that are a part of that
- // chain. Check that now: accumulate the registers from these defs,
- // and if they all together cover LRef, it is not live-on-entry.
- for (NodeAddr<DefNode*> TA : getAllReachingDefs(DA)) {
- // DefNode -> InstrNode -> BlockNode.
- NodeAddr<InstrNode*> ITA = TA.Addr->getOwner(DFG);
- NodeAddr<BlockNode*> BTA = ITA.Addr->getOwner(DFG);
- // Reaching defs are ordered in the upward direction.
- if (BTA.Addr->getCode() != B) {
- // We have reached past the beginning of B, and the accumulated
- // registers are not covering LRef. The first def from the
- // upward chain will be live.
- // Subtract all accumulated defs (RRs) from LRef.
- RegisterRef T = RRs.clearIn(LRef);
- assert(T);
- NewDefs.insert({TA.Id,T.Mask});
- break;
- }
-
- // TA is in B. Only add this def to the accumulated cover if it is
- // not preserving.
- if (!(TA.Addr->getFlags() & NodeAttrs::Preserving))
- RRs.insert(TA.Addr->getRegRef(DFG));
- // If this is enough to cover LRef, then stop.
- if (RRs.hasCoverOf(LRef))
- break;
- }
- }
- }
-
- emptify(LiveIn);
-
- if (Trace) {
- dbgs() << "after defs in block\n";
- dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
- }
-
- // Scan the block for upward-exposed uses and add them to the tracking set.
- for (auto I : DFG.getFunc().Addr->findBlock(B, DFG).Addr->members(DFG)) {
- NodeAddr<InstrNode*> IA = I;
- if (IA.Addr->getKind() != NodeAttrs::Stmt)
- continue;
- for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
- if (UA.Addr->getFlags() & NodeAttrs::Undef)
- continue;
- RegisterRef RR = PRI.normalize(UA.Addr->getRegRef(DFG));
- for (NodeAddr<DefNode*> D : getAllReachingDefs(UA))
- if (getBlockWithRef(D.Id) != B)
- LiveIn[RR.Reg].insert({D.Id,RR.Mask});
- }
- }
-
- if (Trace) {
- dbgs() << "after uses in block\n";
- dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
- }
-
- // Phi uses should not be propagated up the dominator tree, since they
- // are not dominated by their corresponding reaching defs.
- RegisterAggr &Local = LiveMap[B];
- RefMap &LON = PhiLON[B];
- for (auto &R : LON) {
- LaneBitmask M;
- for (auto P : R.second)
- M |= P.second;
- Local.insert(RegisterRef(R.first,M));
- }
-
- if (Trace) {
- dbgs() << "after phi uses in block\n";
- dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterAggr>(Local, DFG) << '\n';
- }
-
- for (auto C : IIDF[B]) {
- RegisterAggr &LiveC = LiveMap[C];
- for (const std::pair<const RegisterId, NodeRefSet> &S : LiveIn)
- for (auto R : S.second)
- if (MDT.properlyDominates(getBlockWithRef(R.first), C))
- LiveC.insert(RegisterRef(S.first, R.second));
- }
-}
-
-void Liveness::emptify(RefMap &M) {
- for (auto I = M.begin(), E = M.end(); I != E; )
- I = I->second.empty() ? M.erase(I) : std::next(I);
-}
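
emptify relies on std::map::erase returning the iterator past the erased element, the standard single-pass erase-while-iterating idiom. In isolation, with stand-in key/value types:

#include <iterator>
#include <map>
#include <set>
#include <string>

void dropEmptyEntries(std::map<std::string, std::set<int>> &M) {
  for (auto I = M.begin(), E = M.end(); I != E;)
    I = I->second.empty() ? M.erase(I) : std::next(I);
}
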
diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.h b/llvm/lib/Target/Hexagon/RDFLiveness.h
deleted file mode 100644
index ea4890271726..000000000000
--- a/llvm/lib/Target/Hexagon/RDFLiveness.h
+++ /dev/null
@@ -1,151 +0,0 @@
-//===- RDFLiveness.h --------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Recalculate the liveness information given a data flow graph.
-// This includes block live-ins and kill flags.
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H
-#define LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H
-
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/MC/LaneBitmask.h"
-#include <map>
-#include <set>
-#include <utility>
-
-namespace llvm {
-
-class MachineBasicBlock;
-class MachineDominanceFrontier;
-class MachineDominatorTree;
-class MachineRegisterInfo;
-class TargetRegisterInfo;
-
-namespace rdf {
-
- struct Liveness {
- public:
-  // This is really a std::map, except that it provides a usable
-  // operator[] even though the mapped type (RegisterAggr) has no
-  // default constructor.
- struct LiveMapType {
- LiveMapType(const PhysicalRegisterInfo &pri) : Empty(pri) {}
-
- RegisterAggr &operator[] (MachineBasicBlock *B) {
- return Map.emplace(B, Empty).first->second;
- }
-
- private:
- RegisterAggr Empty;
- std::map<MachineBasicBlock*,RegisterAggr> Map;
- };
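
LiveMapType exists because RegisterAggr is not default-constructible, so a plain std::map's operator[] cannot create missing entries; emplace with a pre-built "Empty" prototype supplies the default instead. The same pattern with a stand-in value type (Aggr here is hypothetical):

#include <map>
#include <string>

struct Aggr {                  // stand-in for RegisterAggr
  explicit Aggr(int Cfg) : Config(Cfg) {}
  int Config;
};

class LiveMap {
  Aggr Empty;
  std::map<std::string, Aggr> Map;

public:
  explicit LiveMap(int Cfg) : Empty(Cfg) {}
  Aggr &operator[](const std::string &K) {
    // emplace is a no-op if K is already present; either way the
    // returned iterator points at the entry for K.
    return Map.emplace(K, Empty).first->second;
  }
};
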
-
- using NodeRef = std::pair<NodeId, LaneBitmask>;
- using NodeRefSet = std::set<NodeRef>;
- // RegisterId in RefMap must be normalized.
- using RefMap = std::map<RegisterId, NodeRefSet>;
-
- Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
- : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()),
- MDF(g.getDF()), LiveMap(g.getPRI()), Empty(), NoRegs(g.getPRI()) {}
-
- NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- bool TopShadows, bool FullChain, const RegisterAggr &DefRRs);
-
- NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false,
- false, NoRegs);
- }
-
- NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefRR, RefA, false, false, NoRegs);
- }
-
- NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA,
- const RegisterAggr &DefRRs);
-
- NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA) {
- return getAllReachedUses(RefRR, DefA, NoRegs);
- }
-
- std::pair<NodeSet,bool> getAllReachingDefsRec(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs);
-
- NodeAddr<RefNode*> getNearestAliasedRef(RegisterRef RefRR,
- NodeAddr<InstrNode*> IA);
-
- LiveMapType &getLiveMap() { return LiveMap; }
- const LiveMapType &getLiveMap() const { return LiveMap; }
-
- const RefMap &getRealUses(NodeId P) const {
- auto F = RealUseMap.find(P);
- return F == RealUseMap.end() ? Empty : F->second;
- }
-
- void computePhiInfo();
- void computeLiveIns();
- void resetLiveIns();
- void resetKills();
- void resetKills(MachineBasicBlock *B);
-
- void trace(bool T) { Trace = T; }
-
- private:
- const DataFlowGraph &DFG;
- const TargetRegisterInfo &TRI;
- const PhysicalRegisterInfo &PRI;
- const MachineDominatorTree &MDT;
- const MachineDominanceFrontier &MDF;
- LiveMapType LiveMap;
- const RefMap Empty;
- const RegisterAggr NoRegs;
- bool Trace = false;
-
-    // Cache mapping node ids (for RefNodes) to the containing basic
-    // blocks. Not recomputing this for every node reduces the liveness
-    // calculation time by a large fraction.
- using NodeBlockMap = DenseMap<NodeId, MachineBasicBlock *>;
- NodeBlockMap NBMap;
-
- // Phi information:
- //
- // RealUseMap
- // map: NodeId -> (map: RegisterId -> NodeRefSet)
- // phi id -> (map: register -> set of reached non-phi uses)
- std::map<NodeId, RefMap> RealUseMap;
-
- // Inverse iterated dominance frontier.
- std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF;
-
- // Live on entry.
- std::map<MachineBasicBlock*,RefMap> PhiLON;
-
- // Phi uses are considered to be located at the end of the block that
- // they are associated with. The reaching def of a phi use dominates the
- // block that the use corresponds to, but not the block that contains
- // the phi itself. To include these uses in the liveness propagation (up
- // the dominator tree), create a map: block -> set of uses live on exit.
- std::map<MachineBasicBlock*,RefMap> PhiLOX;
-
- MachineBasicBlock *getBlockWithRef(NodeId RN) const;
- void traverse(MachineBasicBlock *B, RefMap &LiveIn);
- void emptify(RefMap &M);
-
- std::pair<NodeSet,bool> getAllReachingDefsRecImpl(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs,
- unsigned Nest, unsigned MaxNest);
- };
-
- raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P);
-
-} // end namespace rdf
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H
diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.cpp b/llvm/lib/Target/Hexagon/RDFRegisters.cpp
deleted file mode 100644
index b5675784e34b..000000000000
--- a/llvm/lib/Target/Hexagon/RDFRegisters.cpp
+++ /dev/null
@@ -1,380 +0,0 @@
-//===- RDFRegisters.cpp ---------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "RDFRegisters.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/LaneBitmask.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstdint>
-#include <set>
-#include <utility>
-
-using namespace llvm;
-using namespace rdf;
-
-PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
- const MachineFunction &mf)
- : TRI(tri) {
- RegInfos.resize(TRI.getNumRegs());
-
- BitVector BadRC(TRI.getNumRegs());
- for (const TargetRegisterClass *RC : TRI.regclasses()) {
- for (MCPhysReg R : *RC) {
- RegInfo &RI = RegInfos[R];
- if (RI.RegClass != nullptr && !BadRC[R]) {
- if (RC->LaneMask != RI.RegClass->LaneMask) {
- BadRC.set(R);
- RI.RegClass = nullptr;
- }
- } else
- RI.RegClass = RC;
- }
- }
-
- UnitInfos.resize(TRI.getNumRegUnits());
-
- for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) {
- if (UnitInfos[U].Reg != 0)
- continue;
- MCRegUnitRootIterator R(U, &TRI);
- assert(R.isValid());
- RegisterId F = *R;
- ++R;
- if (R.isValid()) {
- UnitInfos[U].Mask = LaneBitmask::getAll();
- UnitInfos[U].Reg = F;
- } else {
- for (MCRegUnitMaskIterator I(F, &TRI); I.isValid(); ++I) {
- std::pair<uint32_t,LaneBitmask> P = *I;
- UnitInfo &UI = UnitInfos[P.first];
- UI.Reg = F;
- if (P.second.any()) {
- UI.Mask = P.second;
- } else {
- if (const TargetRegisterClass *RC = RegInfos[F].RegClass)
- UI.Mask = RC->LaneMask;
- else
- UI.Mask = LaneBitmask::getAll();
- }
- }
- }
- }
-
- for (const uint32_t *RM : TRI.getRegMasks())
- RegMasks.insert(RM);
- for (const MachineBasicBlock &B : mf)
- for (const MachineInstr &In : B)
- for (const MachineOperand &Op : In.operands())
- if (Op.isRegMask())
- RegMasks.insert(Op.getRegMask());
-
- MaskInfos.resize(RegMasks.size()+1);
- for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) {
- BitVector PU(TRI.getNumRegUnits());
- const uint32_t *MB = RegMasks.get(M);
- for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
- if (!(MB[i/32] & (1u << (i%32))))
- continue;
- for (MCRegUnitIterator U(i, &TRI); U.isValid(); ++U)
- PU.set(*U);
- }
- MaskInfos[M].Units = PU.flip();
- }
-}
-
-RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const {
- return RR;
-}
-
-std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
- // Do not include RR in the alias set.
- std::set<RegisterId> AS;
- assert(isRegMaskId(Reg) || Register::isPhysicalRegister(Reg));
- if (isRegMaskId(Reg)) {
- // XXX SLOW
- const uint32_t *MB = getRegMaskBits(Reg);
- for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
- if (MB[i/32] & (1u << (i%32)))
- continue;
- AS.insert(i);
- }
- for (const uint32_t *RM : RegMasks) {
- RegisterId MI = getRegMaskId(RM);
- if (MI != Reg && aliasMM(RegisterRef(Reg), RegisterRef(MI)))
- AS.insert(MI);
- }
- return AS;
- }
-
- for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI)
- AS.insert(*AI);
- for (const uint32_t *RM : RegMasks) {
- RegisterId MI = getRegMaskId(RM);
- if (aliasRM(RegisterRef(Reg), RegisterRef(MI)))
- AS.insert(MI);
- }
- return AS;
-}
-
-bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const {
- assert(Register::isPhysicalRegister(RA.Reg));
- assert(Register::isPhysicalRegister(RB.Reg));
-
- MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
- MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
-  // Reg units are returned in numerical order.
- while (UMA.isValid() && UMB.isValid()) {
- // Skip units that are masked off in RA.
- std::pair<RegisterId,LaneBitmask> PA = *UMA;
- if (PA.second.any() && (PA.second & RA.Mask).none()) {
- ++UMA;
- continue;
- }
- // Skip units that are masked off in RB.
- std::pair<RegisterId,LaneBitmask> PB = *UMB;
- if (PB.second.any() && (PB.second & RB.Mask).none()) {
- ++UMB;
- continue;
- }
-
- if (PA.first == PB.first)
- return true;
- if (PA.first < PB.first)
- ++UMA;
- else if (PB.first < PA.first)
- ++UMB;
- }
- return false;
-}
-
-bool PhysicalRegisterInfo::aliasRM(RegisterRef RR, RegisterRef RM) const {
- assert(Register::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg));
- const uint32_t *MB = getRegMaskBits(RM.Reg);
- bool Preserved = MB[RR.Reg/32] & (1u << (RR.Reg%32));
- // If the lane mask information is "full", e.g. when the given lane mask
- // is a superset of the lane mask from the register class, check the regmask
- // bit directly.
- if (RR.Mask == LaneBitmask::getAll())
- return !Preserved;
- const TargetRegisterClass *RC = RegInfos[RR.Reg].RegClass;
- if (RC != nullptr && (RR.Mask & RC->LaneMask) == RC->LaneMask)
- return !Preserved;
-
- // Otherwise, check all subregisters whose lane mask overlaps the given
- // mask. For each such register, if it is preserved by the regmask, then
-  // clear the corresponding bits in the given mask. If, at the end, all
-  // bits have been cleared, the register does not alias the regmask (i.e.
-  // it is preserved by it).
- LaneBitmask M = RR.Mask;
- for (MCSubRegIndexIterator SI(RR.Reg, &TRI); SI.isValid(); ++SI) {
- LaneBitmask SM = TRI.getSubRegIndexLaneMask(SI.getSubRegIndex());
- if ((SM & RR.Mask).none())
- continue;
- unsigned SR = SI.getSubReg();
- if (!(MB[SR/32] & (1u << (SR%32))))
- continue;
- // The subregister SR is preserved.
- M &= ~SM;
- if (M.none())
- return false;
- }
-
- return true;
-}
-
-bool PhysicalRegisterInfo::aliasMM(RegisterRef RM, RegisterRef RN) const {
- assert(isRegMaskId(RM.Reg) && isRegMaskId(RN.Reg));
- unsigned NumRegs = TRI.getNumRegs();
- const uint32_t *BM = getRegMaskBits(RM.Reg);
- const uint32_t *BN = getRegMaskBits(RN.Reg);
-
- for (unsigned w = 0, nw = NumRegs/32; w != nw; ++w) {
- // Intersect the negations of both words. Disregard reg=0,
- // i.e. 0th bit in the 0th word.
- uint32_t C = ~BM[w] & ~BN[w];
- if (w == 0)
- C &= ~1;
- if (C)
- return true;
- }
-
- // Check the remaining registers in the last word.
- unsigned TailRegs = NumRegs % 32;
- if (TailRegs == 0)
- return false;
- unsigned TW = NumRegs / 32;
- uint32_t TailMask = (1u << TailRegs) - 1;
- if (~BM[TW] & ~BN[TW] & TailMask)
- return true;
-
- return false;
-}
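
Since a regmask sets a bit for each preserved register, two masks alias exactly when the bitwise AND of their complements is nonzero somewhere; the final partial word needs a tail mask, and bit 0 (register 0) is ignored. The arithmetic in isolation, as a sketch rather than the function above:

#include <cstdint>
#include <vector>

bool masksAlias(const std::vector<uint32_t> &BM,
                const std::vector<uint32_t> &BN, unsigned NumRegs) {
  for (unsigned w = 0, nw = NumRegs / 32; w != nw; ++w) {
    uint32_t C = ~BM[w] & ~BN[w];   // registers clobbered by both masks
    if (w == 0)
      C &= ~1u;                     // register 0 is not a real register
    if (C)
      return true;
  }
  unsigned Tail = NumRegs % 32;
  if (Tail == 0)
    return false;
  uint32_t TailMask = (1u << Tail) - 1;
  return (~BM[NumRegs / 32] & ~BN[NumRegs / 32] & TailMask) != 0;
}
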
-
-RegisterRef PhysicalRegisterInfo::mapTo(RegisterRef RR, unsigned R) const {
- if (RR.Reg == R)
- return RR;
- if (unsigned Idx = TRI.getSubRegIndex(R, RR.Reg))
- return RegisterRef(R, TRI.composeSubRegIndexLaneMask(Idx, RR.Mask));
- if (unsigned Idx = TRI.getSubRegIndex(RR.Reg, R)) {
- const RegInfo &RI = RegInfos[R];
- LaneBitmask RCM = RI.RegClass ? RI.RegClass->LaneMask
- : LaneBitmask::getAll();
- LaneBitmask M = TRI.reverseComposeSubRegIndexLaneMask(Idx, RR.Mask);
- return RegisterRef(R, M & RCM);
- }
- llvm_unreachable("Invalid arguments: unrelated registers?");
-}
-
-bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg))
- return Units.anyCommon(PRI.getMaskUnits(RR.Reg));
-
- for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
- std::pair<uint32_t,LaneBitmask> P = *U;
- if (P.second.none() || (P.second & RR.Mask).any())
- if (Units.test(P.first))
- return true;
- }
- return false;
-}
-
-bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
- BitVector T(PRI.getMaskUnits(RR.Reg));
- return T.reset(Units).none();
- }
-
- for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
- std::pair<uint32_t,LaneBitmask> P = *U;
- if (P.second.none() || (P.second & RR.Mask).any())
- if (!Units.test(P.first))
- return false;
- }
- return true;
-}
-
-RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
- Units |= PRI.getMaskUnits(RR.Reg);
- return *this;
- }
-
- for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
- std::pair<uint32_t,LaneBitmask> P = *U;
- if (P.second.none() || (P.second & RR.Mask).any())
- Units.set(P.first);
- }
- return *this;
-}
-
-RegisterAggr &RegisterAggr::insert(const RegisterAggr &RG) {
- Units |= RG.Units;
- return *this;
-}
-
-RegisterAggr &RegisterAggr::intersect(RegisterRef RR) {
- return intersect(RegisterAggr(PRI).insert(RR));
-}
-
-RegisterAggr &RegisterAggr::intersect(const RegisterAggr &RG) {
- Units &= RG.Units;
- return *this;
-}
-
-RegisterAggr &RegisterAggr::clear(RegisterRef RR) {
- return clear(RegisterAggr(PRI).insert(RR));
-}
-
-RegisterAggr &RegisterAggr::clear(const RegisterAggr &RG) {
- Units.reset(RG.Units);
- return *this;
-}
-
-RegisterRef RegisterAggr::intersectWith(RegisterRef RR) const {
- RegisterAggr T(PRI);
- T.insert(RR).intersect(*this);
- if (T.empty())
- return RegisterRef();
- RegisterRef NR = T.makeRegRef();
- assert(NR);
- return NR;
-}
-
-RegisterRef RegisterAggr::clearIn(RegisterRef RR) const {
- return RegisterAggr(PRI).insert(RR).clear(*this).makeRegRef();
-}
-
-RegisterRef RegisterAggr::makeRegRef() const {
- int U = Units.find_first();
- if (U < 0)
- return RegisterRef();
-
- auto AliasedRegs = [this] (uint32_t Unit, BitVector &Regs) {
- for (MCRegUnitRootIterator R(Unit, &PRI.getTRI()); R.isValid(); ++R)
- for (MCSuperRegIterator S(*R, &PRI.getTRI(), true); S.isValid(); ++S)
- Regs.set(*S);
- };
-
- // Find the set of all registers that are aliased to all the units
- // in this aggregate.
-
- // Get all the registers aliased to the first unit in the bit vector.
- BitVector Regs(PRI.getTRI().getNumRegs());
- AliasedRegs(U, Regs);
- U = Units.find_next(U);
-
- // For each other unit, intersect it with the set of all registers
-  // aliased to that unit.
- while (U >= 0) {
- BitVector AR(PRI.getTRI().getNumRegs());
- AliasedRegs(U, AR);
- Regs &= AR;
- U = Units.find_next(U);
- }
-
- // If there is at least one register remaining, pick the first one,
- // and consolidate the masks of all of its units contained in this
- // aggregate.
-
- int F = Regs.find_first();
- if (F <= 0)
- return RegisterRef();
-
- LaneBitmask M;
- for (MCRegUnitMaskIterator I(F, &PRI.getTRI()); I.isValid(); ++I) {
- std::pair<uint32_t,LaneBitmask> P = *I;
- if (Units.test(P.first))
- M |= P.second.none() ? LaneBitmask::getAll() : P.second;
- }
- return RegisterRef(F, M);
-}
-
-void RegisterAggr::print(raw_ostream &OS) const {
- OS << '{';
- for (int U = Units.find_first(); U >= 0; U = Units.find_next(U))
- OS << ' ' << printRegUnit(U, &PRI.getTRI());
- OS << " }";
-}
-
-RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG,
- bool End)
- : Owner(&RG) {
- for (int U = RG.Units.find_first(); U >= 0; U = RG.Units.find_next(U)) {
- RegisterRef R = RG.PRI.getRefForUnit(U);
- Masks[R.Reg] |= R.Mask;
- }
- Pos = End ? Masks.end() : Masks.begin();
- Index = End ? Masks.size() : 0;
-}
diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.h b/llvm/lib/Target/Hexagon/RDFRegisters.h
deleted file mode 100644
index 4afaf80e4659..000000000000
--- a/llvm/lib/Target/Hexagon/RDFRegisters.h
+++ /dev/null
@@ -1,240 +0,0 @@
-//===- RDFRegisters.h -------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
-#define LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
-
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/LaneBitmask.h"
-#include <cassert>
-#include <cstdint>
-#include <map>
-#include <set>
-#include <vector>
-
-namespace llvm {
-
-class MachineFunction;
-class raw_ostream;
-
-namespace rdf {
-
- using RegisterId = uint32_t;
-
- // Template class for a map translating uint32_t into arbitrary types.
- // The map will act like an indexed set: upon insertion of a new object,
- // it will automatically assign a new index to it. Index of 0 is treated
- // as invalid and is never allocated.
- template <typename T, unsigned N = 32>
- struct IndexedSet {
- IndexedSet() { Map.reserve(N); }
-
- T get(uint32_t Idx) const {
- // Index Idx corresponds to Map[Idx-1].
- assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
- return Map[Idx-1];
- }
-
- uint32_t insert(T Val) {
- // Linear search.
- auto F = llvm::find(Map, Val);
- if (F != Map.end())
- return F - Map.begin() + 1;
- Map.push_back(Val);
- return Map.size(); // Return actual_index + 1.
- }
-
- uint32_t find(T Val) const {
- auto F = llvm::find(Map, Val);
- assert(F != Map.end());
- return F - Map.begin() + 1;
- }
-
- uint32_t size() const { return Map.size(); }
-
- using const_iterator = typename std::vector<T>::const_iterator;
-
- const_iterator begin() const { return Map.begin(); }
- const_iterator end() const { return Map.end(); }
-
- private:
- std::vector<T> Map;
- };
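
IndexedSet hands out stable 1-based indices and is idempotent on re-insertion, which is why 0 can serve as an invalid id throughout RDF. A stripped-down, hypothetical equivalent plus a usage note:

#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>

struct MiniIndexedSet {
  std::vector<std::string> Map;

  uint32_t insert(const std::string &V) {
    auto F = std::find(Map.begin(), Map.end(), V);
    if (F != Map.end())
      return F - Map.begin() + 1;   // existing 1-based index
    Map.push_back(V);
    return Map.size();              // new index; 0 is never returned
  }
  std::string get(uint32_t Idx) const { return Map[Idx - 1]; }
};

// Usage: repeated inserts of the same value return the same index, e.g.
// MiniIndexedSet S; S.insert("r0") == 1 and S.insert("r0") == 1 again.
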
-
- struct RegisterRef {
- RegisterId Reg = 0;
- LaneBitmask Mask = LaneBitmask::getNone();
-
- RegisterRef() = default;
- explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
- : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
-
- operator bool() const {
- return Reg != 0 && Mask.any();
- }
-
- bool operator== (const RegisterRef &RR) const {
- return Reg == RR.Reg && Mask == RR.Mask;
- }
-
- bool operator!= (const RegisterRef &RR) const {
- return !operator==(RR);
- }
-
- bool operator< (const RegisterRef &RR) const {
- return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
- }
- };
-
-
- struct PhysicalRegisterInfo {
- PhysicalRegisterInfo(const TargetRegisterInfo &tri,
- const MachineFunction &mf);
-
- static bool isRegMaskId(RegisterId R) {
- return Register::isStackSlot(R);
- }
-
- RegisterId getRegMaskId(const uint32_t *RM) const {
- return Register::index2StackSlot(RegMasks.find(RM));
- }
-
- const uint32_t *getRegMaskBits(RegisterId R) const {
- return RegMasks.get(Register::stackSlot2Index(R));
- }
-
- RegisterRef normalize(RegisterRef RR) const;
-
- bool alias(RegisterRef RA, RegisterRef RB) const {
- if (!isRegMaskId(RA.Reg))
- return !isRegMaskId(RB.Reg) ? aliasRR(RA, RB) : aliasRM(RA, RB);
- return !isRegMaskId(RB.Reg) ? aliasRM(RB, RA) : aliasMM(RA, RB);
- }
-
- std::set<RegisterId> getAliasSet(RegisterId Reg) const;
-
- RegisterRef getRefForUnit(uint32_t U) const {
- return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask);
- }
-
- const BitVector &getMaskUnits(RegisterId MaskId) const {
- return MaskInfos[Register::stackSlot2Index(MaskId)].Units;
- }
-
- RegisterRef mapTo(RegisterRef RR, unsigned R) const;
- const TargetRegisterInfo &getTRI() const { return TRI; }
-
- private:
- struct RegInfo {
- const TargetRegisterClass *RegClass = nullptr;
- };
- struct UnitInfo {
- RegisterId Reg = 0;
- LaneBitmask Mask;
- };
- struct MaskInfo {
- BitVector Units;
- };
-
- const TargetRegisterInfo &TRI;
- IndexedSet<const uint32_t*> RegMasks;
- std::vector<RegInfo> RegInfos;
- std::vector<UnitInfo> UnitInfos;
- std::vector<MaskInfo> MaskInfos;
-
- bool aliasRR(RegisterRef RA, RegisterRef RB) const;
- bool aliasRM(RegisterRef RR, RegisterRef RM) const;
- bool aliasMM(RegisterRef RM, RegisterRef RN) const;
- };
-
- struct RegisterAggr {
- RegisterAggr(const PhysicalRegisterInfo &pri)
- : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {}
- RegisterAggr(const RegisterAggr &RG) = default;
-
- bool empty() const { return Units.none(); }
- bool hasAliasOf(RegisterRef RR) const;
- bool hasCoverOf(RegisterRef RR) const;
-
- static bool isCoverOf(RegisterRef RA, RegisterRef RB,
- const PhysicalRegisterInfo &PRI) {
- return RegisterAggr(PRI).insert(RA).hasCoverOf(RB);
- }
-
- RegisterAggr &insert(RegisterRef RR);
- RegisterAggr &insert(const RegisterAggr &RG);
- RegisterAggr &intersect(RegisterRef RR);
- RegisterAggr &intersect(const RegisterAggr &RG);
- RegisterAggr &clear(RegisterRef RR);
- RegisterAggr &clear(const RegisterAggr &RG);
-
- RegisterRef intersectWith(RegisterRef RR) const;
- RegisterRef clearIn(RegisterRef RR) const;
- RegisterRef makeRegRef() const;
-
- void print(raw_ostream &OS) const;
-
- struct rr_iterator {
- using MapType = std::map<RegisterId, LaneBitmask>;
-
- private:
- MapType Masks;
- MapType::iterator Pos;
- unsigned Index;
- const RegisterAggr *Owner;
-
- public:
- rr_iterator(const RegisterAggr &RG, bool End);
-
- RegisterRef operator*() const {
- return RegisterRef(Pos->first, Pos->second);
- }
-
- rr_iterator &operator++() {
- ++Pos;
- ++Index;
- return *this;
- }
-
- bool operator==(const rr_iterator &I) const {
- assert(Owner == I.Owner);
- (void)Owner;
- return Index == I.Index;
- }
-
- bool operator!=(const rr_iterator &I) const {
- return !(*this == I);
- }
- };
-
- rr_iterator rr_begin() const {
- return rr_iterator(*this, false);
- }
- rr_iterator rr_end() const {
- return rr_iterator(*this, true);
- }
-
- private:
- BitVector Units;
- const PhysicalRegisterInfo &PRI;
- };
-
- // Optionally print the lane mask, if it is not ~0.
- struct PrintLaneMaskOpt {
- PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
- LaneBitmask Mask;
- };
- raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
-
-} // end namespace rdf
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 9b3d13989ee2..d7e3519d5539 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
VMSUMSHS,
VMSUMUBM,
VMSUMUHM,
+ VMSUMUDM,
VMSUMUHS,
VMULESB,
VMULESH,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 00f59bba52e8..ca1649fae258 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
+ if (Subtarget.isISA3_0()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
+ setTruncStoreAction(MVT::f64, MVT::f16, Legal);
+ setTruncStoreAction(MVT::f32, MVT::f16, Legal);
+ } else {
+ // No extending loads from f16 or HW conversions back and forth.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ }
+
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // PowerPC has pre-inc loads and stores.
@@ -677,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
+ setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
if (!Subtarget.hasP8Vector()) {
setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
@@ -10361,6 +10379,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::FP_EXTEND &&
"Should only be called for ISD::FP_EXTEND");
+ // FIXME: handle extends from half precision float vectors on P9.
// We only want to custom lower an extend from v2f32 to v2f64.
if (Op.getValueType() != MVT::v2f64 ||
Op.getOperand(0).getValueType() != MVT::v2f32)
@@ -10574,6 +10593,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
// Don't handle bitcast here.
return;
+ case ISD::FP_EXTEND:
+ SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
+ if (Lowered)
+ Results.push_back(Lowered);
+ return;
}
}
@@ -15255,7 +15279,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
- if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+ if (VT.isFloatingPoint() && !VT.isVector() &&
+ !Subtarget.allowsUnalignedFPAccess())
return false;
if (VT.getSimpleVT().isVector()) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index e0c381827b87..2e1485373d19 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -637,7 +637,7 @@ namespace llvm {
/// then the VPERM for the shuffle. All in all a very slow sequence.
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
- if (VT.getScalarSizeInBits() % 8 == 0)
+ if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index f94816a35f79..6e8635f2413c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1342,6 +1342,10 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {
+// Vector Multiply-Sum
+def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
+ v1i128, v2i64, v1i128>;
+
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 30906a32b00c..d7925befcd37 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2631,6 +2631,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
return false;
+ // The operand may not necessarily be an immediate - it could be a relocation.
+ if (!ADDIMI.getOperand(2).isImm())
+ return false;
+
Imm = ADDIMI.getOperand(2).getImm();
return true;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index be6b30ffa08b..95e5ff6b130d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
(v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
+ // Load/convert and convert/store patterns for f16.
+ def : Pat<(f64 (extloadf16 xoaddr:$src)),
+ (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
+ def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
+ def : Pat<(f32 (extloadf16 xoaddr:$src)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
+ def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
+ def : Pat<(f64 (f16_to_fp i32:$A)),
+ (f64 (XSCVHPDP (MTVSRWZ $A)))>;
+ def : Pat<(f32 (f16_to_fp i32:$A)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
+ def : Pat<(i32 (fp_to_f16 f32:$A)),
+ (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
+ def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
+
let Predicates = [IsBigEndian, HasP9Vector] in {
// Scalar stores of i8
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
diff --git a/llvm/lib/Target/X86/ImmutableGraph.h b/llvm/lib/Target/X86/ImmutableGraph.h
new file mode 100644
index 000000000000..5833017037a5
--- /dev/null
+++ b/llvm/lib/Target/X86/ImmutableGraph.h
@@ -0,0 +1,446 @@
+//===-- ImmutableGraph.h - A fast DAG implementation ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: ImmutableGraph is a fast DAG implementation that cannot be
+/// modified, except by creating a new ImmutableGraph. ImmutableGraph is
+/// implemented as two arrays: one containing nodes, and one containing edges.
+/// The advantages of this implementation are twofold:
+/// 1. Iteration and traversal operations benefit from cache locality.
+/// 2. Operations on sets of nodes/edges are efficient, and representations of
+/// those sets in memory are compact. For instance, a set of edges is
+/// implemented as a bit vector, wherein each bit corresponds to one edge in
+/// the edge array. This implies a lower bound of 64x spatial improvement
+/// over, e.g., an llvm::DenseSet or llvm::SmallSet. It also means that
+/// insert/erase/contains operations complete in negligible constant time:
+/// insert and erase require one load and one store, and contains requires
+/// just one load.
+///
+//===----------------------------------------------------------------------===//
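
The constant-time set operations promised above follow directly from the bit-vector representation: one bit per edge index makes insert/erase/contains single-word operations, and union/intersection word-wise loops. A sketch using std::vector<uint64_t> in place of llvm::BitVector (this EdgeSet is hypothetical, not the NodeSet class below):

#include <cstdint>
#include <vector>

struct EdgeSet {
  std::vector<uint64_t> W;
  explicit EdgeSet(unsigned NumEdges) : W((NumEdges + 63) / 64, 0) {}
  void insert(unsigned I) { W[I / 64] |= (uint64_t(1) << (I % 64)); }
  void erase(unsigned I) { W[I / 64] &= ~(uint64_t(1) << (I % 64)); }
  bool contains(unsigned I) const { return (W[I / 64] >> (I % 64)) & 1; }
  EdgeSet &operator|=(const EdgeSet &R) {   // set union, word by word
    for (size_t i = 0; i < W.size(); ++i)
      W[i] |= R.W[i];
    return *this;
  }
};
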
+
+#ifndef LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
+#define LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+template <typename NodeValueT, typename EdgeValueT> class ImmutableGraph {
+ using Traits = GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *>;
+ template <typename> friend class ImmutableGraphBuilder;
+
+public:
+ using node_value_type = NodeValueT;
+ using edge_value_type = EdgeValueT;
+ using size_type = int;
+ class Node;
+ class Edge {
+ friend class ImmutableGraph;
+ template <typename> friend class ImmutableGraphBuilder;
+
+ const Node *Dest;
+ edge_value_type Value;
+
+ public:
+    const Node *getDest() const { return Dest; }
+ const edge_value_type &getValue() const { return Value; }
+ };
+ class Node {
+ friend class ImmutableGraph;
+ template <typename> friend class ImmutableGraphBuilder;
+
+ const Edge *Edges;
+ node_value_type Value;
+
+ public:
+ const node_value_type &getValue() const { return Value; }
+
+ const Edge *edges_begin() const { return Edges; }
+ // Nodes are allocated sequentially. Edges for a node are stored together.
+ // The end of this Node's edges is the beginning of the next node's edges.
+ // An extra node was allocated to hold the end pointer for the last real
+ // node.
+ const Edge *edges_end() const { return (this + 1)->Edges; }
+ ArrayRef<Edge> edges() const {
+ return makeArrayRef(edges_begin(), edges_end());
+ }
+ };
+
+protected:
+ ImmutableGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
+ size_type NodesSize, size_type EdgesSize)
+ : Nodes(std::move(Nodes)), Edges(std::move(Edges)), NodesSize(NodesSize),
+ EdgesSize(EdgesSize) {}
+ ImmutableGraph(const ImmutableGraph &) = delete;
+ ImmutableGraph(ImmutableGraph &&) = delete;
+ ImmutableGraph &operator=(const ImmutableGraph &) = delete;
+ ImmutableGraph &operator=(ImmutableGraph &&) = delete;
+
+public:
+ ArrayRef<Node> nodes() const { return makeArrayRef(Nodes.get(), NodesSize); }
+ const Node *nodes_begin() const { return nodes().begin(); }
+ const Node *nodes_end() const { return nodes().end(); }
+
+ ArrayRef<Edge> edges() const { return makeArrayRef(Edges.get(), EdgesSize); }
+ const Edge *edges_begin() const { return edges().begin(); }
+ const Edge *edges_end() const { return edges().end(); }
+
+ size_type nodes_size() const { return NodesSize; }
+ size_type edges_size() const { return EdgesSize; }
+
+ // Node N must belong to this ImmutableGraph.
+ size_type getNodeIndex(const Node &N) const {
+ return std::distance(nodes_begin(), &N);
+ }
+ // Edge E must belong to this ImmutableGraph.
+ size_type getEdgeIndex(const Edge &E) const {
+ return std::distance(edges_begin(), &E);
+ }
+
+ // FIXME: Could NodeSet and EdgeSet be templated to share code?
+ class NodeSet {
+ const ImmutableGraph &G;
+ BitVector V;
+
+ public:
+ NodeSet(const ImmutableGraph &G, bool ContainsAll = false)
+ : G{G}, V{static_cast<unsigned>(G.nodes_size()), ContainsAll} {}
+ bool insert(const Node &N) {
+ size_type Idx = G.getNodeIndex(N);
+ bool AlreadyExists = V.test(Idx);
+ V.set(Idx);
+ return !AlreadyExists;
+ }
+ void erase(const Node &N) {
+ size_type Idx = G.getNodeIndex(N);
+ V.reset(Idx);
+ }
+ bool contains(const Node &N) const {
+ size_type Idx = G.getNodeIndex(N);
+ return V.test(Idx);
+ }
+ void clear() { V.reset(); }
+    bool empty() const { return V.none(); }
+ /// Return the number of elements in the set
+ size_type count() const { return V.count(); }
+ /// Return the size of the set's domain
+ size_type size() const { return V.size(); }
+ /// Set union
+ NodeSet &operator|=(const NodeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V |= RHS.V;
+ return *this;
+ }
+ /// Set intersection
+ NodeSet &operator&=(const NodeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V &= RHS.V;
+ return *this;
+ }
+    /// Set symmetric difference
+ NodeSet &operator^=(const NodeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V ^= RHS.V;
+ return *this;
+ }
+
+ using index_iterator = typename BitVector::const_set_bits_iterator;
+ index_iterator index_begin() const { return V.set_bits_begin(); }
+ index_iterator index_end() const { return V.set_bits_end(); }
+ void set(size_type Idx) { V.set(Idx); }
+ void reset(size_type Idx) { V.reset(Idx); }
+
+ class iterator {
+ const NodeSet &Set;
+ size_type Current;
+
+ void advance() {
+ assert(Current != -1);
+ Current = Set.V.find_next(Current);
+ }
+
+ public:
+ iterator(const NodeSet &Set, size_type Begin)
+ : Set{Set}, Current{Begin} {}
+ iterator operator++(int) {
+ iterator Tmp = *this;
+ advance();
+ return Tmp;
+ }
+ iterator &operator++() {
+ advance();
+ return *this;
+ }
+      const Node *operator*() const {
+ assert(Current != -1);
+ return Set.G.nodes_begin() + Current;
+ }
+ bool operator==(const iterator &other) const {
+ assert(&this->Set == &other.Set);
+ return this->Current == other.Current;
+ }
+ bool operator!=(const iterator &other) const { return !(*this == other); }
+ };
+
+ iterator begin() const { return iterator{*this, V.find_first()}; }
+ iterator end() const { return iterator{*this, -1}; }
+ };
+
+ class EdgeSet {
+ const ImmutableGraph &G;
+ BitVector V;
+
+ public:
+ EdgeSet(const ImmutableGraph &G, bool ContainsAll = false)
+ : G{G}, V{static_cast<unsigned>(G.edges_size()), ContainsAll} {}
+ bool insert(const Edge &E) {
+ size_type Idx = G.getEdgeIndex(E);
+ bool AlreadyExists = V.test(Idx);
+ V.set(Idx);
+ return !AlreadyExists;
+ }
+ void erase(const Edge &E) {
+ size_type Idx = G.getEdgeIndex(E);
+ V.reset(Idx);
+ }
+ bool contains(const Edge &E) const {
+ size_type Idx = G.getEdgeIndex(E);
+ return V.test(Idx);
+ }
+ void clear() { V.reset(); }
+ bool empty() const { return V.none(); }
+ /// Return the number of elements in the set
+ size_type count() const { return V.count(); }
+ /// Return the size of the set's domain
+ size_type size() const { return V.size(); }
+ /// Set union
+ EdgeSet &operator|=(const EdgeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V |= RHS.V;
+ return *this;
+ }
+ /// Set intersection
+ EdgeSet &operator&=(const EdgeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V &= RHS.V;
+ return *this;
+ }
+    /// Set symmetric difference
+ EdgeSet &operator^=(const EdgeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V ^= RHS.V;
+ return *this;
+ }
+
+ using index_iterator = typename BitVector::const_set_bits_iterator;
+ index_iterator index_begin() const { return V.set_bits_begin(); }
+ index_iterator index_end() const { return V.set_bits_end(); }
+ void set(size_type Idx) { V.set(Idx); }
+ void reset(size_type Idx) { V.reset(Idx); }
+
+ class iterator {
+ const EdgeSet &Set;
+ size_type Current;
+
+ void advance() {
+ assert(Current != -1);
+ Current = Set.V.find_next(Current);
+ }
+
+ public:
+ iterator(const EdgeSet &Set, size_type Begin)
+ : Set{Set}, Current{Begin} {}
+ iterator operator++(int) {
+ iterator Tmp = *this;
+ advance();
+ return Tmp;
+ }
+ iterator &operator++() {
+ advance();
+ return *this;
+ }
+      const Edge *operator*() const {
+ assert(Current != -1);
+ return Set.G.edges_begin() + Current;
+ }
+ bool operator==(const iterator &other) const {
+ assert(&this->Set == &other.Set);
+ return this->Current == other.Current;
+ }
+ bool operator!=(const iterator &other) const { return !(*this == other); }
+ };
+
+ iterator begin() const { return iterator{*this, V.find_first()}; }
+ iterator end() const { return iterator{*this, -1}; }
+ };
+
+private:
+ std::unique_ptr<Node[]> Nodes;
+ std::unique_ptr<Edge[]> Edges;
+ size_type NodesSize;
+ size_type EdgesSize;
+};
+
+template <typename GraphT> class ImmutableGraphBuilder {
+ using node_value_type = typename GraphT::node_value_type;
+ using edge_value_type = typename GraphT::edge_value_type;
+ static_assert(
+ std::is_base_of<ImmutableGraph<node_value_type, edge_value_type>,
+ GraphT>::value,
+ "Template argument to ImmutableGraphBuilder must derive from "
+ "ImmutableGraph<>");
+ using size_type = typename GraphT::size_type;
+ using NodeSet = typename GraphT::NodeSet;
+ using Node = typename GraphT::Node;
+ using EdgeSet = typename GraphT::EdgeSet;
+ using Edge = typename GraphT::Edge;
+ using BuilderEdge = std::pair<edge_value_type, size_type>;
+ using EdgeList = std::vector<BuilderEdge>;
+ using BuilderVertex = std::pair<node_value_type, EdgeList>;
+ using VertexVec = std::vector<BuilderVertex>;
+
+public:
+ using BuilderNodeRef = size_type;
+
+ BuilderNodeRef addVertex(const node_value_type &V) {
+ auto I = AdjList.emplace(AdjList.end(), V, EdgeList{});
+ return std::distance(AdjList.begin(), I);
+ }
+
+ void addEdge(const edge_value_type &E, BuilderNodeRef From,
+ BuilderNodeRef To) {
+ AdjList[From].second.emplace_back(E, To);
+ }
+
+ bool empty() const { return AdjList.empty(); }
+
+ template <typename... ArgT> std::unique_ptr<GraphT> get(ArgT &&... Args) {
+ size_type VertexSize = AdjList.size(), EdgeSize = 0;
+ for (const auto &V : AdjList) {
+ EdgeSize += V.second.size();
+ }
+ auto VertexArray =
+ std::make_unique<Node[]>(VertexSize + 1 /* terminator node */);
+ auto EdgeArray = std::make_unique<Edge[]>(EdgeSize);
+ size_type VI = 0, EI = 0;
+ for (; VI < VertexSize; ++VI) {
+ VertexArray[VI].Value = std::move(AdjList[VI].first);
+ VertexArray[VI].Edges = &EdgeArray[EI];
+ auto NumEdges = static_cast<size_type>(AdjList[VI].second.size());
+ for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) {
+ auto &E = AdjList[VI].second[VEI];
+ EdgeArray[EI].Value = std::move(E.first);
+ EdgeArray[EI].Dest = &VertexArray[E.second];
+ }
+ }
+ assert(VI == VertexSize && EI == EdgeSize && "ImmutableGraph malformed");
+ VertexArray[VI].Edges = &EdgeArray[EdgeSize]; // terminator node
+ return std::make_unique<GraphT>(std::move(VertexArray),
+ std::move(EdgeArray), VertexSize, EdgeSize,
+ std::forward<ArgT>(Args)...);
+ }
+
+ template <typename... ArgT>
+ static std::unique_ptr<GraphT> trim(const GraphT &G, const NodeSet &TrimNodes,
+ const EdgeSet &TrimEdges,
+ ArgT &&... Args) {
+ size_type NewVertexSize = G.nodes_size() - TrimNodes.count();
+ size_type NewEdgeSize = G.edges_size() - TrimEdges.count();
+ auto NewVertexArray =
+ std::make_unique<Node[]>(NewVertexSize + 1 /* terminator node */);
+ auto NewEdgeArray = std::make_unique<Edge[]>(NewEdgeSize);
+
+ // Walk the nodes and determine the new index for each node.
+ size_type NewNodeIndex = 0;
+ std::vector<size_type> RemappedNodeIndex(G.nodes_size());
+ for (const Node &N : G.nodes()) {
+ if (TrimNodes.contains(N))
+ continue;
+ RemappedNodeIndex[G.getNodeIndex(N)] = NewNodeIndex++;
+ }
+ assert(NewNodeIndex == NewVertexSize &&
+ "Should have assigned NewVertexSize indices");
+
+ size_type VertexI = 0, EdgeI = 0;
+ for (const Node &N : G.nodes()) {
+ if (TrimNodes.contains(N))
+ continue;
+ NewVertexArray[VertexI].Value = N.getValue();
+ NewVertexArray[VertexI].Edges = &NewEdgeArray[EdgeI];
+ for (const Edge &E : N.edges()) {
+ if (TrimEdges.contains(E))
+ continue;
+ NewEdgeArray[EdgeI].Value = E.getValue();
+ size_type DestIdx = G.getNodeIndex(*E.getDest());
+ size_type NewIdx = RemappedNodeIndex[DestIdx];
+ assert(NewIdx < NewVertexSize);
+ NewEdgeArray[EdgeI].Dest = &NewVertexArray[NewIdx];
+ ++EdgeI;
+ }
+ ++VertexI;
+ }
+ assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize &&
+ "Gadget graph malformed");
+ NewVertexArray[VertexI].Edges = &NewEdgeArray[NewEdgeSize]; // terminator
+ return std::make_unique<GraphT>(std::move(NewVertexArray),
+ std::move(NewEdgeArray), NewVertexSize,
+ NewEdgeSize, std::forward<ArgT>(Args)...);
+ }
+
+private:
+ VertexVec AdjList;
+};
+
+template <typename NodeValueT, typename EdgeValueT>
+struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *> {
+ using GraphT = ImmutableGraph<NodeValueT, EdgeValueT>;
+ using NodeRef = typename GraphT::Node const *;
+ using EdgeRef = typename GraphT::Edge const &;
+
+ static NodeRef edge_dest(EdgeRef E) { return E.getDest(); }
+ using ChildIteratorType =
+ mapped_iterator<typename GraphT::Edge const *, decltype(&edge_dest)>;
+
+ static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return {N->edges_begin(), &edge_dest};
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return {N->edges_end(), &edge_dest};
+ }
+
+ static NodeRef getNode(typename GraphT::Node const &N) { return NodeRef{&N}; }
+ using nodes_iterator =
+ mapped_iterator<typename GraphT::Node const *, decltype(&getNode)>;
+ static nodes_iterator nodes_begin(GraphT *G) {
+ return {G->nodes_begin(), &getNode};
+ }
+ static nodes_iterator nodes_end(GraphT *G) {
+ return {G->nodes_end(), &getNode};
+ }
+
+ using ChildEdgeIteratorType = typename GraphT::Edge const *;
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ return N->edges_begin();
+ }
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) {
+ return N->edges_end();
+ }
+ static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
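
As a quick orientation to the API above, a minimal usage sketch follows. The wrapper type IntGraph is hypothetical; a concrete graph type must re-export the protected base constructor so that the builder's get() can instantiate it, exactly as MachineGadgetGraph does later in this commit:

    #include "ImmutableGraph.h"
    #include <memory>

    using namespace llvm;

    // Hypothetical instantiation with int-valued nodes and edges.
    struct IntGraph : ImmutableGraph<int, int> {
      // Re-expose the protected base constructor for the builder's get().
      IntGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
               size_type NodesSize, size_type EdgesSize)
          : ImmutableGraph(std::move(Nodes), std::move(Edges), NodesSize,
                           EdgesSize) {}
    };

    void buildAndQuery() {
      ImmutableGraphBuilder<IntGraph> Builder;
      auto A = Builder.addVertex(10);     // BuilderNodeRef is just an index
      auto B = Builder.addVertex(20);
      Builder.addEdge(/*Value=*/5, A, B); // edge A -> B carrying the value 5
      std::unique_ptr<IntGraph> G = Builder.get();

      // Node/edge sets are bit vectors indexed by array position, so
      // insert/contains are single bit operations.
      IntGraph::NodeSet Visited(*G);
      Visited.insert(*G->nodes_begin());

      // Edges of a node are stored contiguously; iteration is cache friendly.
      for (const auto &E : G->nodes_begin()->edges())
        (void)E.getDest()->getValue();
    }

Since get() forwards any trailing arguments to the graph constructor, a subclass can also carry metadata of its own, which is how MachineGadgetGraph later in this commit threads its fence and gadget counts through the builder.
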
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 0481a40d462a..a0ab5c3a5b3c 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -120,7 +120,7 @@ FunctionPass *createX86DomainReassignmentPass();
FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
-FunctionPass *createX86RetpolineThunksPass();
+FunctionPass *createX86IndirectThunksPass();
/// This pass ensures instructions featuring a memory operand
/// have distinctive <LineNumber, Discriminator> (with respect to each other)
@@ -133,6 +133,9 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
+FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
+FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass();
+FunctionPass *createX86LoadValueInjectionRetHardeningPass();
FunctionPass *createX86SpeculativeLoadHardeningPass();
void initializeEvexToVexInstPassPass(PassRegistry &);
@@ -148,6 +151,9 @@ void initializeX86DomainReassignmentPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
void initializeX86ExpandPseudoPass(PassRegistry &);
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a2b11d55f650..bb8952f54e3a 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -426,6 +426,22 @@ def FeatureRetpolineExternalThunk
"ourselves. Only has effect when combined with some other retpoline "
"feature", [FeatureRetpolineIndirectCalls]>;
+// Mitigate LVI attacks against indirect calls/branches and call returns
+def FeatureLVIControlFlowIntegrity
+ : SubtargetFeature<
+ "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+ "Prevent indirect calls/branches from using a memory operand, and "
+ "precede all indirect calls/branches from a register with an "
+ "LFENCE instruction to serialize control flow. Also decompose RET "
+ "instructions into a POP+LFENCE+JMP sequence.">;
+
+// Mitigate LVI attacks against data loads
+def FeatureLVILoadHardening
+ : SubtargetFeature<
+ "lvi-load-hardening", "UseLVILoadHardening", "true",
+ "Insert LFENCE instructions to prevent data speculatively injected "
+ "into loads from being used maliciously.">;
+
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction">;
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 1dbf40683564..a1d256ea872d 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3202,8 +3202,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
(CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
return false;
- // Functions using retpoline for indirect calls need to use SDISel.
- if (Subtarget->useRetpolineIndirectCalls())
+ // Functions using thunks for indirect calls need to use SDISel.
+ if (Subtarget->useIndirectThunkCalls())
return false;
// Handle only C, fastcc, and webkit_js calling conventions for now.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 799c1f5d1285..1da20371caf5 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -765,10 +765,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
bool InProlog) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
- // FIXME: Add retpoline support and remove this.
- if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
+ // FIXME: Add indirect thunk support and remove this.
+ if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
report_fatal_error("Emitting stack probe calls on 64-bit with the large "
- "code model and retpoline not yet implemented.");
+ "code model and indirect thunks not yet implemented.");
unsigned CallOp;
if (Is64Bit)
@@ -2493,9 +2493,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
// is laid out within 2^31 bytes of each function body, but this seems
// to be sufficient for JIT.
// FIXME: Add retpoline support and remove the error here.
- if (STI.useRetpolineIndirectCalls())
+ if (STI.useIndirectThunkCalls())
report_fatal_error("Emitting morestack calls on 64-bit with the large "
- "code model and retpoline not yet implemented.");
+ "code model and thunks not yet implemented.");
BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
.addReg(X86::RIP)
.addImm(0)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index bf33f399db28..88af0ebcfd0e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -987,7 +987,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target can fold the load into the call or
// jmp.
- !Subtarget->useRetpolineIndirectCalls() &&
+ !Subtarget->useIndirectThunkCalls() &&
((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
(Subtarget->is64Bit() ||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1523d56cc4e7..c8720d9ae3a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30221,8 +30221,8 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
}
bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
- // If the subtarget is using retpolines, we need to not generate jump tables.
- if (Subtarget.useRetpolineIndirectBranches())
+ // If the subtarget is using thunks, we need to not generate jump tables.
+ if (Subtarget.useIndirectThunkBranches())
return false;
// Otherwise, fallback on the generic logic.
@@ -31345,22 +31345,22 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
return BB;
}
-static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
+static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) {
switch (RPOpc) {
- case X86::RETPOLINE_CALL32:
+ case X86::INDIRECT_THUNK_CALL32:
return X86::CALLpcrel32;
- case X86::RETPOLINE_CALL64:
+ case X86::INDIRECT_THUNK_CALL64:
return X86::CALL64pcrel32;
- case X86::RETPOLINE_TCRETURN32:
+ case X86::INDIRECT_THUNK_TCRETURN32:
return X86::TCRETURNdi;
- case X86::RETPOLINE_TCRETURN64:
+ case X86::INDIRECT_THUNK_TCRETURN64:
return X86::TCRETURNdi64;
}
- llvm_unreachable("not retpoline opcode");
+ llvm_unreachable("not indirect thunk opcode");
}
-static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
- unsigned Reg) {
+static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget,
+ unsigned Reg) {
if (Subtarget.useRetpolineExternalThunk()) {
// When using an external thunk for retpolines, we pick names that match the
// names GCC happens to use as well. This helps simplify the implementation
@@ -31392,39 +31392,48 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__x86_indirect_thunk_r11";
}
+ llvm_unreachable("unexpected reg for external indirect thunk");
+ }
+
+ if (Subtarget.useRetpolineIndirectCalls() ||
+ Subtarget.useRetpolineIndirectBranches()) {
+ // When targeting an internal COMDAT thunk use an LLVM-specific name.
+ switch (Reg) {
+ case X86::EAX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_eax";
+ case X86::ECX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_ecx";
+ case X86::EDX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_edx";
+ case X86::EDI:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_edi";
+ case X86::R11:
+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+ return "__llvm_retpoline_r11";
+ }
llvm_unreachable("unexpected reg for retpoline");
}
- // When targeting an internal COMDAT thunk use an LLVM-specific name.
- switch (Reg) {
- case X86::EAX:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_eax";
- case X86::ECX:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_ecx";
- case X86::EDX:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_edx";
- case X86::EDI:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_edi";
- case X86::R11:
+ if (Subtarget.useLVIControlFlowIntegrity()) {
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
- return "__llvm_retpoline_r11";
+ return "__llvm_lvi_thunk_r11";
}
- llvm_unreachable("unexpected reg for retpoline");
+ llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
- MachineBasicBlock *BB) const {
+X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
// Copy the virtual register into the R11 physical register and
// call the retpoline thunk.
DebugLoc DL = MI.getDebugLoc();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
Register CalleeVReg = MI.getOperand(0).getReg();
- unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
+ unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());
// Find an available scratch register to hold the callee. On 64-bit, we can
// just use R11, but we scan for uses anyway to ensure we don't generate
@@ -31458,7 +31467,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
report_fatal_error("calling convention incompatible with retpoline, no "
"available registers");
- const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
+ const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
.addReg(CalleeVReg);
@@ -32234,11 +32243,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
return EmitLoweredTLSAddr(MI, BB);
- case X86::RETPOLINE_CALL32:
- case X86::RETPOLINE_CALL64:
- case X86::RETPOLINE_TCRETURN32:
- case X86::RETPOLINE_TCRETURN64:
- return EmitLoweredRetpoline(MI, BB);
+ case X86::INDIRECT_THUNK_CALL32:
+ case X86::INDIRECT_THUNK_CALL64:
+ case X86::INDIRECT_THUNK_TCRETURN32:
+ case X86::INDIRECT_THUNK_TCRETURN64:
+ return EmitLoweredIndirectThunk(MI, BB);
case X86::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case X86::CATCHPAD:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 3a17099da38f..830cdfc79c0a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1482,8 +1482,8 @@ namespace llvm {
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
- MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp
new file mode 100644
index 000000000000..36b9c3ccc959
--- /dev/null
+++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -0,0 +1,364 @@
+//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Pass that injects an MI thunk that is used to lower indirect calls in a way
+/// that prevents speculation on some x86 processors and can be used to mitigate
+/// security vulnerabilities due to targeted speculative execution and side
+/// channels such as CVE-2017-5715.
+///
+/// Currently supported thunks include:
+/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls
+/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization
+/// before making an indirect call/jump
+///
+/// Note that the reason that this is implemented as a MachineFunctionPass and
+/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline
+/// serialize all transformations, which can consume lots of memory.
+///
+/// TODO(chandlerc): All of this code could use better comments and
+/// documentation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-retpoline-thunks"
+
+static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
+static const char R11RetpolineName[] = "__llvm_retpoline_r11";
+static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
+static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
+static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
+static const char EDIRetpolineName[] = "__llvm_retpoline_edi";
+
+static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
+static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
+
+namespace {
+template <typename Derived> class ThunkInserter {
+ Derived &getDerived() { return *static_cast<Derived *>(this); }
+
+protected:
+ bool InsertedThunks;
+ void doInitialization(Module &M) {}
+ void createThunkFunction(MachineModuleInfo &MMI, StringRef Name);
+
+public:
+ void init(Module &M) {
+ InsertedThunks = false;
+ getDerived().doInitialization(M);
+ }
+  // Returns `true` if `MMI` or `MF` was modified.
+ bool run(MachineModuleInfo &MMI, MachineFunction &MF);
+};
+
+struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
+ const char *getThunkPrefix() { return RetpolineNamePrefix; }
+ bool mayUseThunk(const MachineFunction &MF) {
+ const auto &STI = MF.getSubtarget<X86Subtarget>();
+ return (STI.useRetpolineIndirectCalls() ||
+ STI.useRetpolineIndirectBranches()) &&
+ !STI.useRetpolineExternalThunk();
+ }
+ void insertThunks(MachineModuleInfo &MMI);
+ void populateThunk(MachineFunction &MF);
+};
+
+struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
+ const char *getThunkPrefix() { return LVIThunkNamePrefix; }
+ bool mayUseThunk(const MachineFunction &MF) {
+ return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
+ }
+ void insertThunks(MachineModuleInfo &MMI) {
+ createThunkFunction(MMI, R11LVIThunkName);
+ }
+ void populateThunk(MachineFunction &MF) {
+ // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+ // generate two bbs for the entry block.
+ MachineBasicBlock *Entry = &MF.front();
+ Entry->clear();
+ while (MF.size() > 1)
+ MF.erase(std::next(MF.begin()));
+
+ // This code mitigates LVI by replacing each indirect call/jump with a
+ // direct call/jump to a thunk that looks like:
+ // ```
+ // lfence
+ // jmpq *%r11
+ // ```
+ // This ensures that if the value in register %r11 was loaded from memory,
+ // then the value in %r11 is (architecturally) correct prior to the jump.
+ const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
+ MF.front().addLiveIn(X86::R11);
+ return;
+ }
+};
+
+class X86IndirectThunks : public MachineFunctionPass {
+public:
+ static char ID;
+
+ X86IndirectThunks() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "X86 Indirect Thunks"; }
+
+ bool doInitialization(Module &M) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ }
+
+private:
+ std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;
+
+ // FIXME: When LLVM moves to C++17, these can become folds
+ template <typename... ThunkInserterT>
+ static void initTIs(Module &M,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ (void)std::initializer_list<int>{
+ (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
+ }
+ template <typename... ThunkInserterT>
+ static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ bool Modified = false;
+ (void)std::initializer_list<int>{
+ Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
+ return Modified;
+ }
+};
+
+} // end anonymous namespace
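
The initTIs/runTIs helpers in the class above rely on the pre-C++17 std::initializer_list pack-expansion idiom that the FIXME alludes to. A self-contained sketch with hypothetical inserter-like types shows the trick, and the fold expression it would become under C++17:

    #include <initializer_list>
    #include <tuple>

    struct AddOne { bool run(int &X) { ++X; return true; } };
    struct Twice  { bool run(int &X) { X *= 2; return true; } };

    template <typename... Ts>
    bool runAll(std::tuple<Ts...> &Tup, int &X) {
      bool Modified = false;
      // Braced initializer lists evaluate their elements left to right, so
      // this expands into an ordered sequence of run() calls.
      (void)std::initializer_list<int>{
          (Modified |= std::get<Ts>(Tup).run(X), 0)...};
      return Modified;
      // C++17 would allow simply: return (std::get<Ts>(Tup).run(X) | ...);
    }

    int main() {
      std::tuple<AddOne, Twice> Passes;
      int V = 3;
      return runAll(Passes, V) ? V : 0; // V is now 8
    }
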
+
+void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+ if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
+ createThunkFunction(MMI, R11RetpolineName);
+ else
+ for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName,
+ EDIRetpolineName})
+ createThunkFunction(MMI, Name);
+}
+
+void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
+ bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64;
+ Register ThunkReg;
+ if (Is64Bit) {
+ assert(MF.getName() == "__llvm_retpoline_r11" &&
+ "Should only have an r11 thunk on 64-bit targets");
+
+ // __llvm_retpoline_r11:
+ // callq .Lr11_call_target
+ // .Lr11_capture_spec:
+ // pause
+ // lfence
+ // jmp .Lr11_capture_spec
+ // .align 16
+ // .Lr11_call_target:
+ // movq %r11, (%rsp)
+ // retq
+ ThunkReg = X86::R11;
+ } else {
+ // For 32-bit targets we need to emit a collection of thunks for various
+ // possible scratch registers as well as a fallback that uses EDI, which is
+ // normally callee saved.
+ // __llvm_retpoline_eax:
+ // calll .Leax_call_target
+ // .Leax_capture_spec:
+ // pause
+ // jmp .Leax_capture_spec
+ // .align 16
+ // .Leax_call_target:
+ // movl %eax, (%esp) # Clobber return addr
+ // retl
+ //
+ // __llvm_retpoline_ecx:
+ // ... # Same setup
+ // movl %ecx, (%esp)
+ // retl
+ //
+ // __llvm_retpoline_edx:
+ // ... # Same setup
+ // movl %edx, (%esp)
+ // retl
+ //
+ // __llvm_retpoline_edi:
+ // ... # Same setup
+ // movl %edi, (%esp)
+ // retl
+ if (MF.getName() == EAXRetpolineName)
+ ThunkReg = X86::EAX;
+ else if (MF.getName() == ECXRetpolineName)
+ ThunkReg = X86::ECX;
+ else if (MF.getName() == EDXRetpolineName)
+ ThunkReg = X86::EDX;
+ else if (MF.getName() == EDIRetpolineName)
+ ThunkReg = X86::EDI;
+ else
+ llvm_unreachable("Invalid thunk name on x86-32!");
+ }
+
+ const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+ // generate two bbs for the entry block.
+ MachineBasicBlock *Entry = &MF.front();
+ Entry->clear();
+ while (MF.size() > 1)
+ MF.erase(std::next(MF.begin()));
+
+ MachineBasicBlock *CaptureSpec =
+ MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+ MachineBasicBlock *CallTarget =
+ MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+ MCSymbol *TargetSym = MF.getContext().createTempSymbol();
+ MF.push_back(CaptureSpec);
+ MF.push_back(CallTarget);
+
+ const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
+
+ Entry->addLiveIn(ThunkReg);
+ BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
+
+ // The MIR verifier thinks that the CALL in the entry block will fall through
+  // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
+ // the successor, but the MIR verifier doesn't know how to cope with that.
+ Entry->addSuccessor(CaptureSpec);
+
+ // In the capture loop for speculation, we want to stop the processor from
+ // speculating as fast as possible. On Intel processors, the PAUSE instruction
+ // will block speculation without consuming any execution resources. On AMD
+ // processors, the PAUSE instruction is (essentially) a nop, so we also use an
+ // LFENCE instruction which they have advised will stop speculation as well
+ // with minimal resource utilization. We still end the capture with a jump to
+  // form an infinite loop, fully guaranteeing that no matter the
+  // implementation of the x86 ISA, speculation down this code path can never
+  // escape.
+ BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
+ BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
+ CaptureSpec->setHasAddressTaken();
+ CaptureSpec->addSuccessor(CaptureSpec);
+
+ CallTarget->addLiveIn(ThunkReg);
+ CallTarget->setHasAddressTaken();
+ CallTarget->setAlignment(Align(16));
+
+ // Insert return address clobber
+ const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
+ const Register SPReg = Is64Bit ? X86::RSP : X86::ESP;
+ addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false,
+ 0)
+ .addReg(ThunkReg);
+
+ CallTarget->back().setPreInstrSymbol(MF, TargetSym);
+ BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
+}
+
+template <typename Derived>
+void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
+ StringRef Name) {
+ assert(Name.startswith(getDerived().getThunkPrefix()) &&
+ "Created a thunk with an unexpected prefix!");
+
+ Module &M = const_cast<Module &>(*MMI.getModule());
+ LLVMContext &Ctx = M.getContext();
+ auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
+ Function *F =
+ Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
+ F->setVisibility(GlobalValue::HiddenVisibility);
+ F->setComdat(M.getOrInsertComdat(Name));
+
+ // Add Attributes so that we don't create a frame, unwind information, or
+ // inline.
+ AttrBuilder B;
+ B.addAttribute(llvm::Attribute::NoUnwind);
+ B.addAttribute(llvm::Attribute::Naked);
+ F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+
+ // Populate our function a bit so that we can verify.
+ BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
+ IRBuilder<> Builder(Entry);
+
+ Builder.CreateRetVoid();
+
+ // MachineFunctions/MachineBasicBlocks aren't created automatically for the
+ // IR-level constructs we already made. Create them and insert them into the
+ // module.
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
+
+ // Insert EntryMBB into MF. It's not in the module until we do this.
+ MF.insert(MF.end(), EntryMBB);
+ // Set MF properties. We never use vregs...
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+}
+
+template <typename Derived>
+bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
+ // If MF is not a thunk, check to see if we need to insert a thunk.
+ if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
+ // If we've already inserted a thunk, nothing else to do.
+ if (InsertedThunks)
+ return false;
+
+ // Only add a thunk if one of the functions has the corresponding feature
+ // enabled in its subtarget, and doesn't enable external thunks.
+ // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
+ // nothing will end up calling it.
+ // FIXME: It's a little silly to look at every function just to enumerate
+ // the subtargets, but eventually we'll want to look at them for indirect
+ // calls, so maybe this is OK.
+ if (!getDerived().mayUseThunk(MF))
+ return false;
+
+ getDerived().insertThunks(MMI);
+ InsertedThunks = true;
+ return true;
+ }
+
+ // If this *is* a thunk function, we need to populate it with the correct MI.
+ getDerived().populateThunk(MF);
+ return true;
+}
+
+FunctionPass *llvm::createX86IndirectThunksPass() {
+ return new X86IndirectThunks();
+}
+
+char X86IndirectThunks::ID = 0;
+
+bool X86IndirectThunks::doInitialization(Module &M) {
+ initTIs(M, TIs);
+ return false;
+}
+
+bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << getPassName() << '\n');
+ auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ return runTIs(MMI, MF, TIs);
+}
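
ThunkInserter dispatches to its derived classes through the curiously recurring template pattern (CRTP) rather than virtual functions. A stripped-down, LLVM-free sketch of the same structure, with hypothetical types:

    #include <cstdio>

    // The base drives the algorithm and statically dispatches the
    // customization points (mayUseThunk, insertThunks) to Derived.
    template <typename Derived> class InserterBase {
      Derived &getDerived() { return *static_cast<Derived *>(this); }

    protected:
      bool InsertedThunks = false;

    public:
      bool run(const char *FuncName) {
        if (!getDerived().mayUseThunk(FuncName))
          return false;
        if (InsertedThunks)
          return false;
        getDerived().insertThunks();
        InsertedThunks = true;
        return true;
      }
    };

    struct PrintInserter : InserterBase<PrintInserter> {
      bool mayUseThunk(const char *) { return true; }
      void insertThunks() { std::puts("creating thunk"); }
    };

    int main() {
      PrintInserter PI;
      PI.run("f"); // inserts the thunk
      PI.run("g"); // no-op: already inserted
      return 0;
    }

The static_cast in getDerived() is safe because Derived is required to inherit from the base instantiated with itself, and the compiler can inline the customization points, which virtual dispatch would prevent.
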
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 78d8dd3c0d03..1fdac104cb73 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1213,14 +1213,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>;
+ Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>;
+ Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
@@ -1232,21 +1232,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
- (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, UseRetpolineIndirectCalls]>;
+ (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In64BitMode, UseIndirectThunkCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
- (RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, UseRetpolineIndirectCalls]>;
+ (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[Not64BitMode, UseIndirectThunkCalls]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index 32faeb1a86f2..1842dc19ec2e 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -237,13 +237,13 @@ let isCall = 1 in
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
- Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>,
+ Requires<[Not64BitMode,NotUseIndirectThunkCalls]>,
Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
Requires<[Not64BitMode,FavorMemIndirectCall,
- NotUseRetpolineIndirectCalls]>,
+ NotUseIndirectThunkCalls]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT, use with caution.
@@ -334,11 +334,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode,NotUseIndirectThunkCalls]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
- NotUseRetpolineIndirectCalls]>;
+ NotUseIndirectThunkCalls]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
@@ -393,19 +393,19 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
usesCustomInserter = 1,
SchedRW = [WriteJump] in {
- def RETPOLINE_CALL32 :
+ def INDIRECT_THUNK_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
- Requires<[Not64BitMode,UseRetpolineIndirectCalls]>;
+ Requires<[Not64BitMode,UseIndirectThunkCalls]>;
- def RETPOLINE_CALL64 :
+ def INDIRECT_THUNK_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode,UseIndirectThunkCalls]>;
- // Retpoline variant of indirect tail calls.
+ // Indirect thunk variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- def RETPOLINE_TCRETURN64 :
+ def INDIRECT_THUNK_TCRETURN64 :
PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
- def RETPOLINE_TCRETURN32 :
+ def INDIRECT_THUNK_TCRETURN32 :
PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
}
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index ca5425e8b89f..93f40c8ec996 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -996,8 +996,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
-def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
-def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
+def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
+def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
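
UseIndirectThunkCalls and NotUseIndirectThunkCalls resolve to new X86Subtarget accessors whose hunk is not part of this excerpt. Given that both retpoline and LVI control-flow integrity route indirect transfers through thunks, the accessors plausibly reduce to a disjunction of the per-mitigation flags; the following is a sketch, not the verbatim header:

    // Sketch only: how unified "indirect thunk" predicates can derive from
    // the individual mitigation feature flags.
    struct X86SubtargetSketch {
      bool UseRetpolineIndirectCalls = false;
      bool UseRetpolineIndirectBranches = false;
      bool UseLVIControlFlowIntegrity = false;

      bool useIndirectThunkCalls() const {
        return UseRetpolineIndirectCalls || UseLVIControlFlowIntegrity;
      }
      bool useIndirectThunkBranches() const {
        return UseRetpolineIndirectBranches || UseLVIControlFlowIntegrity;
      }
    };
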
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
new file mode 100644
index 000000000000..35fc439998f9
--- /dev/null
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -0,0 +1,900 @@
+//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: This pass finds Load Value Injection (LVI) gadgets consisting
+/// of a load from memory (i.e., SOURCE), and any operation that may transmit
+/// the value loaded from memory over a covert channel, or use the value loaded
+/// from memory to determine a branch/call target (i.e., SINK). After finding
+/// all such gadgets in a given function, the pass minimally inserts LFENCE
+/// instructions in such a manner that the following property is satisfied: for
+/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at
+/// least one LFENCE instruction. The algorithm that implements this minimal
+/// insertion is influenced by an academic paper that minimally inserts memory
+/// fences for high-performance concurrent programs:
+/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf
+/// The algorithm implemented in this pass is as follows:
+/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the
+/// following components:
+/// - SOURCE instructions (also includes function arguments)
+/// - SINK instructions
+/// - Basic block entry points
+/// - Basic block terminators
+/// - LFENCE instructions
+/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e.,
+/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been
+/// mitigated, go to step 6.
+/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
+/// 4. Insert one LFENCE along each CFG edge that was cut in step 3.
+/// 5. Go to step 2.
+/// 6. If any LFENCEs were inserted, return `true` from runOnMachineFunction()
+/// to tell LLVM that the function was modified.
+///
+//===----------------------------------------------------------------------===//
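
A body sketch of the fixpoint that steps 2 through 5 describe, where approximateMinCut is a hypothetical stand-in for the heuristic or plugin of step 3, while trimMitigatedEdges and insertFences are real member functions declared further down (the actual loop is in hardenLoadsWithGreedyHeuristic):

    int Fences = 0;
    Graph = trimMitigatedEdges(std::move(Graph));     // step 2
    while (Graph->NumGadgets > 0) {
      EdgeSet CutEdges = approximateMinCut(*Graph);   // step 3
      Fences += insertFences(MF, *Graph, CutEdges);   // step 4
      Graph = trimMitigatedEdges(std::move(Graph));   // step 5: back to step 2
    }
    return Fences;                                    // step 6: >0 => modified
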
+
+#include "ImmutableGraph.h"
+#include "X86.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-load"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
+STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
+STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
+ "were deployed");
+STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis");
+
+static cl::opt<std::string> OptimizePluginPath(
+ PASS_KEY "-opt-plugin",
+ cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden);
+
+static cl::opt<bool> NoConditionalBranches(
+ PASS_KEY "-no-cbranch",
+ cl::desc("Don't treat conditional branches as disclosure gadgets. This "
+ "may improve performance, at the cost of security."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDot(
+ PASS_KEY "-dot",
+ cl::desc(
+ "For each function, emit a dot graph depicting potential LVI gadgets"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDotOnly(
+ PASS_KEY "-dot-only",
+ cl::desc("For each function, emit a dot graph depicting potential LVI "
+ "gadgets, and do not insert any fences"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDotVerify(
+ PASS_KEY "-dot-verify",
+ cl::desc("For each function, emit a dot graph to stdout depicting "
+ "potential LVI gadgets, used for testing purposes only"),
+ cl::init(false), cl::Hidden);
+
+static llvm::sys::DynamicLibrary OptimizeDL;
+typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
+ unsigned int *edges, int *edge_values,
+ int *cut_edges /* out */, unsigned int edges_size);
+static OptimizeCutT OptimizeCut = nullptr;
+
+namespace {
+
+struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {
+ static constexpr int GadgetEdgeSentinel = -1;
+ static constexpr MachineInstr *const ArgNodeSentinel = nullptr;
+
+ using GraphT = ImmutableGraph<MachineInstr *, int>;
+ using Node = typename GraphT::Node;
+ using Edge = typename GraphT::Edge;
+ using size_type = typename GraphT::size_type;
+ MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,
+ std::unique_ptr<Edge[]> Edges, size_type NodesSize,
+ size_type EdgesSize, int NumFences = 0, int NumGadgets = 0)
+ : GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize),
+ NumFences(NumFences), NumGadgets(NumGadgets) {}
+ static inline bool isCFGEdge(const Edge &E) {
+ return E.getValue() != GadgetEdgeSentinel;
+ }
+ static inline bool isGadgetEdge(const Edge &E) {
+ return E.getValue() == GadgetEdgeSentinel;
+ }
+ int NumFences;
+ int NumGadgets;
+};
+
+class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass {
+public:
+ X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "X86 Load Value Injection (LVI) Load Hardening";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+
+private:
+ using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>;
+ using EdgeSet = MachineGadgetGraph::EdgeSet;
+ using NodeSet = MachineGadgetGraph::NodeSet;
+ using Gadget = std::pair<MachineInstr *, MachineInstr *>;
+
+ const X86Subtarget *STI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ std::unique_ptr<MachineGadgetGraph>
+ getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT,
+ const MachineDominanceFrontier &MDF) const;
+ int hardenLoadsWithPlugin(MachineFunction &MF,
+ std::unique_ptr<MachineGadgetGraph> Graph) const;
+ int hardenLoadsWithGreedyHeuristic(
+ MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const;
+ int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G,
+ EdgeSet &ElimEdges /* in, out */,
+ NodeSet &ElimNodes /* in, out */) const;
+ std::unique_ptr<MachineGadgetGraph>
+ trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const;
+ void findAndCutEdges(MachineGadgetGraph &G,
+ EdgeSet &CutEdges /* out */) const;
+ int insertFences(MachineFunction &MF, MachineGadgetGraph &G,
+ EdgeSet &CutEdges /* in, out */) const;
+ bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const;
+ bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const;
+ inline bool isFence(const MachineInstr *MI) const {
+ return MI && (MI->getOpcode() == X86::LFENCE ||
+ (STI->useLVIControlFlowIntegrity() && MI->isCall()));
+ }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+template <>
+struct GraphTraits<MachineGadgetGraph *>
+ : GraphTraits<ImmutableGraph<MachineInstr *, int> *> {};
+
+template <>
+struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
+ using GraphType = MachineGadgetGraph;
+ using Traits = llvm::GraphTraits<GraphType *>;
+ using NodeRef = typename Traits::NodeRef;
+ using EdgeRef = typename Traits::EdgeRef;
+ using ChildIteratorType = typename Traits::ChildIteratorType;
+ using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType;
+
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(NodeRef Node, GraphType *) {
+ if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel)
+ return "ARGS";
+
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << *Node->getValue();
+ return OS.str();
+ }
+
+ static std::string getNodeAttributes(NodeRef Node, GraphType *) {
+ MachineInstr *MI = Node->getValue();
+ if (MI == MachineGadgetGraph::ArgNodeSentinel)
+ return "color = blue";
+ if (MI->getOpcode() == X86::LFENCE)
+ return "color = green";
+ return "";
+ }
+
+ static std::string getEdgeAttributes(NodeRef, ChildIteratorType E,
+ GraphType *) {
+ int EdgeVal = (*E.getCurrent()).getValue();
+ return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal)
+ : "color = red, style = \"dashed\"";
+ }
+};
+
+} // end namespace llvm
+
+constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel;
+constexpr int MachineGadgetGraph::GadgetEdgeSentinel;
+
+char X86LoadValueInjectionLoadHardeningPass::ID = 0;
+
+void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineDominanceFrontier>();
+ AU.setPreservesCFG();
+}
+
+static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF,
+ MachineGadgetGraph *G) {
+ WriteGraph(OS, G, /*ShortNames*/ false,
+ "Speculative gadgets for \"" + MF.getName() + "\" function");
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+ << " *****\n");
+ STI = &MF.getSubtarget<X86Subtarget>();
+ if (!STI->useLVILoadHardening())
+ return false;
+
+ // FIXME: support 32-bit
+ if (!STI->is64Bit())
+ report_fatal_error("LVI load hardening is only supported on 64-bit", false);
+
+  // Don't skip functions with the "optnone" attribute, but do participate in
+  // opt-bisect.
+ const Function &F = MF.getFunction();
+ if (!F.hasOptNone() && skipFunction(F))
+ return false;
+
+ ++NumFunctionsConsidered;
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "Building gadget graph...\n");
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &MDF = getAnalysis<MachineDominanceFrontier>();
+ std::unique_ptr<MachineGadgetGraph> Graph = getGadgetGraph(MF, MLI, MDT, MDF);
+ LLVM_DEBUG(dbgs() << "Building gadget graph... Done\n");
+ if (Graph == nullptr)
+ return false; // didn't find any gadgets
+
+ if (EmitDotVerify) {
+ WriteGadgetGraph(outs(), MF, Graph.get());
+ return false;
+ }
+
+ if (EmitDot || EmitDotOnly) {
+ LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n");
+ std::error_code FileError;
+ std::string FileName = "lvi.";
+ FileName += MF.getName();
+ FileName += ".dot";
+ raw_fd_ostream FileOut(FileName, FileError);
+ if (FileError)
+ errs() << FileError.message();
+ WriteGadgetGraph(FileOut, MF, Graph.get());
+ FileOut.close();
+ LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n");
+ if (EmitDotOnly)
+ return false;
+ }
+
+ int FencesInserted;
+ if (!OptimizePluginPath.empty()) {
+ if (!OptimizeDL.isValid()) {
+ std::string ErrorMsg;
+ OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
+ OptimizePluginPath.c_str(), &ErrorMsg);
+ if (!ErrorMsg.empty())
+ report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
+ OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
+ if (!OptimizeCut)
+ report_fatal_error("Invalid optimization plugin");
+ }
+ FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph));
+ } else { // Use the default greedy heuristic
+ FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph));
+ }
+
+ if (FencesInserted > 0)
+ ++NumFunctionsMitigated;
+ NumFences += FencesInserted;
+ return (FencesInserted > 0);
+}
+
+std::unique_ptr<MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
+ MachineFunction &MF, const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT,
+ const MachineDominanceFrontier &MDF) const {
+ using namespace rdf;
+
+ // Build the Register Dataflow Graph using the RDF framework
+ TargetOperandInfo TOI{*TII};
+ DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI};
+ DFG.build();
+ Liveness L{MF.getRegInfo(), DFG};
+ L.computePhiInfo();
+
+ GraphBuilder Builder;
+ using GraphIter = typename GraphBuilder::BuilderNodeRef;
+ DenseMap<MachineInstr *, GraphIter> NodeMap;
+ int FenceCount = 0, GadgetCount = 0;
+ auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
+ auto Ref = NodeMap.find(MI);
+ if (Ref == NodeMap.end()) {
+ auto I = Builder.addVertex(MI);
+ NodeMap[MI] = I;
+ return std::pair<GraphIter, bool>{I, true};
+ }
+ return std::pair<GraphIter, bool>{Ref->getSecond(), false};
+ };
+
+ // The `Transmitters` map memoizes transmitters found for each def. If a def
+ // has not yet been analyzed, then it will not appear in the map. If a def
+ // has been analyzed and was determined not to have any transmitters, then
+ // its list of transmitters will be empty.
+ DenseMap<NodeId, std::vector<NodeId>> Transmitters;
+
+  // Analyze all machine instructions to find gadgets and LFENCEs, adding
+  // each interesting instruction as a node in the gadget graph via
+  // `MaybeAddNode`.
+ auto AnalyzeDef = [&](NodeAddr<DefNode *> SourceDef) {
+ SmallSet<NodeId, 8> UsesVisited, DefsVisited;
+ std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain =
+ [&](NodeAddr<DefNode *> Def) {
+ if (Transmitters.find(Def.Id) != Transmitters.end())
+ return; // Already analyzed `Def`
+
+ // Use RDF to find all the uses of `Def`
+ rdf::NodeSet Uses;
+ RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG));
+ for (auto UseID : L.getAllReachedUses(DefReg, Def)) {
+ auto Use = DFG.addr<UseNode *>(UseID);
+ if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node
+ NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG);
+ for (auto I : L.getRealUses(Phi.Id)) {
+ if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) {
+ for (auto UA : I.second)
+ Uses.emplace(UA.first);
+ }
+ }
+ } else { // not a phi node
+ Uses.emplace(UseID);
+ }
+ }
+
+ // For each use of `Def`, we want to know whether:
+ // (1) The use can leak the Def'ed value,
+ // (2) The use can further propagate the Def'ed value to more defs
+ for (auto UseID : Uses) {
+ if (!UsesVisited.insert(UseID).second)
+ continue; // Already visited this use of `Def`
+
+ auto Use = DFG.addr<UseNode *>(UseID);
+ assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef));
+ MachineOperand &UseMO = Use.Addr->getOp();
+ MachineInstr &UseMI = *UseMO.getParent();
+ assert(UseMO.isReg());
+
+ // We naively assume that an instruction propagates any loaded
+ // uses to all defs unless the instruction is a call, in which
+ // case all arguments will be treated as gadget sources during
+ // analysis of the callee function.
+ if (UseMI.isCall())
+ continue;
+
+ // Check whether this use can transmit (leak) its value.
+ if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) ||
+ (!NoConditionalBranches &&
+ instrUsesRegToBranch(UseMI, UseMO.getReg()))) {
+ Transmitters[Def.Id].push_back(Use.Addr->getOwner(DFG).Id);
+ if (UseMI.mayLoad())
+ continue; // Found a transmitting load -- no need to continue
+ // traversing its defs (i.e., this load will become
+                        // a new gadget source anyway).
+ }
+
+ // Check whether the use propagates to more defs.
+ NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)};
+ rdf::NodeList AnalyzedChildDefs;
+ for (auto &ChildDef :
+ Owner.Addr->members_if(DataFlowGraph::IsDef, DFG)) {
+ if (!DefsVisited.insert(ChildDef.Id).second)
+ continue; // Already visited this def
+ if (Def.Addr->getAttrs() & NodeAttrs::Dead)
+ continue;
+ if (Def.Id == ChildDef.Id)
+ continue; // `Def` uses itself (e.g., increment loop counter)
+
+ AnalyzeDefUseChain(ChildDef);
+
+ // `Def` inherits all of its child defs' transmitters.
+ for (auto TransmitterId : Transmitters[ChildDef.Id])
+ Transmitters[Def.Id].push_back(TransmitterId);
+ }
+ }
+
+ // Note that this statement adds `Def.Id` to the map if no
+ // transmitters were found for `Def`.
+ auto &DefTransmitters = Transmitters[Def.Id];
+
+ // Remove duplicate transmitters
+ llvm::sort(DefTransmitters);
+ DefTransmitters.erase(
+ std::unique(DefTransmitters.begin(), DefTransmitters.end()),
+ DefTransmitters.end());
+ };
+
+ // Find all of the transmitters
+ AnalyzeDefUseChain(SourceDef);
+ auto &SourceDefTransmitters = Transmitters[SourceDef.Id];
+ if (SourceDefTransmitters.empty())
+ return; // No transmitters for `SourceDef`
+
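+    // A source def flagged PhiRef has no machine instruction of its own; it
+    // models a function argument, so map it to the argument sentinel node.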
+ MachineInstr *Source = SourceDef.Addr->getFlags() & NodeAttrs::PhiRef
+ ? MachineGadgetGraph::ArgNodeSentinel
+ : SourceDef.Addr->getOp().getParent();
+ auto GadgetSource = MaybeAddNode(Source);
+ // Each transmitter is a sink for `SourceDef`.
+ for (auto TransmitterId : SourceDefTransmitters) {
+ MachineInstr *Sink = DFG.addr<StmtNode *>(TransmitterId).Addr->getCode();
+ auto GadgetSink = MaybeAddNode(Sink);
+ // Add the gadget edge to the graph.
+ Builder.addEdge(MachineGadgetGraph::GadgetEdgeSentinel,
+ GadgetSource.first, GadgetSink.first);
+ ++GadgetCount;
+ }
+ };
+
+ LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n");
+ // Analyze function arguments
+ NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG);
+ for (NodeAddr<PhiNode *> ArgPhi :
+ EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) {
+ NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG);
+ llvm::for_each(Defs, AnalyzeDef);
+ }
+ // Analyze every instruction in MF
+ for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
+ for (NodeAddr<StmtNode *> SA :
+ BA.Addr->members_if(DataFlowGraph::IsCode<NodeAttrs::Stmt>, DFG)) {
+ MachineInstr *MI = SA.Addr->getCode();
+ if (isFence(MI)) {
+ MaybeAddNode(MI);
+ ++FenceCount;
+ } else if (MI->mayLoad()) {
+ NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG);
+ llvm::for_each(Defs, AnalyzeDef);
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n");
+ LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n");
+ if (GadgetCount == 0)
+ return nullptr;
+ NumGadgets += GadgetCount;
+
+ // Traverse CFG to build the rest of the graph
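+  // Each edge is weighted with the loop depth at the point where it executes,
+  // so the cut heuristics below treat edges at greater loop depth as more
+  // expensive to fence.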
+ SmallSet<MachineBasicBlock *, 8> BlocksVisited;
+ std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG =
+ [&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) {
+ unsigned LoopDepth = MLI.getLoopDepth(MBB);
+ if (!MBB->empty()) {
+ // Always add the first instruction in each block
+ auto NI = MBB->begin();
+ auto BeginBB = MaybeAddNode(&*NI);
+ Builder.addEdge(ParentDepth, GI, BeginBB.first);
+ if (!BlocksVisited.insert(MBB).second)
+ return;
+
+ // Add any instructions within the block that are gadget components
+ GI = BeginBB.first;
+ while (++NI != MBB->end()) {
+ auto Ref = NodeMap.find(&*NI);
+ if (Ref != NodeMap.end()) {
+ Builder.addEdge(LoopDepth, GI, Ref->getSecond());
+ GI = Ref->getSecond();
+ }
+ }
+
+ // Always add the terminator instruction, if one exists
+ auto T = MBB->getFirstTerminator();
+ if (T != MBB->end()) {
+ auto EndBB = MaybeAddNode(&*T);
+ if (EndBB.second)
+ Builder.addEdge(LoopDepth, GI, EndBB.first);
+ GI = EndBB.first;
+ }
+ }
+ for (MachineBasicBlock *Succ : MBB->successors())
+ TraverseCFG(Succ, GI, LoopDepth);
+ };
+ // ArgNodeSentinel is a pseudo-instruction that represents MF args in the
+ // GadgetGraph
+ GraphIter ArgNode = MaybeAddNode(MachineGadgetGraph::ArgNodeSentinel).first;
+ TraverseCFG(&MF.front(), ArgNode, 0);
+ std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)};
+ LLVM_DEBUG(dbgs() << "Found " << G->nodes_size() << " nodes\n");
+ return G;
+}
+
+// Returns the number of remaining gadget edges that could not be eliminated
+int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
+ MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */,
+ MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const {
+ if (G.NumFences > 0) {
+    // Eliminate each fence, along with the CFG edges that enter and leave it,
+    // since any gadget path through a fence is trivially mitigated.
+ for (const auto &E : G.edges()) {
+ const MachineGadgetGraph::Node *Dest = E.getDest();
+ if (isFence(Dest->getValue())) {
+ ElimNodes.insert(*Dest);
+ ElimEdges.insert(E);
+ for (const auto &DE : Dest->edges())
+ ElimEdges.insert(DE);
+ }
+ }
+ }
+
+ // Find and eliminate gadget edges that have been mitigated.
+ int MitigatedGadgets = 0, RemainingGadgets = 0;
+ MachineGadgetGraph::NodeSet ReachableNodes{G};
+ for (const auto &RootN : G.nodes()) {
+ if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge))
+ continue; // skip this node if it isn't a gadget source
+
+ // Find all of the nodes that are CFG-reachable from RootN using DFS
+ ReachableNodes.clear();
+ std::function<void(const MachineGadgetGraph::Node *, bool)>
+ FindReachableNodes =
+ [&](const MachineGadgetGraph::Node *N, bool FirstNode) {
+ if (!FirstNode)
+ ReachableNodes.insert(*N);
+ for (const auto &E : N->edges()) {
+ const MachineGadgetGraph::Node *Dest = E.getDest();
+ if (MachineGadgetGraph::isCFGEdge(E) &&
+ !ElimEdges.contains(E) && !ReachableNodes.contains(*Dest))
+ FindReachableNodes(Dest, false);
+ }
+ };
+ FindReachableNodes(&RootN, true);
+
+ // Any gadget whose sink is unreachable has been mitigated
+ for (const auto &E : RootN.edges()) {
+ if (MachineGadgetGraph::isGadgetEdge(E)) {
+ if (ReachableNodes.contains(*E.getDest())) {
+ // This gadget's sink is reachable
+ ++RemainingGadgets;
+ } else { // This gadget's sink is unreachable, and therefore mitigated
+ ++MitigatedGadgets;
+ ElimEdges.insert(E);
+ }
+ }
+ }
+ }
+ return RemainingGadgets;
+}
+
+std::unique_ptr<MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges(
+ std::unique_ptr<MachineGadgetGraph> Graph) const {
+ MachineGadgetGraph::NodeSet ElimNodes{*Graph};
+ MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
+ int RemainingGadgets =
+ elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes);
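+  // If nothing was eliminated, just update the counts in place; otherwise,
+  // rebuild a smaller graph without the eliminated nodes and edges.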
+ if (ElimEdges.empty() && ElimNodes.empty()) {
+ Graph->NumFences = 0;
+ Graph->NumGadgets = RemainingGadgets;
+ } else {
+ Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */,
+ RemainingGadgets);
+ }
+ return Graph;
+}
+
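+// Repeatedly trim away already-mitigated paths, ask the external plugin which
+// edges to cut, and fence the cut edges, until no gadgets remain.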
+int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin(
+ MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
+ int FencesInserted = 0;
+
+ do {
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
+ Graph = trimMitigatedEdges(std::move(Graph));
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
+ if (Graph->NumGadgets == 0)
+ break;
+
+ LLVM_DEBUG(dbgs() << "Cutting edges...\n");
+ EdgeSet CutEdges{*Graph};
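+    // Flatten the graph into parallel index arrays (a CSR-style adjacency
+    // list) for the plugin's C interface.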
+ auto Nodes = std::make_unique<unsigned int[]>(Graph->nodes_size() +
+ 1 /* terminator node */);
+ auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size());
+ auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size());
+ auto EdgeValues = std::make_unique<int[]>(Graph->edges_size());
+ for (const auto &N : Graph->nodes()) {
+ Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin());
+ }
+ Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node
+ for (const auto &E : Graph->edges()) {
+ Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest());
+ EdgeValues[Graph->getEdgeIndex(E)] = E.getValue();
+ }
+ OptimizeCut(Nodes.get(), Graph->nodes_size(), Edges.get(), EdgeValues.get(),
+ EdgeCuts.get(), Graph->edges_size());
+ for (int I = 0; I < Graph->edges_size(); ++I)
+ if (EdgeCuts[I])
+ CutEdges.set(I);
+ LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
+ LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
+
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
+ FencesInserted += insertFences(MF, *Graph, CutEdges);
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
+ LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
+
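+    // Drop the edges we just fenced and go around again.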
+ Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph},
+ CutEdges);
+ } while (true);
+
+ return FencesInserted;
+}
+
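+// Greedy fallback used when no plugin is supplied: repeatedly cut the cheapest
+// CFG edge that is guaranteed to mitigate at least one remaining gadget.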
+int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic(
+ MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
+ Graph = trimMitigatedEdges(std::move(Graph));
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
+ if (Graph->NumGadgets == 0)
+ return 0;
+
+ LLVM_DEBUG(dbgs() << "Cutting edges...\n");
+  MachineGadgetGraph::NodeSet ElimNodes{*Graph};
+ MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph};
+ auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) {
+ return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
+ MachineGadgetGraph::isCFGEdge(E);
+ };
+ auto IsGadgetEdge = [&ElimEdges,
+ &CutEdges](const MachineGadgetGraph::Edge &E) {
+ return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
+ MachineGadgetGraph::isGadgetEdge(E);
+ };
+
+  // FIXME: this is O(E^2); we could probably do better.
+ do {
+ // Find the cheapest CFG edge that will eliminate a gadget (by being
+ // egress from a SOURCE node or ingress to a SINK node), and cut it.
+ const MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
+
+ // First, collect all gadget source and sink nodes.
+ MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph};
+ for (const auto &N : Graph->nodes()) {
+ if (ElimNodes.contains(N))
+ continue;
+ for (const auto &E : N.edges()) {
+ if (IsGadgetEdge(E)) {
+ GadgetSources.insert(N);
+ GadgetSinks.insert(*E.getDest());
+ }
+ }
+ }
+
+ // Next, look for the cheapest CFG edge which, when cut, is guaranteed to
+ // mitigate at least one gadget by either:
+ // (a) being egress from a gadget source, or
+ // (b) being ingress to a gadget sink.
+ for (const auto &N : Graph->nodes()) {
+ if (ElimNodes.contains(N))
+ continue;
+ for (const auto &E : N.edges()) {
+ if (IsCFGEdge(E)) {
+ if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) {
+ if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue())
+ CheapestSoFar = &E;
+ }
+ }
+ }
+ }
+
+ assert(CheapestSoFar && "Failed to cut an edge");
+ CutEdges.insert(*CheapestSoFar);
+ ElimEdges.insert(*CheapestSoFar);
+ } while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes));
+ LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
+ LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
+
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
+ int FencesInserted = insertFences(MF, *Graph, CutEdges);
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
+ LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
+
+ return FencesInserted;
+}
+
+int X86LoadValueInjectionLoadHardeningPass::insertFences(
+ MachineFunction &MF, MachineGadgetGraph &G,
+ EdgeSet &CutEdges /* in, out */) const {
+ int FencesInserted = 0;
+ for (const auto &N : G.nodes()) {
+ for (const auto &E : N.edges()) {
+ if (CutEdges.contains(E)) {
+ MachineInstr *MI = N.getValue(), *Prev;
+ MachineBasicBlock *MBB; // Insert an LFENCE in this MBB
+ MachineBasicBlock::iterator InsertionPt; // ...at this point
+ if (MI == MachineGadgetGraph::ArgNodeSentinel) {
+ // insert LFENCE at beginning of entry block
+ MBB = &MF.front();
+ InsertionPt = MBB->begin();
+ Prev = nullptr;
+ } else if (MI->isBranch()) { // insert the LFENCE before the branch
+ MBB = MI->getParent();
+ InsertionPt = MI;
+ Prev = MI->getPrevNode();
+ // Remove all egress CFG edges from this branch because the inserted
+ // LFENCE prevents gadgets from crossing the branch.
+ for (const auto &E : N.edges()) {
+ if (MachineGadgetGraph::isCFGEdge(E))
+ CutEdges.insert(E);
+ }
+ } else { // insert the LFENCE after the instruction
+ MBB = MI->getParent();
+ InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end();
+ Prev = InsertionPt == MBB->end()
+ ? (MBB->empty() ? nullptr : &MBB->back())
+ : InsertionPt->getPrevNode();
+ }
+ // Ensure this insertion is not redundant (two LFENCEs in sequence).
+ if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) &&
+ (!Prev || !isFence(Prev))) {
+ BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+ ++FencesInserted;
+ }
+ }
+ }
+ }
+ return FencesInserted;
+}
+
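+// Returns true if MI accesses memory through an address whose base or index
+// register overlaps Reg.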
+bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory(
+ const MachineInstr &MI, unsigned Reg) const {
+ if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE ||
+ MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE)
+ return false;
+
+  // FIXME: This does not handle pseudo loading instructions like TCRETURN*.
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+ if (MemRefBeginIdx < 0) {
+ LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading "
+ "instruction:\n";
+ MI.print(dbgs()); dbgs() << '\n';);
+ return false;
+ }
+ MemRefBeginIdx += X86II::getOperandBias(Desc);
+
+ const MachineOperand &BaseMO =
+ MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
+ const MachineOperand &IndexMO =
+ MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
+ return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister &&
+ TRI->regsOverlap(BaseMO.getReg(), Reg)) ||
+ (IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister &&
+ TRI->regsOverlap(IndexMO.getReg(), Reg));
+}
+
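+// Returns true if MI is a conditional branch with Reg among its use operands.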
+bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch(
+ const MachineInstr &MI, unsigned Reg) const {
+ if (!MI.isConditionalBranch())
+ return false;
+ for (const MachineOperand &Use : MI.uses())
+ if (Use.isReg() && Use.getReg() == Reg)
+ return true;
+ return false;
+}
+
+INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
+ "X86 LVI load hardening", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
+ "X86 LVI load hardening", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
+ return new X86LoadValueInjectionLoadHardeningPass();
+}
+
+namespace {
+
+/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive
+/// analysis passes that add complexity to the pipeline. This complexity
+/// can cause noticeable overhead when no optimizations are enabled, i.e., at
+/// -O0.
+/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to
+/// provide the same security as the optimized pass, but without adding
+/// unnecessary complexity to the LLVM pipeline.
+///
+/// The behavior of this pass is simply to insert an LFENCE after every load
+/// instruction.
+class X86LoadValueInjectionLoadHardeningUnoptimizedPass
+ : public MachineFunctionPass {
+public:
+ X86LoadValueInjectionLoadHardeningUnoptimizedPass()
+ : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)";
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0;
+
+bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+ << " *****\n");
+ const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>();
+ if (!STI->useLVILoadHardening())
+ return false;
+
+ // FIXME: support 32-bit
+ if (!STI->is64Bit())
+ report_fatal_error("LVI load hardening is only supported on 64-bit", false);
+
+  // Don't skip "optnone" functions, but do still participate in opt-bisect.
+ const Function &F = MF.getFunction();
+ if (!F.hasOptNone() && skipFunction(F))
+ return false;
+
+ bool Modified = false;
+ ++NumFunctionsConsidered;
+
+ const TargetInstrInfo *TII = STI->getInstrInfo();
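+  // Insert an LFENCE after every load, skipping instructions that are
+  // themselves fences.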
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE ||
+ MI.getOpcode() == X86::MFENCE)
+ continue;
+
+ MachineBasicBlock::iterator InsertionPt =
+ MI.getNextNode() ? MI.getNextNode() : MBB.end();
+ BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+ ++NumFences;
+ Modified = true;
+ }
+ }
+
+ if (Modified)
+ ++NumFunctionsMitigated;
+
+ return Modified;
+}
+
+INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY,
+ "X86 LVI load hardening", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() {
+ return new X86LoadValueInjectionLoadHardeningUnoptimizedPass();
+}
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
new file mode 100644
index 000000000000..6e1134a25950
--- /dev/null
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -0,0 +1,143 @@
+//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: Replaces every `ret` instruction with the sequence:
+/// ```
+/// pop <scratch-reg>
+/// lfence
+/// jmp *<scratch-reg>
+/// ```
+/// where `<scratch-reg>` is some available scratch register, according to the
+/// calling convention of the function being mitigated.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include <bitset>
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-ret"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
+STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
+STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
+ "were deployed");
+
+namespace {
+
+class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass {
+public:
+ X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {}
+ StringRef getPassName() const override {
+ return "X86 Load Value Injection (LVI) Ret-Hardening";
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionRetHardeningPass::ID = 0;
+
+bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+ << " *****\n");
+ const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>();
+ if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit())
+ return false; // FIXME: support 32-bit
+
+  // Don't skip "optnone" functions, but do still participate in opt-bisect.
+ const Function &F = MF.getFunction();
+ if (!F.hasOptNone() && skipFunction(F))
+ return false;
+
+ ++NumFunctionsConsidered;
+ const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const X86InstrInfo *TII = Subtarget->getInstrInfo();
+ unsigned ClobberReg = X86::NoRegister;
+ std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s;
+ UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer
+ UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer
+ UnclobberableGR64s.set(X86::RAX); // used for function return
+ UnclobberableGR64s.set(X86::RDX); // used for function return
+
+  // Mark the callee-saved registers as unclobberable; any remaining GR64 is
+  // fair game to clobber under the function's calling convention.
+ for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR)
+ UnclobberableGR64s.set(Reg);
+ for (auto &Reg : X86::GR64RegClass) {
+ if (!UnclobberableGR64s.test(Reg)) {
+ ClobberReg = Reg;
+ break;
+ }
+ }
+
+ if (ClobberReg != X86::NoRegister) {
+ LLVM_DEBUG(dbgs() << "Selected register "
+ << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg)
+ << " to clobber\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n");
+ }
+
+ bool Modified = false;
+ for (auto &MBB : MF) {
+ if (MBB.empty())
+ continue;
+
+ MachineInstr &MI = MBB.back();
+ if (MI.getOpcode() != X86::RETQ)
+ continue;
+
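+    // Rewrite `retq` as `pop <scratch>; lfence; jmp *<scratch>` so the return
+    // target is loaded and fenced before the indirect jump.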
+ if (ClobberReg != X86::NoRegister) {
+ MBB.erase_instr(&MI);
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r))
+ .addReg(ClobberReg, RegState::Define)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r))
+ .addReg(ClobberReg);
+ } else {
+ // In case there is no available scratch register, we can still read from
+ // RSP to assert that RSP points to a valid page. The write to RSP is
+ // also helpful because it verifies that the stack's write permissions
+ // are intact.
+ MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
+ addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
+ X86::RSP, false, 0)
+ .addImm(0)
+ ->addRegisterDead(X86::EFLAGS, TRI);
+ }
+
+ ++NumFences;
+ Modified = true;
+ }
+
+ if (Modified)
+ ++NumFunctionsMitigated;
+ return Modified;
+}
+
+INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY,
+ "X86 LVI ret hardener", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() {
+ return new X86LoadValueInjectionRetHardeningPass();
+}
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 7f49c6e861d4..f5caaaae4d84 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1220,8 +1220,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
break;
case MachineOperand::MO_Register:
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpolineIndirectCalls())
- report_fatal_error("Lowering register statepoints with retpoline not "
+ if (Subtarget->useIndirectThunkCalls())
+ report_fatal_error("Lowering register statepoints with thunks not "
"yet implemented.");
CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
CallOpcode = X86::CALL64r;
@@ -1399,9 +1399,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
EmitAndCountInstruction(
MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpolineIndirectCalls())
+ if (Subtarget->useIndirectThunkCalls())
report_fatal_error(
- "Lowering patchpoint with retpoline not yet implemented.");
+ "Lowering patchpoint with thunks not yet implemented.");
EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
}
diff --git a/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/llvm/lib/Target/X86/X86RetpolineThunks.cpp
deleted file mode 100644
index 9085d7f068ac..000000000000
--- a/llvm/lib/Target/X86/X86RetpolineThunks.cpp
+++ /dev/null
@@ -1,286 +0,0 @@
-//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// Pass that injects an MI thunk implementing a "retpoline". This is
-/// a RET-implemented trampoline that is used to lower indirect calls in a way
-/// that prevents speculation on some x86 processors and can be used to mitigate
-/// security vulnerabilities due to targeted speculative execution and side
-/// channels such as CVE-2017-5715.
-///
-/// TODO(chandlerc): All of this code could use better comments and
-/// documentation.
-///
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrBuilder.h"
-#include "X86Subtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "x86-retpoline-thunks"
-
-static const char ThunkNamePrefix[] = "__llvm_retpoline_";
-static const char R11ThunkName[] = "__llvm_retpoline_r11";
-static const char EAXThunkName[] = "__llvm_retpoline_eax";
-static const char ECXThunkName[] = "__llvm_retpoline_ecx";
-static const char EDXThunkName[] = "__llvm_retpoline_edx";
-static const char EDIThunkName[] = "__llvm_retpoline_edi";
-
-namespace {
-class X86RetpolineThunks : public MachineFunctionPass {
-public:
- static char ID;
-
- X86RetpolineThunks() : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override { return "X86 Retpoline Thunks"; }
-
- bool doInitialization(Module &M) override;
- bool runOnMachineFunction(MachineFunction &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineModuleInfoWrapperPass>();
- AU.addPreserved<MachineModuleInfoWrapperPass>();
- }
-
-private:
- MachineModuleInfo *MMI = nullptr;
- const TargetMachine *TM = nullptr;
- bool Is64Bit = false;
- const X86Subtarget *STI = nullptr;
- const X86InstrInfo *TII = nullptr;
-
- bool InsertedThunks = false;
-
- void createThunkFunction(Module &M, StringRef Name);
- void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
- void populateThunk(MachineFunction &MF, unsigned Reg);
-};
-
-} // end anonymous namespace
-
-FunctionPass *llvm::createX86RetpolineThunksPass() {
- return new X86RetpolineThunks();
-}
-
-char X86RetpolineThunks::ID = 0;
-
-bool X86RetpolineThunks::doInitialization(Module &M) {
- InsertedThunks = false;
- return false;
-}
-
-bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << getPassName() << '\n');
-
- TM = &MF.getTarget();;
- STI = &MF.getSubtarget<X86Subtarget>();
- TII = STI->getInstrInfo();
- Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
-
- MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
- Module &M = const_cast<Module &>(*MMI->getModule());
-
- // If this function is not a thunk, check to see if we need to insert
- // a thunk.
- if (!MF.getName().startswith(ThunkNamePrefix)) {
- // If we've already inserted a thunk, nothing else to do.
- if (InsertedThunks)
- return false;
-
- // Only add a thunk if one of the functions has the retpoline feature
- // enabled in its subtarget, and doesn't enable external thunks.
- // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
- // nothing will end up calling it.
- // FIXME: It's a little silly to look at every function just to enumerate
- // the subtargets, but eventually we'll want to look at them for indirect
- // calls, so maybe this is OK.
- if ((!STI->useRetpolineIndirectCalls() &&
- !STI->useRetpolineIndirectBranches()) ||
- STI->useRetpolineExternalThunk())
- return false;
-
- // Otherwise, we need to insert the thunk.
- // WARNING: This is not really a well behaving thing to do in a function
- // pass. We extract the module and insert a new function (and machine
- // function) directly into the module.
- if (Is64Bit)
- createThunkFunction(M, R11ThunkName);
- else
- for (StringRef Name :
- {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
- createThunkFunction(M, Name);
- InsertedThunks = true;
- return true;
- }
-
- // If this *is* a thunk function, we need to populate it with the correct MI.
- if (Is64Bit) {
- assert(MF.getName() == "__llvm_retpoline_r11" &&
- "Should only have an r11 thunk on 64-bit targets");
-
- // __llvm_retpoline_r11:
- // callq .Lr11_call_target
- // .Lr11_capture_spec:
- // pause
- // lfence
- // jmp .Lr11_capture_spec
- // .align 16
- // .Lr11_call_target:
- // movq %r11, (%rsp)
- // retq
- populateThunk(MF, X86::R11);
- } else {
- // For 32-bit targets we need to emit a collection of thunks for various
- // possible scratch registers as well as a fallback that uses EDI, which is
- // normally callee saved.
- // __llvm_retpoline_eax:
- // calll .Leax_call_target
- // .Leax_capture_spec:
- // pause
- // jmp .Leax_capture_spec
- // .align 16
- // .Leax_call_target:
- // movl %eax, (%esp) # Clobber return addr
- // retl
- //
- // __llvm_retpoline_ecx:
- // ... # Same setup
- // movl %ecx, (%esp)
- // retl
- //
- // __llvm_retpoline_edx:
- // ... # Same setup
- // movl %edx, (%esp)
- // retl
- //
- // __llvm_retpoline_edi:
- // ... # Same setup
- // movl %edi, (%esp)
- // retl
- if (MF.getName() == EAXThunkName)
- populateThunk(MF, X86::EAX);
- else if (MF.getName() == ECXThunkName)
- populateThunk(MF, X86::ECX);
- else if (MF.getName() == EDXThunkName)
- populateThunk(MF, X86::EDX);
- else if (MF.getName() == EDIThunkName)
- populateThunk(MF, X86::EDI);
- else
- llvm_unreachable("Invalid thunk name on x86-32!");
- }
-
- return true;
-}
-
-void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
- assert(Name.startswith(ThunkNamePrefix) &&
- "Created a thunk with an unexpected prefix!");
-
- LLVMContext &Ctx = M.getContext();
- auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
- Function *F =
- Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
- F->setVisibility(GlobalValue::HiddenVisibility);
- F->setComdat(M.getOrInsertComdat(Name));
-
- // Add Attributes so that we don't create a frame, unwind information, or
- // inline.
- AttrBuilder B;
- B.addAttribute(llvm::Attribute::NoUnwind);
- B.addAttribute(llvm::Attribute::Naked);
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
-
- // Populate our function a bit so that we can verify.
- BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
- IRBuilder<> Builder(Entry);
-
- Builder.CreateRetVoid();
-
- // MachineFunctions/MachineBasicBlocks aren't created automatically for the
- // IR-level constructs we already made. Create them and insert them into the
- // module.
- MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
- MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
-
- // Insert EntryMBB into MF. It's not in the module until we do this.
- MF.insert(MF.end(), EntryMBB);
-}
-
-void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
- unsigned Reg) {
- const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
- const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP;
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
- .addReg(Reg);
-}
-
-void X86RetpolineThunks::populateThunk(MachineFunction &MF,
- unsigned Reg) {
- // Set MF properties. We never use vregs...
- MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
-
- // Grab the entry MBB and erase any other blocks. O0 codegen appears to
- // generate two bbs for the entry block.
- MachineBasicBlock *Entry = &MF.front();
- Entry->clear();
- while (MF.size() > 1)
- MF.erase(std::next(MF.begin()));
-
- MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
- MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
- MCSymbol *TargetSym = MF.getContext().createTempSymbol();
- MF.push_back(CaptureSpec);
- MF.push_back(CallTarget);
-
- const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
- const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
-
- Entry->addLiveIn(Reg);
- BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
-
- // The MIR verifier thinks that the CALL in the entry block will fall through
- // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is
- // the successor, but the MIR verifier doesn't know how to cope with that.
- Entry->addSuccessor(CaptureSpec);
-
- // In the capture loop for speculation, we want to stop the processor from
- // speculating as fast as possible. On Intel processors, the PAUSE instruction
- // will block speculation without consuming any execution resources. On AMD
- // processors, the PAUSE instruction is (essentially) a nop, so we also use an
- // LFENCE instruction which they have advised will stop speculation as well
- // with minimal resource utilization. We still end the capture with a jump to
- // form an infinite loop to fully guarantee that no matter what implementation
- // of the x86 ISA, speculating this code path never escapes.
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
- CaptureSpec->setHasAddressTaken();
- CaptureSpec->addSuccessor(CaptureSpec);
-
- CallTarget->addLiveIn(Reg);
- CallTarget->setHasAddressTaken();
- CallTarget->setAlignment(Align(16));
- insertRegReturnAddrClobber(*CallTarget, Reg);
- CallTarget->back().setPreInstrSymbol(MF, TargetSym);
- BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
-}
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index f4e8d30328ca..af5153243c8b 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -421,6 +421,16 @@ protected:
/// than emitting one inside the compiler.
bool UseRetpolineExternalThunk = false;
+ /// Prevent generation of indirect call/branch instructions from memory,
+ /// and force all indirect call/branch instructions from a register to be
+ /// preceded by an LFENCE. Also decompose RET instructions into a
+ /// POP+LFENCE+JMP sequence.
+ bool UseLVIControlFlowIntegrity = false;
+
+ /// Insert LFENCE instructions to prevent data speculatively injected into
+ /// loads from being used maliciously.
+ bool UseLVILoadHardening = false;
+
/// Use software floating point for code generation.
bool UseSoftFloat = false;
@@ -707,8 +717,21 @@ public:
return UseRetpolineIndirectBranches;
}
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
+
+  // Generic getters that OR together all of the thunk types supported by the
+  // subtarget: each useIndirectThunk*() getter returns true if any of the
+  // corresponding thunk features is enabled.
+ bool useIndirectThunkCalls() const {
+ return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
+ }
+ bool useIndirectThunkBranches() const {
+ return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
+ }
+
bool preferMaskRegisters() const { return PreferMaskRegisters; }
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
+ bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
+ bool useLVILoadHardening() const { return UseLVILoadHardening; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -853,10 +876,10 @@ public:
/// Return true if the subtarget allows calls to immediate address.
bool isLegalToCallImmediateAddr() const;
- /// If we are using retpolines, we need to expand indirectbr to avoid it
+ /// If we are using indirect thunks, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
bool enableIndirectBrExpand() const override {
- return useRetpolineIndirectBranches();
+ return useIndirectThunkBranches();
}
/// Enable the MachineScheduler pass for all X86 subtargets.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 7176e46f07b1..9f639ffa22ec 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -82,6 +82,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
initializeX86CondBrFoldingPassPass(PR);
+ initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
+ initializeX86LoadValueInjectionRetHardeningPassPass(PR);
initializeX86OptimizeLEAPassPass(PR);
}
@@ -496,6 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() {
void X86PassConfig::addPostRegAlloc() {
addPass(createX86FloatingPointStackifierPass());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createX86LoadValueInjectionLoadHardeningPass());
+ else
+ addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
}
void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
@@ -525,7 +531,7 @@ void X86PassConfig::addPreEmitPass2() {
const Triple &TT = TM->getTargetTriple();
const MCAsmInfo *MAI = TM->getMCAsmInfo();
- addPass(createX86RetpolineThunksPass());
+ addPass(createX86IndirectThunksPass());
// Insert extra int3 instructions after trailing call instructions to avoid
// issues in the unwinder.
@@ -542,6 +548,7 @@ void X86PassConfig::addPreEmitPass2() {
// Identify valid longjmp targets for Windows Control Flow Guard.
if (TT.isOSWindows())
addPass(createCFGuardLongjmpPass());
+ addPass(createX86LoadValueInjectionRetHardeningPass());
}
std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {