author     Dimitry Andric <dim@FreeBSD.org>    2020-06-24 20:22:44 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2020-06-24 20:22:44 +0000
commit     483b61a50e7423b063fc26985325f594560b3f7e (patch)
tree       5bb205026b61f3dd88d63f43d0b790d518acefec /llvm/lib/Target
parent     8055b7e383f74dbc58c8085a0f0c45f4c61f8231 (diff)
Vendor import of llvm-project branch release/10.x llvmorg-10.0.0-129-gd24d5c8e308.
Notes:
svn path=/vendor/llvm-project/release-10.x/; revision=362593
svn path=/vendor/llvm-project/llvmorg-10.0.0-129-gd24d5c8e308/; revision=362594; tag=vendor/llvm-project/llvmorg-10.0.0-129-gd24d5c8e308
Diffstat (limited to 'llvm/lib/Target')
38 files changed, 2091 insertions, 5072 deletions
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 6f5f58554d09..d407edfbd966 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
     LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
                       << val << '\n');
-    SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
+    SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0));
 
     // After replacement, the current node is dead, we need to
     // go backward one step to make iterator still work
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index a9fb04f20d1c..6daeb3b4b63b 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -600,6 +600,38 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
                               bool CheckPointer, bool SeenPointer) {
   if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
     TypeId = DIToIdMap[Ty];
+
+    // To handle the case like the following:
+    //   struct t;
+    //   typedef struct t _t;
+    //   struct s1 { _t *c; };
+    //   int test1(struct s1 *arg) { ... }
+    //
+    //   struct t { int a; int b; };
+    //   struct s2 { _t c; }
+    //   int test2(struct s2 *arg) { ... }
+    //
+    // During traversing test1() argument, "_t" is recorded
+    // in DIToIdMap and a forward declaration fixup is created
+    // for "struct t" to avoid pointee type traversal.
+    //
+    // During traversing test2() argument, even if we see "_t" is
+    // already defined, we should keep moving to eventually
+    // bring in types for "struct t". Otherwise, the "struct s2"
+    // definition won't be correct.
+    if (Ty && (!CheckPointer || !SeenPointer)) {
+      if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+        unsigned Tag = DTy->getTag();
+        if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type ||
+            Tag == dwarf::DW_TAG_volatile_type ||
+            Tag == dwarf::DW_TAG_restrict_type) {
+          uint32_t TmpTypeId;
+          visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer,
+                         SeenPointer);
+        }
+      }
+    }
+
     return;
   }
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 886034d9601a..f1fe51f5e54f 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -12,9 +12,6 @@
 #include "HexagonInstrInfo.h"
 #include "HexagonSubtarget.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
@@ -27,6 +24,9 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCInstrDesc.h"
diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 517ad1c6ee7b..f26e23befde2 100644
--- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -11,9 +11,6 @@
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "RDFCopy.h"
 #include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -24,6 +21,9 @@
#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp index a9d39fd4b2dc..34d58f0a7a23 100644 --- a/llvm/lib/Target/Hexagon/RDFCopy.cpp +++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -11,13 +11,13 @@ //===----------------------------------------------------------------------===// #include "RDFCopy.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/lib/Target/Hexagon/RDFCopy.h b/llvm/lib/Target/Hexagon/RDFCopy.h index 1450ab884849..99b18a75d8c2 100644 --- a/llvm/lib/Target/Hexagon/RDFCopy.h +++ b/llvm/lib/Target/Hexagon/RDFCopy.h @@ -9,9 +9,9 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H #define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/MachineFunction.h" #include <map> #include <vector> diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp index af86c7b1956b..5a98debd3c00 100644 --- a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp @@ -9,13 +9,13 @@ // RDF-based generic dead code elimination. #include "RDFDeadCode.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" #include "llvm/Support/Debug.h" #include <queue> diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.h b/llvm/lib/Target/Hexagon/RDFDeadCode.h index 7f91977e1d6c..859c8161d355 100644 --- a/llvm/lib/Target/Hexagon/RDFDeadCode.h +++ b/llvm/lib/Target/Hexagon/RDFDeadCode.h @@ -23,8 +23,8 @@ #ifndef RDF_DEADCODE_H #define RDF_DEADCODE_H -#include "RDFGraph.h" -#include "RDFLiveness.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" #include "llvm/ADT/SetVector.h" namespace llvm { diff --git a/llvm/lib/Target/Hexagon/RDFGraph.cpp b/llvm/lib/Target/Hexagon/RDFGraph.cpp deleted file mode 100644 index 0cb35dc98819..000000000000 --- a/llvm/lib/Target/Hexagon/RDFGraph.cpp +++ /dev/null @@ -1,1835 +0,0 @@ -//===- RDFGraph.cpp -------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Target-independent, SSA-based data flow graph for register data flow (RDF). 
-// -#include "RDFGraph.h" -#include "RDFRegisters.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominanceFrontier.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/MC/LaneBitmask.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cassert> -#include <cstdint> -#include <cstring> -#include <iterator> -#include <set> -#include <utility> -#include <vector> - -using namespace llvm; -using namespace rdf; - -// Printing functions. Have them here first, so that the rest of the code -// can use them. -namespace llvm { -namespace rdf { - -raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P) { - if (!P.Mask.all()) - OS << ':' << PrintLaneMask(P.Mask); - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) { - auto &TRI = P.G.getTRI(); - if (P.Obj.Reg > 0 && P.Obj.Reg < TRI.getNumRegs()) - OS << TRI.getName(P.Obj.Reg); - else - OS << '#' << P.Obj.Reg; - OS << PrintLaneMaskOpt(P.Obj.Mask); - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) { - auto NA = P.G.addr<NodeBase*>(P.Obj); - uint16_t Attrs = NA.Addr->getAttrs(); - uint16_t Kind = NodeAttrs::kind(Attrs); - uint16_t Flags = NodeAttrs::flags(Attrs); - switch (NodeAttrs::type(Attrs)) { - case NodeAttrs::Code: - switch (Kind) { - case NodeAttrs::Func: OS << 'f'; break; - case NodeAttrs::Block: OS << 'b'; break; - case NodeAttrs::Stmt: OS << 's'; break; - case NodeAttrs::Phi: OS << 'p'; break; - default: OS << "c?"; break; - } - break; - case NodeAttrs::Ref: - if (Flags & NodeAttrs::Undef) - OS << '/'; - if (Flags & NodeAttrs::Dead) - OS << '\\'; - if (Flags & NodeAttrs::Preserving) - OS << '+'; - if (Flags & NodeAttrs::Clobbering) - OS << '~'; - switch (Kind) { - case NodeAttrs::Use: OS << 'u'; break; - case NodeAttrs::Def: OS << 'd'; break; - case NodeAttrs::Block: OS << 'b'; break; - default: OS << "r?"; break; - } - break; - default: - OS << '?'; - break; - } - OS << P.Obj; - if (Flags & NodeAttrs::Shadow) - OS << '"'; - return OS; -} - -static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA, - const DataFlowGraph &G) { - OS << Print<NodeId>(RA.Id, G) << '<' - << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>'; - if (RA.Addr->getFlags() & NodeAttrs::Fixed) - OS << '!'; -} - -raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) { - printRefHeader(OS, P.Obj, P.G); - OS << '('; - if (NodeId N = P.Obj.Addr->getReachingDef()) - OS << Print<NodeId>(N, P.G); - OS << ','; - if (NodeId N = P.Obj.Addr->getReachedDef()) - OS << Print<NodeId>(N, P.G); - OS << ','; - if (NodeId N = P.Obj.Addr->getReachedUse()) - OS << Print<NodeId>(N, P.G); - OS << "):"; - if (NodeId N = P.Obj.Addr->getSibling()) - OS << Print<NodeId>(N, P.G); - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) { - printRefHeader(OS, 
P.Obj, P.G); - OS << '('; - if (NodeId N = P.Obj.Addr->getReachingDef()) - OS << Print<NodeId>(N, P.G); - OS << "):"; - if (NodeId N = P.Obj.Addr->getSibling()) - OS << Print<NodeId>(N, P.G); - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, - const Print<NodeAddr<PhiUseNode*>> &P) { - printRefHeader(OS, P.Obj, P.G); - OS << '('; - if (NodeId N = P.Obj.Addr->getReachingDef()) - OS << Print<NodeId>(N, P.G); - OS << ','; - if (NodeId N = P.Obj.Addr->getPredecessor()) - OS << Print<NodeId>(N, P.G); - OS << "):"; - if (NodeId N = P.Obj.Addr->getSibling()) - OS << Print<NodeId>(N, P.G); - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) { - switch (P.Obj.Addr->getKind()) { - case NodeAttrs::Def: - OS << PrintNode<DefNode*>(P.Obj, P.G); - break; - case NodeAttrs::Use: - if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef) - OS << PrintNode<PhiUseNode*>(P.Obj, P.G); - else - OS << PrintNode<UseNode*>(P.Obj, P.G); - break; - } - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) { - unsigned N = P.Obj.size(); - for (auto I : P.Obj) { - OS << Print<NodeId>(I.Id, P.G); - if (--N) - OS << ' '; - } - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) { - unsigned N = P.Obj.size(); - for (auto I : P.Obj) { - OS << Print<NodeId>(I, P.G); - if (--N) - OS << ' '; - } - return OS; -} - -namespace { - - template <typename T> - struct PrintListV { - PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {} - - using Type = T; - const NodeList &List; - const DataFlowGraph &G; - }; - - template <typename T> - raw_ostream &operator<< (raw_ostream &OS, const PrintListV<T> &P) { - unsigned N = P.List.size(); - for (NodeAddr<T> A : P.List) { - OS << PrintNode<T>(A, P.G); - if (--N) - OS << ", "; - } - return OS; - } - -} // end anonymous namespace - -raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) { - OS << Print<NodeId>(P.Obj.Id, P.G) << ": phi [" - << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']'; - return OS; -} - -raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<StmtNode *>> &P) { - const MachineInstr &MI = *P.Obj.Addr->getCode(); - unsigned Opc = MI.getOpcode(); - OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc); - // Print the target for calls and branches (for readability). - if (MI.isCall() || MI.isBranch()) { - MachineInstr::const_mop_iterator T = - llvm::find_if(MI.operands(), - [] (const MachineOperand &Op) -> bool { - return Op.isMBB() || Op.isGlobal() || Op.isSymbol(); - }); - if (T != MI.operands_end()) { - OS << ' '; - if (T->isMBB()) - OS << printMBBReference(*T->getMBB()); - else if (T->isGlobal()) - OS << T->getGlobal()->getName(); - else if (T->isSymbol()) - OS << T->getSymbolName(); - } - } - OS << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']'; - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, - const Print<NodeAddr<InstrNode*>> &P) { - switch (P.Obj.Addr->getKind()) { - case NodeAttrs::Phi: - OS << PrintNode<PhiNode*>(P.Obj, P.G); - break; - case NodeAttrs::Stmt: - OS << PrintNode<StmtNode*>(P.Obj, P.G); - break; - default: - OS << "instr? 
" << Print<NodeId>(P.Obj.Id, P.G); - break; - } - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, - const Print<NodeAddr<BlockNode*>> &P) { - MachineBasicBlock *BB = P.Obj.Addr->getCode(); - unsigned NP = BB->pred_size(); - std::vector<int> Ns; - auto PrintBBs = [&OS] (std::vector<int> Ns) -> void { - unsigned N = Ns.size(); - for (int I : Ns) { - OS << "%bb." << I; - if (--N) - OS << ", "; - } - }; - - OS << Print<NodeId>(P.Obj.Id, P.G) << ": --- " << printMBBReference(*BB) - << " --- preds(" << NP << "): "; - for (MachineBasicBlock *B : BB->predecessors()) - Ns.push_back(B->getNumber()); - PrintBBs(Ns); - - unsigned NS = BB->succ_size(); - OS << " succs(" << NS << "): "; - Ns.clear(); - for (MachineBasicBlock *B : BB->successors()) - Ns.push_back(B->getNumber()); - PrintBBs(Ns); - OS << '\n'; - - for (auto I : P.Obj.Addr->members(P.G)) - OS << PrintNode<InstrNode*>(I, P.G) << '\n'; - return OS; -} - -raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<FuncNode *>> &P) { - OS << "DFG dump:[\n" << Print<NodeId>(P.Obj.Id, P.G) << ": Function: " - << P.Obj.Addr->getCode()->getName() << '\n'; - for (auto I : P.Obj.Addr->members(P.G)) - OS << PrintNode<BlockNode*>(I, P.G) << '\n'; - OS << "]\n"; - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) { - OS << '{'; - for (auto I : P.Obj) - OS << ' ' << Print<RegisterRef>(I, P.G); - OS << " }"; - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterAggr> &P) { - P.Obj.print(OS); - return OS; -} - -raw_ostream &operator<< (raw_ostream &OS, - const Print<DataFlowGraph::DefStack> &P) { - for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) { - OS << Print<NodeId>(I->Id, P.G) - << '<' << Print<RegisterRef>(I->Addr->getRegRef(P.G), P.G) << '>'; - I.down(); - if (I != E) - OS << ' '; - } - return OS; -} - -} // end namespace rdf -} // end namespace llvm - -// Node allocation functions. -// -// Node allocator is like a slab memory allocator: it allocates blocks of -// memory in sizes that are multiples of the size of a node. Each block has -// the same size. Nodes are allocated from the currently active block, and -// when it becomes full, a new one is created. -// There is a mapping scheme between node id and its location in a block, -// and within that block is described in the header file. -// -void NodeAllocator::startNewBlock() { - void *T = MemPool.Allocate(NodesPerBlock*NodeMemSize, NodeMemSize); - char *P = static_cast<char*>(T); - Blocks.push_back(P); - // Check if the block index is still within the allowed range, i.e. less - // than 2^N, where N is the number of bits in NodeId for the block index. - // BitsPerIndex is the number of bits per node index. 
- assert((Blocks.size() < ((size_t)1 << (8*sizeof(NodeId)-BitsPerIndex))) && - "Out of bits for block index"); - ActiveEnd = P; -} - -bool NodeAllocator::needNewBlock() { - if (Blocks.empty()) - return true; - - char *ActiveBegin = Blocks.back(); - uint32_t Index = (ActiveEnd-ActiveBegin)/NodeMemSize; - return Index >= NodesPerBlock; -} - -NodeAddr<NodeBase*> NodeAllocator::New() { - if (needNewBlock()) - startNewBlock(); - - uint32_t ActiveB = Blocks.size()-1; - uint32_t Index = (ActiveEnd - Blocks[ActiveB])/NodeMemSize; - NodeAddr<NodeBase*> NA = { reinterpret_cast<NodeBase*>(ActiveEnd), - makeId(ActiveB, Index) }; - ActiveEnd += NodeMemSize; - return NA; -} - -NodeId NodeAllocator::id(const NodeBase *P) const { - uintptr_t A = reinterpret_cast<uintptr_t>(P); - for (unsigned i = 0, n = Blocks.size(); i != n; ++i) { - uintptr_t B = reinterpret_cast<uintptr_t>(Blocks[i]); - if (A < B || A >= B + NodesPerBlock*NodeMemSize) - continue; - uint32_t Idx = (A-B)/NodeMemSize; - return makeId(i, Idx); - } - llvm_unreachable("Invalid node address"); -} - -void NodeAllocator::clear() { - MemPool.Reset(); - Blocks.clear(); - ActiveEnd = nullptr; -} - -// Insert node NA after "this" in the circular chain. -void NodeBase::append(NodeAddr<NodeBase*> NA) { - NodeId Nx = Next; - // If NA is already "next", do nothing. - if (Next != NA.Id) { - Next = NA.Id; - NA.Addr->Next = Nx; - } -} - -// Fundamental node manipulator functions. - -// Obtain the register reference from a reference node. -RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const { - assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref); - if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef) - return G.unpack(Ref.PR); - assert(Ref.Op != nullptr); - return G.makeRegRef(*Ref.Op); -} - -// Set the register reference in the reference node directly (for references -// in phi nodes). -void RefNode::setRegRef(RegisterRef RR, DataFlowGraph &G) { - assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref); - assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef); - Ref.PR = G.pack(RR); -} - -// Set the register reference in the reference node based on a machine -// operand (for references in statement nodes). -void RefNode::setRegRef(MachineOperand *Op, DataFlowGraph &G) { - assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref); - assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)); - (void)G; - Ref.Op = Op; -} - -// Get the owner of a given reference node. -NodeAddr<NodeBase*> RefNode::getOwner(const DataFlowGraph &G) { - NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext()); - - while (NA.Addr != this) { - if (NA.Addr->getType() == NodeAttrs::Code) - return NA; - NA = G.addr<NodeBase*>(NA.Addr->getNext()); - } - llvm_unreachable("No owner in circular list"); -} - -// Connect the def node to the reaching def node. -void DefNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) { - Ref.RD = DA.Id; - Ref.Sib = DA.Addr->getReachedDef(); - DA.Addr->setReachedDef(Self); -} - -// Connect the use node to the reaching def node. -void UseNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) { - Ref.RD = DA.Id; - Ref.Sib = DA.Addr->getReachedUse(); - DA.Addr->setReachedUse(Self); -} - -// Get the first member of the code node. -NodeAddr<NodeBase*> CodeNode::getFirstMember(const DataFlowGraph &G) const { - if (Code.FirstM == 0) - return NodeAddr<NodeBase*>(); - return G.addr<NodeBase*>(Code.FirstM); -} - -// Get the last member of the code node. 
-NodeAddr<NodeBase*> CodeNode::getLastMember(const DataFlowGraph &G) const { - if (Code.LastM == 0) - return NodeAddr<NodeBase*>(); - return G.addr<NodeBase*>(Code.LastM); -} - -// Add node NA at the end of the member list of the given code node. -void CodeNode::addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) { - NodeAddr<NodeBase*> ML = getLastMember(G); - if (ML.Id != 0) { - ML.Addr->append(NA); - } else { - Code.FirstM = NA.Id; - NodeId Self = G.id(this); - NA.Addr->setNext(Self); - } - Code.LastM = NA.Id; -} - -// Add node NA after member node MA in the given code node. -void CodeNode::addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA, - const DataFlowGraph &G) { - MA.Addr->append(NA); - if (Code.LastM == MA.Id) - Code.LastM = NA.Id; -} - -// Remove member node NA from the given code node. -void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) { - NodeAddr<NodeBase*> MA = getFirstMember(G); - assert(MA.Id != 0); - - // Special handling if the member to remove is the first member. - if (MA.Id == NA.Id) { - if (Code.LastM == MA.Id) { - // If it is the only member, set both first and last to 0. - Code.FirstM = Code.LastM = 0; - } else { - // Otherwise, advance the first member. - Code.FirstM = MA.Addr->getNext(); - } - return; - } - - while (MA.Addr != this) { - NodeId MX = MA.Addr->getNext(); - if (MX == NA.Id) { - MA.Addr->setNext(NA.Addr->getNext()); - // If the member to remove happens to be the last one, update the - // LastM indicator. - if (Code.LastM == NA.Id) - Code.LastM = MA.Id; - return; - } - MA = G.addr<NodeBase*>(MX); - } - llvm_unreachable("No such member"); -} - -// Return the list of all members of the code node. -NodeList CodeNode::members(const DataFlowGraph &G) const { - static auto True = [] (NodeAddr<NodeBase*>) -> bool { return true; }; - return members_if(True, G); -} - -// Return the owner of the given instr node. -NodeAddr<NodeBase*> InstrNode::getOwner(const DataFlowGraph &G) { - NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext()); - - while (NA.Addr != this) { - assert(NA.Addr->getType() == NodeAttrs::Code); - if (NA.Addr->getKind() == NodeAttrs::Block) - return NA; - NA = G.addr<NodeBase*>(NA.Addr->getNext()); - } - llvm_unreachable("No owner in circular list"); -} - -// Add the phi node PA to the given block node. -void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) { - NodeAddr<NodeBase*> M = getFirstMember(G); - if (M.Id == 0) { - addMember(PA, G); - return; - } - - assert(M.Addr->getType() == NodeAttrs::Code); - if (M.Addr->getKind() == NodeAttrs::Stmt) { - // If the first member of the block is a statement, insert the phi as - // the first member. - Code.FirstM = PA.Id; - PA.Addr->setNext(M.Id); - } else { - // If the first member is a phi, find the last phi, and append PA to it. - assert(M.Addr->getKind() == NodeAttrs::Phi); - NodeAddr<NodeBase*> MN = M; - do { - M = MN; - MN = G.addr<NodeBase*>(M.Addr->getNext()); - assert(MN.Addr->getType() == NodeAttrs::Code); - } while (MN.Addr->getKind() == NodeAttrs::Phi); - - // M is the last phi. - addMemberAfter(M, PA, G); - } -} - -// Find the block node corresponding to the machine basic block BB in the -// given func node. 
-NodeAddr<BlockNode*> FuncNode::findBlock(const MachineBasicBlock *BB, - const DataFlowGraph &G) const { - auto EqBB = [BB] (NodeAddr<NodeBase*> NA) -> bool { - return NodeAddr<BlockNode*>(NA).Addr->getCode() == BB; - }; - NodeList Ms = members_if(EqBB, G); - if (!Ms.empty()) - return Ms[0]; - return NodeAddr<BlockNode*>(); -} - -// Get the block node for the entry block in the given function. -NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) { - MachineBasicBlock *EntryB = &getCode()->front(); - return findBlock(EntryB, G); -} - -// Target operand information. -// - -// For a given instruction, check if there are any bits of RR that can remain -// unchanged across this def. -bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum) - const { - return TII.isPredicated(In); -} - -// Check if the definition of RR produces an unspecified value. -bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum) - const { - const MachineOperand &Op = In.getOperand(OpNum); - if (Op.isRegMask()) - return true; - assert(Op.isReg()); - if (In.isCall()) - if (Op.isDef() && Op.isDead()) - return true; - return false; -} - -// Check if the given instruction specifically requires -bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) - const { - if (In.isCall() || In.isReturn() || In.isInlineAsm()) - return true; - // Check for a tail call. - if (In.isBranch()) - for (const MachineOperand &O : In.operands()) - if (O.isGlobal() || O.isSymbol()) - return true; - - const MCInstrDesc &D = In.getDesc(); - if (!D.getImplicitDefs() && !D.getImplicitUses()) - return false; - const MachineOperand &Op = In.getOperand(OpNum); - // If there is a sub-register, treat the operand as non-fixed. Currently, - // fixed registers are those that are listed in the descriptor as implicit - // uses or defs, and those lists do not allow sub-registers. - if (Op.getSubReg() != 0) - return false; - Register Reg = Op.getReg(); - const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs() - : D.getImplicitUses(); - if (!ImpR) - return false; - while (*ImpR) - if (*ImpR++ == Reg) - return true; - return false; -} - -// -// The data flow graph construction. -// - -DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, - const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, - const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi) - : MF(mf), TII(tii), TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(toi), - LiveIns(PRI) { -} - -// The implementation of the definition stack. -// Each register reference has its own definition stack. In particular, -// for a register references "Reg" and "Reg:subreg" will each have their -// own definition stacks. - -// Construct a stack iterator. -DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S, - bool Top) : DS(S) { - if (!Top) { - // Initialize to bottom. - Pos = 0; - return; - } - // Initialize to the top, i.e. top-most non-delimiter (or 0, if empty). - Pos = DS.Stack.size(); - while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos-1])) - Pos--; -} - -// Return the size of the stack, including block delimiters. -unsigned DataFlowGraph::DefStack::size() const { - unsigned S = 0; - for (auto I = top(), E = bottom(); I != E; I.down()) - S++; - return S; -} - -// Remove the top entry from the stack. Remove all intervening delimiters -// so that after this, the stack is either empty, or the top of the stack -// is a non-delimiter. 
-void DataFlowGraph::DefStack::pop() { - assert(!empty()); - unsigned P = nextDown(Stack.size()); - Stack.resize(P); -} - -// Push a delimiter for block node N on the stack. -void DataFlowGraph::DefStack::start_block(NodeId N) { - assert(N != 0); - Stack.push_back(NodeAddr<DefNode*>(nullptr, N)); -} - -// Remove all nodes from the top of the stack, until the delimited for -// block node N is encountered. Remove the delimiter as well. In effect, -// this will remove from the stack all definitions from block N. -void DataFlowGraph::DefStack::clear_block(NodeId N) { - assert(N != 0); - unsigned P = Stack.size(); - while (P > 0) { - bool Found = isDelimiter(Stack[P-1], N); - P--; - if (Found) - break; - } - // This will also remove the delimiter, if found. - Stack.resize(P); -} - -// Move the stack iterator up by one. -unsigned DataFlowGraph::DefStack::nextUp(unsigned P) const { - // Get the next valid position after P (skipping all delimiters). - // The input position P does not have to point to a non-delimiter. - unsigned SS = Stack.size(); - bool IsDelim; - assert(P < SS); - do { - P++; - IsDelim = isDelimiter(Stack[P-1]); - } while (P < SS && IsDelim); - assert(!IsDelim); - return P; -} - -// Move the stack iterator down by one. -unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const { - // Get the preceding valid position before P (skipping all delimiters). - // The input position P does not have to point to a non-delimiter. - assert(P > 0 && P <= Stack.size()); - bool IsDelim = isDelimiter(Stack[P-1]); - do { - if (--P == 0) - break; - IsDelim = isDelimiter(Stack[P-1]); - } while (P > 0 && IsDelim); - assert(!IsDelim); - return P; -} - -// Register information. - -RegisterSet DataFlowGraph::getLandingPadLiveIns() const { - RegisterSet LR; - const Function &F = MF.getFunction(); - const Constant *PF = F.hasPersonalityFn() ? F.getPersonalityFn() - : nullptr; - const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); - if (RegisterId R = TLI.getExceptionPointerRegister(PF)) - LR.insert(RegisterRef(R)); - if (RegisterId R = TLI.getExceptionSelectorRegister(PF)) - LR.insert(RegisterRef(R)); - return LR; -} - -// Node management functions. - -// Get the pointer to the node with the id N. -NodeBase *DataFlowGraph::ptr(NodeId N) const { - if (N == 0) - return nullptr; - return Memory.ptr(N); -} - -// Get the id of the node at the address P. -NodeId DataFlowGraph::id(const NodeBase *P) const { - if (P == nullptr) - return 0; - return Memory.id(P); -} - -// Allocate a new node and set the attributes to Attrs. -NodeAddr<NodeBase*> DataFlowGraph::newNode(uint16_t Attrs) { - NodeAddr<NodeBase*> P = Memory.New(); - P.Addr->init(); - P.Addr->setAttrs(Attrs); - return P; -} - -// Make a copy of the given node B, except for the data-flow links, which -// are set to 0. -NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) { - NodeAddr<NodeBase*> NA = newNode(0); - memcpy(NA.Addr, B.Addr, sizeof(NodeBase)); - // Ref nodes need to have the data-flow links reset. - if (NA.Addr->getType() == NodeAttrs::Ref) { - NodeAddr<RefNode*> RA = NA; - RA.Addr->setReachingDef(0); - RA.Addr->setSibling(0); - if (NA.Addr->getKind() == NodeAttrs::Def) { - NodeAddr<DefNode*> DA = NA; - DA.Addr->setReachedDef(0); - DA.Addr->setReachedUse(0); - } - } - return NA; -} - -// Allocation routines for specific node types/kinds. 
- -NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner, - MachineOperand &Op, uint16_t Flags) { - NodeAddr<UseNode*> UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags); - UA.Addr->setRegRef(&Op, *this); - return UA; -} - -NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner, - RegisterRef RR, NodeAddr<BlockNode*> PredB, uint16_t Flags) { - NodeAddr<PhiUseNode*> PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags); - assert(Flags & NodeAttrs::PhiRef); - PUA.Addr->setRegRef(RR, *this); - PUA.Addr->setPredecessor(PredB.Id); - return PUA; -} - -NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner, - MachineOperand &Op, uint16_t Flags) { - NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags); - DA.Addr->setRegRef(&Op, *this); - return DA; -} - -NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner, - RegisterRef RR, uint16_t Flags) { - NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags); - assert(Flags & NodeAttrs::PhiRef); - DA.Addr->setRegRef(RR, *this); - return DA; -} - -NodeAddr<PhiNode*> DataFlowGraph::newPhi(NodeAddr<BlockNode*> Owner) { - NodeAddr<PhiNode*> PA = newNode(NodeAttrs::Code | NodeAttrs::Phi); - Owner.Addr->addPhi(PA, *this); - return PA; -} - -NodeAddr<StmtNode*> DataFlowGraph::newStmt(NodeAddr<BlockNode*> Owner, - MachineInstr *MI) { - NodeAddr<StmtNode*> SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt); - SA.Addr->setCode(MI); - Owner.Addr->addMember(SA, *this); - return SA; -} - -NodeAddr<BlockNode*> DataFlowGraph::newBlock(NodeAddr<FuncNode*> Owner, - MachineBasicBlock *BB) { - NodeAddr<BlockNode*> BA = newNode(NodeAttrs::Code | NodeAttrs::Block); - BA.Addr->setCode(BB); - Owner.Addr->addMember(BA, *this); - return BA; -} - -NodeAddr<FuncNode*> DataFlowGraph::newFunc(MachineFunction *MF) { - NodeAddr<FuncNode*> FA = newNode(NodeAttrs::Code | NodeAttrs::Func); - FA.Addr->setCode(MF); - return FA; -} - -// Build the data flow graph. -void DataFlowGraph::build(unsigned Options) { - reset(); - Func = newFunc(&MF); - - if (MF.empty()) - return; - - for (MachineBasicBlock &B : MF) { - NodeAddr<BlockNode*> BA = newBlock(Func, &B); - BlockNodes.insert(std::make_pair(&B, BA)); - for (MachineInstr &I : B) { - if (I.isDebugInstr()) - continue; - buildStmt(BA, I); - } - } - - NodeAddr<BlockNode*> EA = Func.Addr->getEntryBlock(*this); - NodeList Blocks = Func.Addr->members(*this); - - // Collect information about block references. - RegisterSet AllRefs; - for (NodeAddr<BlockNode*> BA : Blocks) - for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) - for (NodeAddr<RefNode*> RA : IA.Addr->members(*this)) - AllRefs.insert(RA.Addr->getRegRef(*this)); - - // Collect function live-ins and entry block live-ins. - MachineRegisterInfo &MRI = MF.getRegInfo(); - MachineBasicBlock &EntryB = *EA.Addr->getCode(); - assert(EntryB.pred_empty() && "Function entry block has predecessors"); - for (std::pair<unsigned,unsigned> P : MRI.liveins()) - LiveIns.insert(RegisterRef(P.first)); - if (MRI.tracksLiveness()) { - for (auto I : EntryB.liveins()) - LiveIns.insert(RegisterRef(I.PhysReg, I.LaneMask)); - } - - // Add function-entry phi nodes for the live-in registers. 
- //for (std::pair<RegisterId,LaneBitmask> P : LiveIns) { - for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) { - RegisterRef RR = *I; - NodeAddr<PhiNode*> PA = newPhi(EA); - uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving; - NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags); - PA.Addr->addMember(DA, *this); - } - - // Add phis for landing pads. - // Landing pads, unlike usual backs blocks, are not entered through - // branches in the program, or fall-throughs from other blocks. They - // are entered from the exception handling runtime and target's ABI - // may define certain registers as defined on entry to such a block. - RegisterSet EHRegs = getLandingPadLiveIns(); - if (!EHRegs.empty()) { - for (NodeAddr<BlockNode*> BA : Blocks) { - const MachineBasicBlock &B = *BA.Addr->getCode(); - if (!B.isEHPad()) - continue; - - // Prepare a list of NodeIds of the block's predecessors. - NodeList Preds; - for (MachineBasicBlock *PB : B.predecessors()) - Preds.push_back(findBlock(PB)); - - // Build phi nodes for each live-in. - for (RegisterRef RR : EHRegs) { - NodeAddr<PhiNode*> PA = newPhi(BA); - uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving; - // Add def: - NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags); - PA.Addr->addMember(DA, *this); - // Add uses (no reaching defs for phi uses): - for (NodeAddr<BlockNode*> PBA : Preds) { - NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA); - PA.Addr->addMember(PUA, *this); - } - } - } - } - - // Build a map "PhiM" which will contain, for each block, the set - // of references that will require phi definitions in that block. - BlockRefsMap PhiM; - for (NodeAddr<BlockNode*> BA : Blocks) - recordDefsForDF(PhiM, BA); - for (NodeAddr<BlockNode*> BA : Blocks) - buildPhis(PhiM, AllRefs, BA); - - // Link all the refs. This will recursively traverse the dominator tree. - DefStackMap DM; - linkBlockRefs(DM, EA); - - // Finally, remove all unused phi nodes. - if (!(Options & BuildOptions::KeepDeadPhis)) - removeUnusedPhis(); -} - -RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const { - assert(PhysicalRegisterInfo::isRegMaskId(Reg) || - Register::isPhysicalRegister(Reg)); - assert(Reg != 0); - if (Sub != 0) - Reg = TRI.getSubReg(Reg, Sub); - return RegisterRef(Reg); -} - -RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const { - assert(Op.isReg() || Op.isRegMask()); - if (Op.isReg()) - return makeRegRef(Op.getReg(), Op.getSubReg()); - return RegisterRef(PRI.getRegMaskId(Op.getRegMask()), LaneBitmask::getAll()); -} - -RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const { - if (AR.Reg == BR.Reg) { - LaneBitmask M = AR.Mask & BR.Mask; - return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef(); - } -#ifndef NDEBUG -// RegisterRef NAR = PRI.normalize(AR); -// RegisterRef NBR = PRI.normalize(BR); -// assert(NAR.Reg != NBR.Reg); -#endif - // This isn't strictly correct, because the overlap may happen in the - // part masked out. - if (PRI.alias(AR, BR)) - return AR; - return RegisterRef(); -} - -// For each stack in the map DefM, push the delimiter for block B on it. -void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) { - // Push block delimiters. - for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I) - I->second.start_block(B); -} - -// Remove all definitions coming from block B from each stack in DefM. -void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) { - // Pop all defs from this block from the definition stack. 
Defs that were - // added to the map during the traversal of instructions will not have a - // delimiter, but for those, the whole stack will be emptied. - for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I) - I->second.clear_block(B); - - // Finally, remove empty stacks from the map. - for (auto I = DefM.begin(), E = DefM.end(), NextI = I; I != E; I = NextI) { - NextI = std::next(I); - // This preserves the validity of iterators other than I. - if (I->second.empty()) - DefM.erase(I); - } -} - -// Push all definitions from the instruction node IA to an appropriate -// stack in DefM. -void DataFlowGraph::pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) { - pushClobbers(IA, DefM); - pushDefs(IA, DefM); -} - -// Push all definitions from the instruction node IA to an appropriate -// stack in DefM. -void DataFlowGraph::pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DefM) { - NodeSet Visited; - std::set<RegisterId> Defined; - - // The important objectives of this function are: - // - to be able to handle instructions both while the graph is being - // constructed, and after the graph has been constructed, and - // - maintain proper ordering of definitions on the stack for each - // register reference: - // - if there are two or more related defs in IA (i.e. coming from - // the same machine operand), then only push one def on the stack, - // - if there are multiple unrelated defs of non-overlapping - // subregisters of S, then the stack for S will have both (in an - // unspecified order), but the order does not matter from the data- - // -flow perspective. - - for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) { - if (Visited.count(DA.Id)) - continue; - if (!(DA.Addr->getFlags() & NodeAttrs::Clobbering)) - continue; - - NodeList Rel = getRelatedRefs(IA, DA); - NodeAddr<DefNode*> PDA = Rel.front(); - RegisterRef RR = PDA.Addr->getRegRef(*this); - - // Push the definition on the stack for the register and all aliases. - // The def stack traversal in linkNodeUp will check the exact aliasing. - DefM[RR.Reg].push(DA); - Defined.insert(RR.Reg); - for (RegisterId A : PRI.getAliasSet(RR.Reg)) { - // Check that we don't push the same def twice. - assert(A != RR.Reg); - if (!Defined.count(A)) - DefM[A].push(DA); - } - // Mark all the related defs as visited. - for (NodeAddr<NodeBase*> T : Rel) - Visited.insert(T.Id); - } -} - -// Push all definitions from the instruction node IA to an appropriate -// stack in DefM. -void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) { - NodeSet Visited; -#ifndef NDEBUG - std::set<RegisterId> Defined; -#endif - - // The important objectives of this function are: - // - to be able to handle instructions both while the graph is being - // constructed, and after the graph has been constructed, and - // - maintain proper ordering of definitions on the stack for each - // register reference: - // - if there are two or more related defs in IA (i.e. coming from - // the same machine operand), then only push one def on the stack, - // - if there are multiple unrelated defs of non-overlapping - // subregisters of S, then the stack for S will have both (in an - // unspecified order), but the order does not matter from the data- - // -flow perspective. 
- - for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) { - if (Visited.count(DA.Id)) - continue; - if (DA.Addr->getFlags() & NodeAttrs::Clobbering) - continue; - - NodeList Rel = getRelatedRefs(IA, DA); - NodeAddr<DefNode*> PDA = Rel.front(); - RegisterRef RR = PDA.Addr->getRegRef(*this); -#ifndef NDEBUG - // Assert if the register is defined in two or more unrelated defs. - // This could happen if there are two or more def operands defining it. - if (!Defined.insert(RR.Reg).second) { - MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); - dbgs() << "Multiple definitions of register: " - << Print<RegisterRef>(RR, *this) << " in\n " << *MI << "in " - << printMBBReference(*MI->getParent()) << '\n'; - llvm_unreachable(nullptr); - } -#endif - // Push the definition on the stack for the register and all aliases. - // The def stack traversal in linkNodeUp will check the exact aliasing. - DefM[RR.Reg].push(DA); - for (RegisterId A : PRI.getAliasSet(RR.Reg)) { - // Check that we don't push the same def twice. - assert(A != RR.Reg); - DefM[A].push(DA); - } - // Mark all the related defs as visited. - for (NodeAddr<NodeBase*> T : Rel) - Visited.insert(T.Id); - } -} - -// Return the list of all reference nodes related to RA, including RA itself. -// See "getNextRelated" for the meaning of a "related reference". -NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA) const { - assert(IA.Id != 0 && RA.Id != 0); - - NodeList Refs; - NodeId Start = RA.Id; - do { - Refs.push_back(RA); - RA = getNextRelated(IA, RA); - } while (RA.Id != 0 && RA.Id != Start); - return Refs; -} - -// Clear all information in the graph. -void DataFlowGraph::reset() { - Memory.clear(); - BlockNodes.clear(); - Func = NodeAddr<FuncNode*>(); -} - -// Return the next reference node in the instruction node IA that is related -// to RA. Conceptually, two reference nodes are related if they refer to the -// same instance of a register access, but differ in flags or other minor -// characteristics. Specific examples of related nodes are shadow reference -// nodes. -// Return the equivalent of nullptr if there are no more related references. -NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA) const { - assert(IA.Id != 0 && RA.Id != 0); - - auto Related = [this,RA](NodeAddr<RefNode*> TA) -> bool { - if (TA.Addr->getKind() != RA.Addr->getKind()) - return false; - if (TA.Addr->getRegRef(*this) != RA.Addr->getRegRef(*this)) - return false; - return true; - }; - auto RelatedStmt = [&Related,RA](NodeAddr<RefNode*> TA) -> bool { - return Related(TA) && - &RA.Addr->getOp() == &TA.Addr->getOp(); - }; - auto RelatedPhi = [&Related,RA](NodeAddr<RefNode*> TA) -> bool { - if (!Related(TA)) - return false; - if (TA.Addr->getKind() != NodeAttrs::Use) - return true; - // For phi uses, compare predecessor blocks. - const NodeAddr<const PhiUseNode*> TUA = TA; - const NodeAddr<const PhiUseNode*> RUA = RA; - return TUA.Addr->getPredecessor() == RUA.Addr->getPredecessor(); - }; - - RegisterRef RR = RA.Addr->getRegRef(*this); - if (IA.Addr->getKind() == NodeAttrs::Stmt) - return RA.Addr->getNextRef(RR, RelatedStmt, true, *this); - return RA.Addr->getNextRef(RR, RelatedPhi, true, *this); -} - -// Find the next node related to RA in IA that satisfies condition P. -// If such a node was found, return a pair where the second element is the -// located node. 
If such a node does not exist, return a pair where the -// first element is the element after which such a node should be inserted, -// and the second element is a null-address. -template <typename Predicate> -std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>> -DataFlowGraph::locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, - Predicate P) const { - assert(IA.Id != 0 && RA.Id != 0); - - NodeAddr<RefNode*> NA; - NodeId Start = RA.Id; - while (true) { - NA = getNextRelated(IA, RA); - if (NA.Id == 0 || NA.Id == Start) - break; - if (P(NA)) - break; - RA = NA; - } - - if (NA.Id != 0 && NA.Id != Start) - return std::make_pair(RA, NA); - return std::make_pair(RA, NodeAddr<RefNode*>()); -} - -// Get the next shadow node in IA corresponding to RA, and optionally create -// such a node if it does not exist. -NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA, bool Create) { - assert(IA.Id != 0 && RA.Id != 0); - - uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow; - auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool { - return TA.Addr->getFlags() == Flags; - }; - auto Loc = locateNextRef(IA, RA, IsShadow); - if (Loc.second.Id != 0 || !Create) - return Loc.second; - - // Create a copy of RA and mark is as shadow. - NodeAddr<RefNode*> NA = cloneNode(RA); - NA.Addr->setFlags(Flags | NodeAttrs::Shadow); - IA.Addr->addMemberAfter(Loc.first, NA, *this); - return NA; -} - -// Get the next shadow node in IA corresponding to RA. Return null-address -// if such a node does not exist. -NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA) const { - assert(IA.Id != 0 && RA.Id != 0); - uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow; - auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool { - return TA.Addr->getFlags() == Flags; - }; - return locateNextRef(IA, RA, IsShadow).second; -} - -// Create a new statement node in the block node BA that corresponds to -// the machine instruction MI. -void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) { - NodeAddr<StmtNode*> SA = newStmt(BA, &In); - - auto isCall = [] (const MachineInstr &In) -> bool { - if (In.isCall()) - return true; - // Is tail call? - if (In.isBranch()) { - for (const MachineOperand &Op : In.operands()) - if (Op.isGlobal() || Op.isSymbol()) - return true; - // Assume indirect branches are calls. This is for the purpose of - // keeping implicit operands, and so it won't hurt on intra-function - // indirect branches. - if (In.isIndirectBranch()) - return true; - } - return false; - }; - - auto isDefUndef = [this] (const MachineInstr &In, RegisterRef DR) -> bool { - // This instruction defines DR. Check if there is a use operand that - // would make DR live on entry to the instruction. - for (const MachineOperand &Op : In.operands()) { - if (!Op.isReg() || Op.getReg() == 0 || !Op.isUse() || Op.isUndef()) - continue; - RegisterRef UR = makeRegRef(Op); - if (PRI.alias(DR, UR)) - return false; - } - return true; - }; - - bool IsCall = isCall(In); - unsigned NumOps = In.getNumOperands(); - - // Avoid duplicate implicit defs. This will not detect cases of implicit - // defs that define registers that overlap, but it is not clear how to - // interpret that in the absence of explicit defs. Overlapping explicit - // defs are likely illegal already. - BitVector DoneDefs(TRI.getNumRegs()); - // Process explicit defs first. 
- for (unsigned OpN = 0; OpN < NumOps; ++OpN) { - MachineOperand &Op = In.getOperand(OpN); - if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) - continue; - Register R = Op.getReg(); - if (!R || !Register::isPhysicalRegister(R)) - continue; - uint16_t Flags = NodeAttrs::None; - if (TOI.isPreserving(In, OpN)) { - Flags |= NodeAttrs::Preserving; - // If the def is preserving, check if it is also undefined. - if (isDefUndef(In, makeRegRef(Op))) - Flags |= NodeAttrs::Undef; - } - if (TOI.isClobbering(In, OpN)) - Flags |= NodeAttrs::Clobbering; - if (TOI.isFixedReg(In, OpN)) - Flags |= NodeAttrs::Fixed; - if (IsCall && Op.isDead()) - Flags |= NodeAttrs::Dead; - NodeAddr<DefNode*> DA = newDef(SA, Op, Flags); - SA.Addr->addMember(DA, *this); - assert(!DoneDefs.test(R)); - DoneDefs.set(R); - } - - // Process reg-masks (as clobbers). - BitVector DoneClobbers(TRI.getNumRegs()); - for (unsigned OpN = 0; OpN < NumOps; ++OpN) { - MachineOperand &Op = In.getOperand(OpN); - if (!Op.isRegMask()) - continue; - uint16_t Flags = NodeAttrs::Clobbering | NodeAttrs::Fixed | - NodeAttrs::Dead; - NodeAddr<DefNode*> DA = newDef(SA, Op, Flags); - SA.Addr->addMember(DA, *this); - // Record all clobbered registers in DoneDefs. - const uint32_t *RM = Op.getRegMask(); - for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) - if (!(RM[i/32] & (1u << (i%32)))) - DoneClobbers.set(i); - } - - // Process implicit defs, skipping those that have already been added - // as explicit. - for (unsigned OpN = 0; OpN < NumOps; ++OpN) { - MachineOperand &Op = In.getOperand(OpN); - if (!Op.isReg() || !Op.isDef() || !Op.isImplicit()) - continue; - Register R = Op.getReg(); - if (!R || !Register::isPhysicalRegister(R) || DoneDefs.test(R)) - continue; - RegisterRef RR = makeRegRef(Op); - uint16_t Flags = NodeAttrs::None; - if (TOI.isPreserving(In, OpN)) { - Flags |= NodeAttrs::Preserving; - // If the def is preserving, check if it is also undefined. - if (isDefUndef(In, RR)) - Flags |= NodeAttrs::Undef; - } - if (TOI.isClobbering(In, OpN)) - Flags |= NodeAttrs::Clobbering; - if (TOI.isFixedReg(In, OpN)) - Flags |= NodeAttrs::Fixed; - if (IsCall && Op.isDead()) { - if (DoneClobbers.test(R)) - continue; - Flags |= NodeAttrs::Dead; - } - NodeAddr<DefNode*> DA = newDef(SA, Op, Flags); - SA.Addr->addMember(DA, *this); - DoneDefs.set(R); - } - - for (unsigned OpN = 0; OpN < NumOps; ++OpN) { - MachineOperand &Op = In.getOperand(OpN); - if (!Op.isReg() || !Op.isUse()) - continue; - Register R = Op.getReg(); - if (!R || !Register::isPhysicalRegister(R)) - continue; - uint16_t Flags = NodeAttrs::None; - if (Op.isUndef()) - Flags |= NodeAttrs::Undef; - if (TOI.isFixedReg(In, OpN)) - Flags |= NodeAttrs::Fixed; - NodeAddr<UseNode*> UA = newUse(SA, Op, Flags); - SA.Addr->addMember(UA, *this); - } -} - -// Scan all defs in the block node BA and record in PhiM the locations of -// phi nodes corresponding to these defs. -void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, - NodeAddr<BlockNode*> BA) { - // Check all defs from block BA and record them in each block in BA's - // iterated dominance frontier. This information will later be used to - // create phi nodes. - MachineBasicBlock *BB = BA.Addr->getCode(); - assert(BB); - auto DFLoc = MDF.find(BB); - if (DFLoc == MDF.end() || DFLoc->second.empty()) - return; - - // Traverse all instructions in the block and collect the set of all - // defined references. For each reference there will be a phi created - // in the block's iterated dominance frontier. 
- // This is done to make sure that each defined reference gets only one - // phi node, even if it is defined multiple times. - RegisterSet Defs; - for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) - for (NodeAddr<RefNode*> RA : IA.Addr->members_if(IsDef, *this)) - Defs.insert(RA.Addr->getRegRef(*this)); - - // Calculate the iterated dominance frontier of BB. - const MachineDominanceFrontier::DomSetType &DF = DFLoc->second; - SetVector<MachineBasicBlock*> IDF(DF.begin(), DF.end()); - for (unsigned i = 0; i < IDF.size(); ++i) { - auto F = MDF.find(IDF[i]); - if (F != MDF.end()) - IDF.insert(F->second.begin(), F->second.end()); - } - - // Finally, add the set of defs to each block in the iterated dominance - // frontier. - for (auto DB : IDF) { - NodeAddr<BlockNode*> DBA = findBlock(DB); - PhiM[DBA.Id].insert(Defs.begin(), Defs.end()); - } -} - -// Given the locations of phi nodes in the map PhiM, create the phi nodes -// that are located in the block node BA. -void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs, - NodeAddr<BlockNode*> BA) { - // Check if this blocks has any DF defs, i.e. if there are any defs - // that this block is in the iterated dominance frontier of. - auto HasDF = PhiM.find(BA.Id); - if (HasDF == PhiM.end() || HasDF->second.empty()) - return; - - // First, remove all R in Refs in such that there exists T in Refs - // such that T covers R. In other words, only leave those refs that - // are not covered by another ref (i.e. maximal with respect to covering). - - auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef { - for (RegisterRef I : RRs) - if (I != RR && RegisterAggr::isCoverOf(I, RR, PRI)) - RR = I; - return RR; - }; - - RegisterSet MaxDF; - for (RegisterRef I : HasDF->second) - MaxDF.insert(MaxCoverIn(I, HasDF->second)); - - std::vector<RegisterRef> MaxRefs; - for (RegisterRef I : MaxDF) - MaxRefs.push_back(MaxCoverIn(I, AllRefs)); - - // Now, for each R in MaxRefs, get the alias closure of R. If the closure - // only has R in it, create a phi a def for R. Otherwise, create a phi, - // and add a def for each S in the closure. - - // Sort the refs so that the phis will be created in a deterministic order. - llvm::sort(MaxRefs); - // Remove duplicates. - auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end()); - MaxRefs.erase(NewEnd, MaxRefs.end()); - - auto Aliased = [this,&MaxRefs](RegisterRef RR, - std::vector<unsigned> &Closure) -> bool { - for (unsigned I : Closure) - if (PRI.alias(RR, MaxRefs[I])) - return true; - return false; - }; - - // Prepare a list of NodeIds of the block's predecessors. - NodeList Preds; - const MachineBasicBlock *MBB = BA.Addr->getCode(); - for (MachineBasicBlock *PB : MBB->predecessors()) - Preds.push_back(findBlock(PB)); - - while (!MaxRefs.empty()) { - // Put the first element in the closure, and then add all subsequent - // elements from MaxRefs to it, if they alias at least one element - // already in the closure. - // ClosureIdx: vector of indices in MaxRefs of members of the closure. - std::vector<unsigned> ClosureIdx = { 0 }; - for (unsigned i = 1; i != MaxRefs.size(); ++i) - if (Aliased(MaxRefs[i], ClosureIdx)) - ClosureIdx.push_back(i); - - // Build a phi for the closure. - unsigned CS = ClosureIdx.size(); - NodeAddr<PhiNode*> PA = newPhi(BA); - - // Add defs. 
- for (unsigned X = 0; X != CS; ++X) { - RegisterRef RR = MaxRefs[ClosureIdx[X]]; - uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving; - NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags); - PA.Addr->addMember(DA, *this); - } - // Add phi uses. - for (NodeAddr<BlockNode*> PBA : Preds) { - for (unsigned X = 0; X != CS; ++X) { - RegisterRef RR = MaxRefs[ClosureIdx[X]]; - NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA); - PA.Addr->addMember(PUA, *this); - } - } - - // Erase from MaxRefs all elements in the closure. - auto Begin = MaxRefs.begin(); - for (unsigned i = ClosureIdx.size(); i != 0; --i) - MaxRefs.erase(Begin + ClosureIdx[i-1]); - } -} - -// Remove any unneeded phi nodes that were created during the build process. -void DataFlowGraph::removeUnusedPhis() { - // This will remove unused phis, i.e. phis where each def does not reach - // any uses or other defs. This will not detect or remove circular phi - // chains that are otherwise dead. Unused/dead phis are created during - // the build process and this function is intended to remove these cases - // that are easily determinable to be unnecessary. - - SetVector<NodeId> PhiQ; - for (NodeAddr<BlockNode*> BA : Func.Addr->members(*this)) { - for (auto P : BA.Addr->members_if(IsPhi, *this)) - PhiQ.insert(P.Id); - } - - static auto HasUsedDef = [](NodeList &Ms) -> bool { - for (NodeAddr<NodeBase*> M : Ms) { - if (M.Addr->getKind() != NodeAttrs::Def) - continue; - NodeAddr<DefNode*> DA = M; - if (DA.Addr->getReachedDef() != 0 || DA.Addr->getReachedUse() != 0) - return true; - } - return false; - }; - - // Any phi, if it is removed, may affect other phis (make them dead). - // For each removed phi, collect the potentially affected phis and add - // them back to the queue. - while (!PhiQ.empty()) { - auto PA = addr<PhiNode*>(PhiQ[0]); - PhiQ.remove(PA.Id); - NodeList Refs = PA.Addr->members(*this); - if (HasUsedDef(Refs)) - continue; - for (NodeAddr<RefNode*> RA : Refs) { - if (NodeId RD = RA.Addr->getReachingDef()) { - auto RDA = addr<DefNode*>(RD); - NodeAddr<InstrNode*> OA = RDA.Addr->getOwner(*this); - if (IsPhi(OA)) - PhiQ.insert(OA.Id); - } - if (RA.Addr->isDef()) - unlinkDef(RA, true); - else - unlinkUse(RA, true); - } - NodeAddr<BlockNode*> BA = PA.Addr->getOwner(*this); - BA.Addr->removeMember(PA, *this); - } -} - -// For a given reference node TA in an instruction node IA, connect the -// reaching def of TA to the appropriate def node. Create any shadow nodes -// as appropriate. -template <typename T> -void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA, - DefStack &DS) { - if (DS.empty()) - return; - RegisterRef RR = TA.Addr->getRegRef(*this); - NodeAddr<T> TAP; - - // References from the def stack that have been examined so far. - RegisterAggr Defs(PRI); - - for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) { - RegisterRef QR = I->Addr->getRegRef(*this); - - // Skip all defs that are aliased to any of the defs that we have already - // seen. If this completes a cover of RR, stop the stack traversal. - bool Alias = Defs.hasAliasOf(QR); - bool Cover = Defs.insert(QR).hasCoverOf(RR); - if (Alias) { - if (Cover) - break; - continue; - } - - // The reaching def. - NodeAddr<DefNode*> RDA = *I; - - // Pick the reached node. - if (TAP.Id == 0) { - TAP = TA; - } else { - // Mark the existing ref as "shadow" and create a new shadow. - TAP.Addr->setFlags(TAP.Addr->getFlags() | NodeAttrs::Shadow); - TAP = getNextShadow(IA, TAP, true); - } - - // Create the link. 
- TAP.Addr->linkToDef(TAP.Id, RDA); - - if (Cover) - break; - } -} - -// Create data-flow links for all reference nodes in the statement node SA. -template <typename Predicate> -void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA, - Predicate P) { -#ifndef NDEBUG - RegisterSet Defs; -#endif - - // Link all nodes (upwards in the data-flow) with their reaching defs. - for (NodeAddr<RefNode*> RA : SA.Addr->members_if(P, *this)) { - uint16_t Kind = RA.Addr->getKind(); - assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use); - RegisterRef RR = RA.Addr->getRegRef(*this); -#ifndef NDEBUG - // Do not expect multiple defs of the same reference. - assert(Kind != NodeAttrs::Def || !Defs.count(RR)); - Defs.insert(RR); -#endif - - auto F = DefM.find(RR.Reg); - if (F == DefM.end()) - continue; - DefStack &DS = F->second; - if (Kind == NodeAttrs::Use) - linkRefUp<UseNode*>(SA, RA, DS); - else if (Kind == NodeAttrs::Def) - linkRefUp<DefNode*>(SA, RA, DS); - else - llvm_unreachable("Unexpected node in instruction"); - } -} - -// Create data-flow links for all instructions in the block node BA. This -// will include updating any phi nodes in BA. -void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) { - // Push block delimiters. - markBlock(BA.Id, DefM); - - auto IsClobber = [] (NodeAddr<RefNode*> RA) -> bool { - return IsDef(RA) && (RA.Addr->getFlags() & NodeAttrs::Clobbering); - }; - auto IsNoClobber = [] (NodeAddr<RefNode*> RA) -> bool { - return IsDef(RA) && !(RA.Addr->getFlags() & NodeAttrs::Clobbering); - }; - - assert(BA.Addr && "block node address is needed to create a data-flow link"); - // For each non-phi instruction in the block, link all the defs and uses - // to their reaching defs. For any member of the block (including phis), - // push the defs on the corresponding stacks. - for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) { - // Ignore phi nodes here. They will be linked part by part from the - // predecessors. - if (IA.Addr->getKind() == NodeAttrs::Stmt) { - linkStmtRefs(DefM, IA, IsUse); - linkStmtRefs(DefM, IA, IsClobber); - } - - // Push the definitions on the stack. - pushClobbers(IA, DefM); - - if (IA.Addr->getKind() == NodeAttrs::Stmt) - linkStmtRefs(DefM, IA, IsNoClobber); - - pushDefs(IA, DefM); - } - - // Recursively process all children in the dominator tree. - MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode()); - for (auto I : *N) { - MachineBasicBlock *SB = I->getBlock(); - NodeAddr<BlockNode*> SBA = findBlock(SB); - linkBlockRefs(DefM, SBA); - } - - // Link the phi uses from the successor blocks. - auto IsUseForBA = [BA](NodeAddr<NodeBase*> NA) -> bool { - if (NA.Addr->getKind() != NodeAttrs::Use) - return false; - assert(NA.Addr->getFlags() & NodeAttrs::PhiRef); - NodeAddr<PhiUseNode*> PUA = NA; - return PUA.Addr->getPredecessor() == BA.Id; - }; - - RegisterSet EHLiveIns = getLandingPadLiveIns(); - MachineBasicBlock *MBB = BA.Addr->getCode(); - - for (MachineBasicBlock *SB : MBB->successors()) { - bool IsEHPad = SB->isEHPad(); - NodeAddr<BlockNode*> SBA = findBlock(SB); - for (NodeAddr<InstrNode*> IA : SBA.Addr->members_if(IsPhi, *this)) { - // Do not link phi uses for landing pad live-ins. - if (IsEHPad) { - // Find what register this phi is for. - NodeAddr<RefNode*> RA = IA.Addr->getFirstMember(*this); - assert(RA.Id != 0); - if (EHLiveIns.count(RA.Addr->getRegRef(*this))) - continue; - } - // Go over each phi use associated with MBB, and link it. 
- for (auto U : IA.Addr->members_if(IsUseForBA, *this)) { - NodeAddr<PhiUseNode*> PUA = U; - RegisterRef RR = PUA.Addr->getRegRef(*this); - linkRefUp<UseNode*>(IA, PUA, DefM[RR.Reg]); - } - } - } - - // Pop all defs from this block from the definition stacks. - releaseBlock(BA.Id, DefM); -} - -// Remove the use node UA from any data-flow and structural links. -void DataFlowGraph::unlinkUseDF(NodeAddr<UseNode*> UA) { - NodeId RD = UA.Addr->getReachingDef(); - NodeId Sib = UA.Addr->getSibling(); - - if (RD == 0) { - assert(Sib == 0); - return; - } - - auto RDA = addr<DefNode*>(RD); - auto TA = addr<UseNode*>(RDA.Addr->getReachedUse()); - if (TA.Id == UA.Id) { - RDA.Addr->setReachedUse(Sib); - return; - } - - while (TA.Id != 0) { - NodeId S = TA.Addr->getSibling(); - if (S == UA.Id) { - TA.Addr->setSibling(UA.Addr->getSibling()); - return; - } - TA = addr<UseNode*>(S); - } -} - -// Remove the def node DA from any data-flow and structural links. -void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) { - // - // RD - // | reached - // | def - // : - // . - // +----+ - // ... -- | DA | -- ... -- 0 : sibling chain of DA - // +----+ - // | | reached - // | : def - // | . - // | ... : Siblings (defs) - // | - // : reached - // . use - // ... : sibling chain of reached uses - - NodeId RD = DA.Addr->getReachingDef(); - - // Visit all siblings of the reached def and reset their reaching defs. - // Also, defs reached by DA are now "promoted" to being reached by RD, - // so all of them will need to be spliced into the sibling chain where - // DA belongs. - auto getAllNodes = [this] (NodeId N) -> NodeList { - NodeList Res; - while (N) { - auto RA = addr<RefNode*>(N); - // Keep the nodes in the exact sibling order. - Res.push_back(RA); - N = RA.Addr->getSibling(); - } - return Res; - }; - NodeList ReachedDefs = getAllNodes(DA.Addr->getReachedDef()); - NodeList ReachedUses = getAllNodes(DA.Addr->getReachedUse()); - - if (RD == 0) { - for (NodeAddr<RefNode*> I : ReachedDefs) - I.Addr->setSibling(0); - for (NodeAddr<RefNode*> I : ReachedUses) - I.Addr->setSibling(0); - } - for (NodeAddr<DefNode*> I : ReachedDefs) - I.Addr->setReachingDef(RD); - for (NodeAddr<UseNode*> I : ReachedUses) - I.Addr->setReachingDef(RD); - - NodeId Sib = DA.Addr->getSibling(); - if (RD == 0) { - assert(Sib == 0); - return; - } - - // Update the reaching def node and remove DA from the sibling list. - auto RDA = addr<DefNode*>(RD); - auto TA = addr<DefNode*>(RDA.Addr->getReachedDef()); - if (TA.Id == DA.Id) { - // If DA is the first reached def, just update the RD's reached def - // to the DA's sibling. - RDA.Addr->setReachedDef(Sib); - } else { - // Otherwise, traverse the sibling list of the reached defs and remove - // DA from it. - while (TA.Id != 0) { - NodeId S = TA.Addr->getSibling(); - if (S == DA.Id) { - TA.Addr->setSibling(Sib); - break; - } - TA = addr<DefNode*>(S); - } - } - - // Splice the DA's reached defs into the RDA's reached def chain. - if (!ReachedDefs.empty()) { - auto Last = NodeAddr<DefNode*>(ReachedDefs.back()); - Last.Addr->setSibling(RDA.Addr->getReachedDef()); - RDA.Addr->setReachedDef(ReachedDefs.front().Id); - } - // Splice the DA's reached uses into the RDA's reached use chain. 
- if (!ReachedUses.empty()) { - auto Last = NodeAddr<UseNode*>(ReachedUses.back()); - Last.Addr->setSibling(RDA.Addr->getReachedUse()); - RDA.Addr->setReachedUse(ReachedUses.front().Id); - } -} diff --git a/llvm/lib/Target/Hexagon/RDFGraph.h b/llvm/lib/Target/Hexagon/RDFGraph.h deleted file mode 100644 index 585f43e116f9..000000000000 --- a/llvm/lib/Target/Hexagon/RDFGraph.h +++ /dev/null @@ -1,968 +0,0 @@ -//===- RDFGraph.h -----------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Target-independent, SSA-based data flow graph for register data flow (RDF) -// for a non-SSA program representation (e.g. post-RA machine code). -// -// -// *** Introduction -// -// The RDF graph is a collection of nodes, each of which denotes some element -// of the program. There are two main types of such elements: code and refe- -// rences. Conceptually, "code" is something that represents the structure -// of the program, e.g. basic block or a statement, while "reference" is an -// instance of accessing a register, e.g. a definition or a use. Nodes are -// connected with each other based on the structure of the program (such as -// blocks, instructions, etc.), and based on the data flow (e.g. reaching -// definitions, reached uses, etc.). The single-reaching-definition principle -// of SSA is generally observed, although, due to the non-SSA representation -// of the program, there are some differences between the graph and a "pure" -// SSA representation. -// -// -// *** Implementation remarks -// -// Since the graph can contain a large number of nodes, memory consumption -// was one of the major design considerations. As a result, there is a single -// base class NodeBase which defines all members used by all possible derived -// classes. The members are arranged in a union, and a derived class cannot -// add any data members of its own. Each derived class only defines the -// functional interface, i.e. member functions. NodeBase must be a POD, -// which implies that all of its members must also be PODs. -// Since nodes need to be connected with other nodes, pointers have been -// replaced with 32-bit identifiers: each node has an id of type NodeId. -// There are mapping functions in the graph that translate between actual -// memory addresses and the corresponding identifiers. -// A node id of 0 is equivalent to nullptr. -// -// -// *** Structure of the graph -// -// A code node is always a collection of other nodes. For example, a code -// node corresponding to a basic block will contain code nodes corresponding -// to instructions. In turn, a code node corresponding to an instruction will -// contain a list of reference nodes that correspond to the definitions and -// uses of registers in that instruction. The members are arranged into a -// circular list, which is yet another consequence of the effort to save -// memory: for each member node it should be possible to obtain its owner, -// and it should be possible to access all other members. There are other -// ways to accomplish that, but the circular list seemed the most natural. 
-// -// +- CodeNode -+ -// | | <---------------------------------------------------+ -// +-+--------+-+ | -// |FirstM |LastM | -// | +-------------------------------------+ | -// | | | -// V V | -// +----------+ Next +----------+ Next Next +----------+ Next | -// | |----->| |-----> ... ----->| |----->-+ -// +- Member -+ +- Member -+ +- Member -+ -// -// The order of members is such that related reference nodes (see below) -// should be contiguous on the member list. -// -// A reference node is a node that encapsulates an access to a register, -// in other words, data flowing into or out of a register. There are two -// major kinds of reference nodes: defs and uses. A def node will contain -// the id of the first reached use, and the id of the first reached def. -// Each def and use will contain the id of the reaching def, and also the -// id of the next reached def (for def nodes) or use (for use nodes). -// The "next node sharing the same reaching def" is denoted as "sibling". -// In summary: -// - Def node contains: reaching def, sibling, first reached def, and first -// reached use. -// - Use node contains: reaching def and sibling. -// -// +-- DefNode --+ -// | R2 = ... | <---+--------------------+ -// ++---------+--+ | | -// |Reached |Reached | | -// |Def |Use | | -// | | |Reaching |Reaching -// | V |Def |Def -// | +-- UseNode --+ Sib +-- UseNode --+ Sib Sib -// | | ... = R2 |----->| ... = R2 |----> ... ----> 0 -// | +-------------+ +-------------+ -// V -// +-- DefNode --+ Sib -// | R2 = ... |----> ... -// ++---------+--+ -// | | -// | | -// ... ... -// -// To get a full picture, the circular lists connecting blocks within a -// function, instructions within a block, etc. should be superimposed with -// the def-def, def-use links shown above. -// To illustrate this, consider a small example in a pseudo-assembly: -// foo: -// add r2, r0, r1 ; r2 = r0+r1 -// addi r0, r2, 1 ; r0 = r2+1 -// ret r0 ; return value in r0 -// -// The graph (in a format used by the debugging functions) would look like: -// -// DFG dump:[ -// f1: Function foo -// b2: === %bb.0 === preds(0), succs(0): -// p3: phi [d4<r0>(,d12,u9):] -// p5: phi [d6<r1>(,,u10):] -// s7: add [d8<r2>(,,u13):, u9<r0>(d4):, u10<r1>(d6):] -// s11: addi [d12<r0>(d4,,u15):, u13<r2>(d8):] -// s14: ret [u15<r0>(d12):] -// ] -// -// The f1, b2, p3, etc. are node ids. The letter is prepended to indicate the -// kind of the node (i.e. f - function, b - basic block, p - phi, s - state- -// ment, d - def, u - use). -// The format of a def node is: -// dN<R>(rd,d,u):sib, -// where -// N - numeric node id, -// R - register being defined -// rd - reaching def, -// d - reached def, -// u - reached use, -// sib - sibling. -// The format of a use node is: -// uN<R>[!](rd):sib, -// where -// N - numeric node id, -// R - register being used, -// rd - reaching def, -// sib - sibling. -// Possible annotations (usually preceding the node id): -// + - preserving def, -// ~ - clobbering def, -// " - shadow ref (follows the node id), -// ! - fixed register (appears after register name). -// -// The circular lists are not explicit in the dump. -// -// -// *** Node attributes -// -// NodeBase has a member "Attrs", which is the primary way of determining -// the node's characteristics. The fields in this member decide whether -// the node is a code node or a reference node (i.e. node's "type"), then -// within each type, the "kind" determines what specifically this node -// represents. 
The remaining bits, "flags", contain additional information
-// that is even more detailed than the "kind".
-// CodeNode's kinds are:
-// - Phi:   Phi node, members are reference nodes.
-// - Stmt:  Statement, members are reference nodes.
-// - Block: Basic block, members are instruction nodes (i.e. Phi or Stmt).
-// - Func:  The whole function. The members are basic block nodes.
-// RefNode's kinds are:
-// - Use.
-// - Def.
-//
-// Meaning of flags:
-// - Preserving: applies only to defs. A preserving def is one that can
-//   preserve some of the original bits among those that are included in
-//   the register associated with that def. For example, if R0 is a 32-bit
-//   register, but a def can only change the lower 16 bits, then it will
-//   be marked as preserving.
-// - Shadow: a reference that has duplicates holding additional reaching
-//   defs (see more below).
-// - Clobbering: applies only to defs. It indicates that the value generated
-//   by this def is unspecified. A typical example would be volatile registers
-//   after function calls.
-// - Fixed: the register in this def/use cannot be replaced with any other
-//   register. A typical case would be a parameter register to a call, or
-//   the register with the return value from a function.
-// - Undef: the register in this reference is assumed to have
-//   no pre-existing value, even if it appears to be reached by some def.
-//   This is typically used to prevent keeping registers artificially live
-//   in cases when they are defined via predicated instructions. For example:
-//     r0 = add-if-true cond, r10, r11                (1)
-//     r0 = add-if-false cond, r12, r13, implicit r0  (2)
-//     ... = r0                                       (3)
-//   Before (1), r0 is not intended to be live, and the use of r0 in (3) is
-//   not meant to be reached by any def preceding (1). However, since the
-//   defs in (1) and (2) are both preserving, these properties alone would
-//   imply that the use in (3) may indeed be reached by some prior def.
-//   Adding the Undef flag to the def in (1) prevents that. The Undef flag
-//   may be applied to both defs and uses.
-// - Dead: applies only to defs. The value coming out of a "dead" def is
-//   assumed to be unused, even if the def appears to be reaching other defs
-//   or uses. The motivation for this flag comes from dead defs on function
-//   calls: there is no way to determine if such a def is dead without
-//   analyzing the target's ABI. Hence the graph should contain this info,
-//   as it is unavailable otherwise. On the other hand, a def without any
-//   uses on a typical instruction is not the intended target for this flag.
-//
-// *** Shadow references
-//
-// It may happen that a super-register can have two (or more) non-overlapping
-// sub-registers. When both of these sub-registers are defined and followed
-// by a use of the super-register, the use of the super-register will not
-// have a unique reaching def: both defs of the sub-registers need to be
-// accounted for. In such cases, a duplicate use of the super-register is
-// added and it points to the extra reaching def. Both uses are marked with
-// a flag "shadow". Example:
-// Assume t0 is a super-register of r0 and r1, r0 and r1 do not overlap:
-//   set r0, 1        ; r0 = 1
-//   set r1, 1        ; r1 = 1
-//   addi t1, t0, 1   ; t1 = t0+1
-//
-// The DFG:
-//   s1: set [d2<r0>(,,u7):]
-//   s3: set [d4<r1>(,,u8):]
-//   s5: addi [d6<t1>(,,):, u7"<t0>(d2):, u8"<t0>(d4):]
-//
-// The statement s5 has two use nodes for t0: u7" and u8". The quotation
-// mark " indicates that the node is a shadow.
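-// (Illustrative reading of the dump above: u7" links to d2 and u8" links
-// to d4, so walking the related refs of t0 in s5 visits both reaching
-// defs; neither sub-register def alone covers t0, which is why the extra
-// shadow use exists.)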
-// - -#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H -#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H - -#include "RDFRegisters.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/LaneBitmask.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/MathExtras.h" -#include <cassert> -#include <cstdint> -#include <cstring> -#include <map> -#include <set> -#include <unordered_map> -#include <utility> -#include <vector> - -// RDF uses uint32_t to refer to registers. This is to ensure that the type -// size remains specific. In other places, registers are often stored using -// unsigned. -static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal"); - -namespace llvm { - -class MachineBasicBlock; -class MachineDominanceFrontier; -class MachineDominatorTree; -class MachineFunction; -class MachineInstr; -class MachineOperand; -class raw_ostream; -class TargetInstrInfo; -class TargetRegisterInfo; - -namespace rdf { - - using NodeId = uint32_t; - - struct DataFlowGraph; - - struct NodeAttrs { - enum : uint16_t { - None = 0x0000, // Nothing - - // Types: 2 bits - TypeMask = 0x0003, - Code = 0x0001, // 01, Container - Ref = 0x0002, // 10, Reference - - // Kind: 3 bits - KindMask = 0x0007 << 2, - Def = 0x0001 << 2, // 001 - Use = 0x0002 << 2, // 010 - Phi = 0x0003 << 2, // 011 - Stmt = 0x0004 << 2, // 100 - Block = 0x0005 << 2, // 101 - Func = 0x0006 << 2, // 110 - - // Flags: 7 bits for now - FlagMask = 0x007F << 5, - Shadow = 0x0001 << 5, // 0000001, Has extra reaching defs. - Clobbering = 0x0002 << 5, // 0000010, Produces unspecified values. - PhiRef = 0x0004 << 5, // 0000100, Member of PhiNode. - Preserving = 0x0008 << 5, // 0001000, Def can keep original bits. - Fixed = 0x0010 << 5, // 0010000, Fixed register. - Undef = 0x0020 << 5, // 0100000, Has no pre-existing value. - Dead = 0x0040 << 5, // 1000000, Does not define a value. - }; - - static uint16_t type(uint16_t T) { return T & TypeMask; } - static uint16_t kind(uint16_t T) { return T & KindMask; } - static uint16_t flags(uint16_t T) { return T & FlagMask; } - - static uint16_t set_type(uint16_t A, uint16_t T) { - return (A & ~TypeMask) | T; - } - - static uint16_t set_kind(uint16_t A, uint16_t K) { - return (A & ~KindMask) | K; - } - - static uint16_t set_flags(uint16_t A, uint16_t F) { - return (A & ~FlagMask) | F; - } - - // Test if A contains B. - static bool contains(uint16_t A, uint16_t B) { - if (type(A) != Code) - return false; - uint16_t KB = kind(B); - switch (kind(A)) { - case Func: - return KB == Block; - case Block: - return KB == Phi || KB == Stmt; - case Phi: - case Stmt: - return type(B) == Ref; - } - return false; - } - }; - - struct BuildOptions { - enum : unsigned { - None = 0x00, - KeepDeadPhis = 0x01, // Do not remove dead phis during build. - }; - }; - - template <typename T> struct NodeAddr { - NodeAddr() = default; - NodeAddr(T A, NodeId I) : Addr(A), Id(I) {} - - // Type cast (casting constructor). The reason for having this class - // instead of std::pair. - template <typename S> NodeAddr(const NodeAddr<S> &NA) - : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {} - - bool operator== (const NodeAddr<T> &NA) const { - assert((Addr == NA.Addr) == (Id == NA.Id)); - return Addr == NA.Addr; - } - bool operator!= (const NodeAddr<T> &NA) const { - return !operator==(NA); - } - - T Addr = nullptr; - NodeId Id = 0; - }; - - struct NodeBase; - - // Fast memory allocation and translation between node id and node address. 
- // This is really the same idea as the one underlying the "bump pointer - // allocator", the difference being in the translation. A node id is - // composed of two components: the index of the block in which it was - // allocated, and the index within the block. With the default settings, - // where the number of nodes per block is 4096, the node id (minus 1) is: - // - // bit position: 11 0 - // +----------------------------+--------------+ - // | Index of the block |Index in block| - // +----------------------------+--------------+ - // - // The actual node id is the above plus 1, to avoid creating a node id of 0. - // - // This method significantly improved the build time, compared to using maps - // (std::unordered_map or DenseMap) to translate between pointers and ids. - struct NodeAllocator { - // Amount of storage for a single node. - enum { NodeMemSize = 32 }; - - NodeAllocator(uint32_t NPB = 4096) - : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)), - IndexMask((1 << BitsPerIndex)-1) { - assert(isPowerOf2_32(NPB)); - } - - NodeBase *ptr(NodeId N) const { - uint32_t N1 = N-1; - uint32_t BlockN = N1 >> BitsPerIndex; - uint32_t Offset = (N1 & IndexMask) * NodeMemSize; - return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset); - } - - NodeId id(const NodeBase *P) const; - NodeAddr<NodeBase*> New(); - void clear(); - - private: - void startNewBlock(); - bool needNewBlock(); - - uint32_t makeId(uint32_t Block, uint32_t Index) const { - // Add 1 to the id, to avoid the id of 0, which is treated as "null". - return ((Block << BitsPerIndex) | Index) + 1; - } - - const uint32_t NodesPerBlock; - const uint32_t BitsPerIndex; - const uint32_t IndexMask; - char *ActiveEnd = nullptr; - std::vector<char*> Blocks; - using AllocatorTy = BumpPtrAllocatorImpl<MallocAllocator, 65536>; - AllocatorTy MemPool; - }; - - using RegisterSet = std::set<RegisterRef>; - - struct TargetOperandInfo { - TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {} - virtual ~TargetOperandInfo() = default; - - virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const; - virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const; - virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const; - - const TargetInstrInfo &TII; - }; - - // Packed register reference. Only used for storage. - struct PackedRegisterRef { - RegisterId Reg; - uint32_t MaskId; - }; - - struct LaneMaskIndex : private IndexedSet<LaneBitmask> { - LaneMaskIndex() = default; - - LaneBitmask getLaneMaskForIndex(uint32_t K) const { - return K == 0 ? LaneBitmask::getAll() : get(K); - } - - uint32_t getIndexForLaneMask(LaneBitmask LM) { - assert(LM.any()); - return LM.all() ? 0 : insert(LM); - } - - uint32_t getIndexForLaneMask(LaneBitmask LM) const { - assert(LM.any()); - return LM.all() ? 0 : find(LM); - } - }; - - struct NodeBase { - public: - // Make sure this is a POD. - NodeBase() = default; - - uint16_t getType() const { return NodeAttrs::type(Attrs); } - uint16_t getKind() const { return NodeAttrs::kind(Attrs); } - uint16_t getFlags() const { return NodeAttrs::flags(Attrs); } - NodeId getNext() const { return Next; } - - uint16_t getAttrs() const { return Attrs; } - void setAttrs(uint16_t A) { Attrs = A; } - void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); } - - // Insert node NA after "this" in the circular chain. - void append(NodeAddr<NodeBase*> NA); - - // Initialize all members to 0. 
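-    // (A plain memset is valid here only because NodeBase is a POD with no
-    // virtual members, as the design notes above require.)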
- void init() { memset(this, 0, sizeof *this); } - - void setNext(NodeId N) { Next = N; } - - protected: - uint16_t Attrs; - uint16_t Reserved; - NodeId Next; // Id of the next node in the circular chain. - // Definitions of nested types. Using anonymous nested structs would make - // this class definition clearer, but unnamed structs are not a part of - // the standard. - struct Def_struct { - NodeId DD, DU; // Ids of the first reached def and use. - }; - struct PhiU_struct { - NodeId PredB; // Id of the predecessor block for a phi use. - }; - struct Code_struct { - void *CP; // Pointer to the actual code. - NodeId FirstM, LastM; // Id of the first member and last. - }; - struct Ref_struct { - NodeId RD, Sib; // Ids of the reaching def and the sibling. - union { - Def_struct Def; - PhiU_struct PhiU; - }; - union { - MachineOperand *Op; // Non-phi refs point to a machine operand. - PackedRegisterRef PR; // Phi refs store register info directly. - }; - }; - - // The actual payload. - union { - Ref_struct Ref; - Code_struct Code; - }; - }; - // The allocator allocates chunks of 32 bytes for each node. The fact that - // each node takes 32 bytes in memory is used for fast translation between - // the node id and the node address. - static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize, - "NodeBase must be at most NodeAllocator::NodeMemSize bytes"); - - using NodeList = SmallVector<NodeAddr<NodeBase *>, 4>; - using NodeSet = std::set<NodeId>; - - struct RefNode : public NodeBase { - RefNode() = default; - - RegisterRef getRegRef(const DataFlowGraph &G) const; - - MachineOperand &getOp() { - assert(!(getFlags() & NodeAttrs::PhiRef)); - return *Ref.Op; - } - - void setRegRef(RegisterRef RR, DataFlowGraph &G); - void setRegRef(MachineOperand *Op, DataFlowGraph &G); - - NodeId getReachingDef() const { - return Ref.RD; - } - void setReachingDef(NodeId RD) { - Ref.RD = RD; - } - - NodeId getSibling() const { - return Ref.Sib; - } - void setSibling(NodeId Sib) { - Ref.Sib = Sib; - } - - bool isUse() const { - assert(getType() == NodeAttrs::Ref); - return getKind() == NodeAttrs::Use; - } - - bool isDef() const { - assert(getType() == NodeAttrs::Ref); - return getKind() == NodeAttrs::Def; - } - - template <typename Predicate> - NodeAddr<RefNode*> getNextRef(RegisterRef RR, Predicate P, bool NextOnly, - const DataFlowGraph &G); - NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G); - }; - - struct DefNode : public RefNode { - NodeId getReachedDef() const { - return Ref.Def.DD; - } - void setReachedDef(NodeId D) { - Ref.Def.DD = D; - } - NodeId getReachedUse() const { - return Ref.Def.DU; - } - void setReachedUse(NodeId U) { - Ref.Def.DU = U; - } - - void linkToDef(NodeId Self, NodeAddr<DefNode*> DA); - }; - - struct UseNode : public RefNode { - void linkToDef(NodeId Self, NodeAddr<DefNode*> DA); - }; - - struct PhiUseNode : public UseNode { - NodeId getPredecessor() const { - assert(getFlags() & NodeAttrs::PhiRef); - return Ref.PhiU.PredB; - } - void setPredecessor(NodeId B) { - assert(getFlags() & NodeAttrs::PhiRef); - Ref.PhiU.PredB = B; - } - }; - - struct CodeNode : public NodeBase { - template <typename T> T getCode() const { - return static_cast<T>(Code.CP); - } - void setCode(void *C) { - Code.CP = C; - } - - NodeAddr<NodeBase*> getFirstMember(const DataFlowGraph &G) const; - NodeAddr<NodeBase*> getLastMember(const DataFlowGraph &G) const; - void addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G); - void addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA, - const 
DataFlowGraph &G); - void removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G); - - NodeList members(const DataFlowGraph &G) const; - template <typename Predicate> - NodeList members_if(Predicate P, const DataFlowGraph &G) const; - }; - - struct InstrNode : public CodeNode { - NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G); - }; - - struct PhiNode : public InstrNode { - MachineInstr *getCode() const { - return nullptr; - } - }; - - struct StmtNode : public InstrNode { - MachineInstr *getCode() const { - return CodeNode::getCode<MachineInstr*>(); - } - }; - - struct BlockNode : public CodeNode { - MachineBasicBlock *getCode() const { - return CodeNode::getCode<MachineBasicBlock*>(); - } - - void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G); - }; - - struct FuncNode : public CodeNode { - MachineFunction *getCode() const { - return CodeNode::getCode<MachineFunction*>(); - } - - NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB, - const DataFlowGraph &G) const; - NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G); - }; - - struct DataFlowGraph { - DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, - const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, - const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi); - - NodeBase *ptr(NodeId N) const; - template <typename T> T ptr(NodeId N) const { - return static_cast<T>(ptr(N)); - } - - NodeId id(const NodeBase *P) const; - - template <typename T> NodeAddr<T> addr(NodeId N) const { - return { ptr<T>(N), N }; - } - - NodeAddr<FuncNode*> getFunc() const { return Func; } - MachineFunction &getMF() const { return MF; } - const TargetInstrInfo &getTII() const { return TII; } - const TargetRegisterInfo &getTRI() const { return TRI; } - const PhysicalRegisterInfo &getPRI() const { return PRI; } - const MachineDominatorTree &getDT() const { return MDT; } - const MachineDominanceFrontier &getDF() const { return MDF; } - const RegisterAggr &getLiveIns() const { return LiveIns; } - - struct DefStack { - DefStack() = default; - - bool empty() const { return Stack.empty() || top() == bottom(); } - - private: - using value_type = NodeAddr<DefNode *>; - struct Iterator { - using value_type = DefStack::value_type; - - Iterator &up() { Pos = DS.nextUp(Pos); return *this; } - Iterator &down() { Pos = DS.nextDown(Pos); return *this; } - - value_type operator*() const { - assert(Pos >= 1); - return DS.Stack[Pos-1]; - } - const value_type *operator->() const { - assert(Pos >= 1); - return &DS.Stack[Pos-1]; - } - bool operator==(const Iterator &It) const { return Pos == It.Pos; } - bool operator!=(const Iterator &It) const { return Pos != It.Pos; } - - private: - friend struct DefStack; - - Iterator(const DefStack &S, bool Top); - - // Pos-1 is the index in the StorageType object that corresponds to - // the top of the DefStack. 
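-      // (Delimiter entries, pushed when a block is marked, have a null
-      // Addr; nextUp()/nextDown() let the iterator step across them.)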
- const DefStack &DS; - unsigned Pos; - }; - - public: - using iterator = Iterator; - - iterator top() const { return Iterator(*this, true); } - iterator bottom() const { return Iterator(*this, false); } - unsigned size() const; - - void push(NodeAddr<DefNode*> DA) { Stack.push_back(DA); } - void pop(); - void start_block(NodeId N); - void clear_block(NodeId N); - - private: - friend struct Iterator; - - using StorageType = std::vector<value_type>; - - bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const { - return (P.Addr == nullptr) && (N == 0 || P.Id == N); - } - - unsigned nextUp(unsigned P) const; - unsigned nextDown(unsigned P) const; - - StorageType Stack; - }; - - // Make this std::unordered_map for speed of accessing elements. - // Map: Register (physical or virtual) -> DefStack - using DefStackMap = std::unordered_map<RegisterId, DefStack>; - - void build(unsigned Options = BuildOptions::None); - void pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM); - void markBlock(NodeId B, DefStackMap &DefM); - void releaseBlock(NodeId B, DefStackMap &DefM); - - PackedRegisterRef pack(RegisterRef RR) { - return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) }; - } - PackedRegisterRef pack(RegisterRef RR) const { - return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) }; - } - RegisterRef unpack(PackedRegisterRef PR) const { - return RegisterRef(PR.Reg, LMI.getLaneMaskForIndex(PR.MaskId)); - } - - RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const; - RegisterRef makeRegRef(const MachineOperand &Op) const; - RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const; - - NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA) const; - NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA, bool Create); - NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA) const; - NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA, bool Create); - NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA) const; - - NodeList getRelatedRefs(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA) const; - - NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) const { - return BlockNodes.at(BB); - } - - void unlinkUse(NodeAddr<UseNode*> UA, bool RemoveFromOwner) { - unlinkUseDF(UA); - if (RemoveFromOwner) - removeFromOwner(UA); - } - - void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) { - unlinkDefDF(DA); - if (RemoveFromOwner) - removeFromOwner(DA); - } - - // Some useful filters. 
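-    // (Each returns true for a node of the stated type and kind; they are
-    // intended as predicates for members_if, e.g. members_if(IsPhi, *this)
-    // as used in RDFGraph.cpp above.)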
- template <uint16_t Kind> - static bool IsRef(const NodeAddr<NodeBase*> BA) { - return BA.Addr->getType() == NodeAttrs::Ref && - BA.Addr->getKind() == Kind; - } - - template <uint16_t Kind> - static bool IsCode(const NodeAddr<NodeBase*> BA) { - return BA.Addr->getType() == NodeAttrs::Code && - BA.Addr->getKind() == Kind; - } - - static bool IsDef(const NodeAddr<NodeBase*> BA) { - return BA.Addr->getType() == NodeAttrs::Ref && - BA.Addr->getKind() == NodeAttrs::Def; - } - - static bool IsUse(const NodeAddr<NodeBase*> BA) { - return BA.Addr->getType() == NodeAttrs::Ref && - BA.Addr->getKind() == NodeAttrs::Use; - } - - static bool IsPhi(const NodeAddr<NodeBase*> BA) { - return BA.Addr->getType() == NodeAttrs::Code && - BA.Addr->getKind() == NodeAttrs::Phi; - } - - static bool IsPreservingDef(const NodeAddr<DefNode*> DA) { - uint16_t Flags = DA.Addr->getFlags(); - return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef); - } - - private: - void reset(); - - RegisterSet getLandingPadLiveIns() const; - - NodeAddr<NodeBase*> newNode(uint16_t Attrs); - NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B); - NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner, - MachineOperand &Op, uint16_t Flags = NodeAttrs::None); - NodeAddr<PhiUseNode*> newPhiUse(NodeAddr<PhiNode*> Owner, - RegisterRef RR, NodeAddr<BlockNode*> PredB, - uint16_t Flags = NodeAttrs::PhiRef); - NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner, - MachineOperand &Op, uint16_t Flags = NodeAttrs::None); - NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner, - RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef); - NodeAddr<PhiNode*> newPhi(NodeAddr<BlockNode*> Owner); - NodeAddr<StmtNode*> newStmt(NodeAddr<BlockNode*> Owner, - MachineInstr *MI); - NodeAddr<BlockNode*> newBlock(NodeAddr<FuncNode*> Owner, - MachineBasicBlock *BB); - NodeAddr<FuncNode*> newFunc(MachineFunction *MF); - - template <typename Predicate> - std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>> - locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, - Predicate P) const; - - using BlockRefsMap = std::map<NodeId, RegisterSet>; - - void buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In); - void recordDefsForDF(BlockRefsMap &PhiM, NodeAddr<BlockNode*> BA); - void buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs, - NodeAddr<BlockNode*> BA); - void removeUnusedPhis(); - - void pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DM); - void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM); - template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA, - NodeAddr<T> TA, DefStack &DS); - template <typename Predicate> void linkStmtRefs(DefStackMap &DefM, - NodeAddr<StmtNode*> SA, Predicate P); - void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA); - - void unlinkUseDF(NodeAddr<UseNode*> UA); - void unlinkDefDF(NodeAddr<DefNode*> DA); - - void removeFromOwner(NodeAddr<RefNode*> RA) { - NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this); - IA.Addr->removeMember(RA, *this); - } - - MachineFunction &MF; - const TargetInstrInfo &TII; - const TargetRegisterInfo &TRI; - const PhysicalRegisterInfo PRI; - const MachineDominatorTree &MDT; - const MachineDominanceFrontier &MDF; - const TargetOperandInfo &TOI; - - RegisterAggr LiveIns; - NodeAddr<FuncNode*> Func; - NodeAllocator Memory; - // Local map: MachineBasicBlock -> NodeAddr<BlockNode*> - std::map<MachineBasicBlock*,NodeAddr<BlockNode*>> BlockNodes; - // Lane mask map. 
- LaneMaskIndex LMI; - }; // struct DataFlowGraph - - template <typename Predicate> - NodeAddr<RefNode*> RefNode::getNextRef(RegisterRef RR, Predicate P, - bool NextOnly, const DataFlowGraph &G) { - // Get the "Next" reference in the circular list that references RR and - // satisfies predicate "Pred". - auto NA = G.addr<NodeBase*>(getNext()); - - while (NA.Addr != this) { - if (NA.Addr->getType() == NodeAttrs::Ref) { - NodeAddr<RefNode*> RA = NA; - if (RA.Addr->getRegRef(G) == RR && P(NA)) - return NA; - if (NextOnly) - break; - NA = G.addr<NodeBase*>(NA.Addr->getNext()); - } else { - // We've hit the beginning of the chain. - assert(NA.Addr->getType() == NodeAttrs::Code); - NodeAddr<CodeNode*> CA = NA; - NA = CA.Addr->getFirstMember(G); - } - } - // Return the equivalent of "nullptr" if such a node was not found. - return NodeAddr<RefNode*>(); - } - - template <typename Predicate> - NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const { - NodeList MM; - auto M = getFirstMember(G); - if (M.Id == 0) - return MM; - - while (M.Addr != this) { - if (P(M)) - MM.push_back(M); - M = G.addr<NodeBase*>(M.Addr->getNext()); - } - return MM; - } - - template <typename T> - struct Print { - Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {} - - const T &Obj; - const DataFlowGraph &G; - }; - - template <typename T> - struct PrintNode : Print<NodeAddr<T>> { - PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g) - : Print<NodeAddr<T>>(x, g) {} - }; - - raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P); - raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P); - raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<DefNode *>> &P); - raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<UseNode *>> &P); - raw_ostream &operator<<(raw_ostream &OS, - const Print<NodeAddr<PhiUseNode *>> &P); - raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<RefNode *>> &P); - raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P); - raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P); - raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<PhiNode *>> &P); - raw_ostream &operator<<(raw_ostream &OS, - const Print<NodeAddr<StmtNode *>> &P); - raw_ostream &operator<<(raw_ostream &OS, - const Print<NodeAddr<InstrNode *>> &P); - raw_ostream &operator<<(raw_ostream &OS, - const Print<NodeAddr<BlockNode *>> &P); - raw_ostream &operator<<(raw_ostream &OS, - const Print<NodeAddr<FuncNode *>> &P); - raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P); - raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P); - raw_ostream &operator<<(raw_ostream &OS, - const Print<DataFlowGraph::DefStack> &P); - -} // end namespace rdf - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/llvm/lib/Target/Hexagon/RDFLiveness.cpp deleted file mode 100644 index e2c007c9d01a..000000000000 --- a/llvm/lib/Target/Hexagon/RDFLiveness.cpp +++ /dev/null @@ -1,1118 +0,0 @@ -//===- RDFLiveness.cpp ----------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Computation of the liveness information from the data-flow graph. 
-// -// The main functionality of this code is to compute block live-in -// information. With the live-in information in place, the placement -// of kill flags can also be recalculated. -// -// The block live-in calculation is based on the ideas from the following -// publication: -// -// Dibyendu Das, Ramakrishna Upadrasta, Benoit Dupont de Dinechin. -// "Efficient Liveness Computation Using Merge Sets and DJ-Graphs." -// ACM Transactions on Architecture and Code Optimization, Association for -// Computing Machinery, 2012, ACM TACO Special Issue on "High-Performance -// and Embedded Architectures and Compilers", 8 (4), -// <10.1145/2086696.2086706>. <hal-00647369> -// -#include "RDFLiveness.h" -#include "RDFGraph.h" -#include "RDFRegisters.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominanceFrontier.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/LaneBitmask.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cassert> -#include <cstdint> -#include <iterator> -#include <map> -#include <utility> -#include <vector> - -using namespace llvm; -using namespace rdf; - -static cl::opt<unsigned> MaxRecNest("rdf-liveness-max-rec", cl::init(25), - cl::Hidden, cl::desc("Maximum recursion level")); - -namespace llvm { -namespace rdf { - - raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) { - OS << '{'; - for (auto &I : P.Obj) { - OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{'; - for (auto J = I.second.begin(), E = I.second.end(); J != E; ) { - OS << Print<NodeId>(J->first, P.G) << PrintLaneMaskOpt(J->second); - if (++J != E) - OS << ','; - } - OS << '}'; - } - OS << " }"; - return OS; - } - -} // end namespace rdf -} // end namespace llvm - -// The order in the returned sequence is the order of reaching defs in the -// upward traversal: the first def is the closest to the given reference RefA, -// the next one is further up, and so on. -// The list ends at a reaching phi def, or when the reference from RefA is -// covered by the defs in the list (see FullChain). -// This function provides two modes of operation: -// (1) Returning the sequence of reaching defs for a particular reference -// node. This sequence will terminate at the first phi node [1]. -// (2) Returning a partial sequence of reaching defs, where the final goal -// is to traverse past phi nodes to the actual defs arising from the code -// itself. -// In mode (2), the register reference for which the search was started -// may be different from the reference node RefA, for which this call was -// made, hence the argument RefRR, which holds the original register. -// Also, some definitions may have already been encountered in a previous -// call that will influence register covering. The register references -// already defined are passed in through DefRRs. -// In mode (1), the "continuation" considerations do not apply, and the -// RefRR is the same as the register in RefA, and the set DefRRs is empty. -// -// [1] It is possible for multiple phi nodes to be included in the returned -// sequence: -// SubA = phi ... -// SubB = phi ... -// ... 
= SuperAB(rdef:SubA), SuperAB"(rdef:SubB) -// However, these phi nodes are independent from one another in terms of -// the data-flow. - -NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, - NodeAddr<RefNode*> RefA, bool TopShadows, bool FullChain, - const RegisterAggr &DefRRs) { - NodeList RDefs; // Return value. - SetVector<NodeId> DefQ; - SetVector<NodeId> Owners; - - // Dead defs will be treated as if they were live, since they are actually - // on the data-flow path. They cannot be ignored because even though they - // do not generate meaningful values, they still modify registers. - - // If the reference is undefined, there is nothing to do. - if (RefA.Addr->getFlags() & NodeAttrs::Undef) - return RDefs; - - // The initial queue should not have reaching defs for shadows. The - // whole point of a shadow is that it will have a reaching def that - // is not aliased to the reaching defs of the related shadows. - NodeId Start = RefA.Id; - auto SNA = DFG.addr<RefNode*>(Start); - if (NodeId RD = SNA.Addr->getReachingDef()) - DefQ.insert(RD); - if (TopShadows) { - for (auto S : DFG.getRelatedRefs(RefA.Addr->getOwner(DFG), RefA)) - if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef()) - DefQ.insert(RD); - } - - // Collect all the reaching defs, going up until a phi node is encountered, - // or there are no more reaching defs. From this set, the actual set of - // reaching defs will be selected. - // The traversal upwards must go on until a covering def is encountered. - // It is possible that a collection of non-covering (individually) defs - // will be sufficient, but keep going until a covering one is found. - for (unsigned i = 0; i < DefQ.size(); ++i) { - auto TA = DFG.addr<DefNode*>(DefQ[i]); - if (TA.Addr->getFlags() & NodeAttrs::PhiRef) - continue; - // Stop at the covering/overwriting def of the initial register reference. - RegisterRef RR = TA.Addr->getRegRef(DFG); - if (!DFG.IsPreservingDef(TA)) - if (RegisterAggr::isCoverOf(RR, RefRR, PRI)) - continue; - // Get the next level of reaching defs. This will include multiple - // reaching defs for shadows. - for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA)) - if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef()) - DefQ.insert(RD); - } - - // Remove all non-phi defs that are not aliased to RefRR, and collect - // the owners of the remaining defs. - SetVector<NodeId> Defs; - for (NodeId N : DefQ) { - auto TA = DFG.addr<DefNode*>(N); - bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef; - if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG))) - continue; - Defs.insert(TA.Id); - Owners.insert(TA.Addr->getOwner(DFG).Id); - } - - // Return the MachineBasicBlock containing a given instruction. - auto Block = [this] (NodeAddr<InstrNode*> IA) -> MachineBasicBlock* { - if (IA.Addr->getKind() == NodeAttrs::Stmt) - return NodeAddr<StmtNode*>(IA).Addr->getCode()->getParent(); - assert(IA.Addr->getKind() == NodeAttrs::Phi); - NodeAddr<PhiNode*> PA = IA; - NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG); - return BA.Addr->getCode(); - }; - // Less(A,B) iff instruction A is further down in the dominator tree than B. - auto Less = [&Block,this] (NodeId A, NodeId B) -> bool { - if (A == B) - return false; - auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B); - MachineBasicBlock *BA = Block(OA), *BB = Block(OB); - if (BA != BB) - return MDT.dominates(BB, BA); - // They are in the same block. 
- bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt; - bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt; - if (StmtA) { - if (!StmtB) // OB is a phi and phis dominate statements. - return true; - MachineInstr *CA = NodeAddr<StmtNode*>(OA).Addr->getCode(); - MachineInstr *CB = NodeAddr<StmtNode*>(OB).Addr->getCode(); - // The order must be linear, so tie-break such equalities. - if (CA == CB) - return A < B; - return MDT.dominates(CB, CA); - } else { - // OA is a phi. - if (StmtB) - return false; - // Both are phis. There is no ordering between phis (in terms of - // the data-flow), so tie-break this via node id comparison. - return A < B; - } - }; - - std::vector<NodeId> Tmp(Owners.begin(), Owners.end()); - llvm::sort(Tmp, Less); - - // The vector is a list of instructions, so that defs coming from - // the same instruction don't need to be artificially ordered. - // Then, when computing the initial segment, and iterating over an - // instruction, pick the defs that contribute to the covering (i.e. is - // not covered by previously added defs). Check the defs individually, - // i.e. first check each def if is covered or not (without adding them - // to the tracking set), and then add all the selected ones. - - // The reason for this is this example: - // *d1<A>, *d2<B>, ... Assume A and B are aliased (can happen in phi nodes). - // *d3<C> If A \incl BuC, and B \incl AuC, then *d2 would be - // covered if we added A first, and A would be covered - // if we added B first. - - RegisterAggr RRs(DefRRs); - - auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool { - return TA.Addr->getKind() == NodeAttrs::Def && - Defs.count(TA.Id); - }; - for (NodeId T : Tmp) { - if (!FullChain && RRs.hasCoverOf(RefRR)) - break; - auto TA = DFG.addr<InstrNode*>(T); - bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA); - NodeList Ds; - for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) { - RegisterRef QR = DA.Addr->getRegRef(DFG); - // Add phi defs even if they are covered by subsequent defs. This is - // for cases where the reached use is not covered by any of the defs - // encountered so far: the phi def is needed to expose the liveness - // of that use to the entry of the block. - // Example: - // phi d1<R3>(,d2,), ... Phi def d1 is covered by d2. - // d2<R3>(d1,,u3), ... - // ..., u3<D1>(d2) This use needs to be live on entry. - if (FullChain || IsPhi || !RRs.hasCoverOf(QR)) - Ds.push_back(DA); - } - RDefs.insert(RDefs.end(), Ds.begin(), Ds.end()); - for (NodeAddr<DefNode*> DA : Ds) { - // When collecting a full chain of definitions, do not consider phi - // defs to actually define a register. - uint16_t Flags = DA.Addr->getFlags(); - if (!FullChain || !(Flags & NodeAttrs::PhiRef)) - if (!(Flags & NodeAttrs::Preserving)) // Don't care about Undef here. - RRs.insert(DA.Addr->getRegRef(DFG)); - } - } - - auto DeadP = [](const NodeAddr<DefNode*> DA) -> bool { - return DA.Addr->getFlags() & NodeAttrs::Dead; - }; - RDefs.resize(std::distance(RDefs.begin(), llvm::remove_if(RDefs, DeadP))); - - return RDefs; -} - -std::pair<NodeSet,bool> -Liveness::getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode*> RefA, - NodeSet &Visited, const NodeSet &Defs) { - return getAllReachingDefsRecImpl(RefRR, RefA, Visited, Defs, 0, MaxRecNest); -} - -std::pair<NodeSet,bool> -Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode*> RefA, - NodeSet &Visited, const NodeSet &Defs, unsigned Nest, unsigned MaxNest) { - if (Nest > MaxNest) - return { NodeSet(), false }; - // Collect all defined registers. 
Do not consider phis to be defining - // anything, only collect "real" definitions. - RegisterAggr DefRRs(PRI); - for (NodeId D : Defs) { - const auto DA = DFG.addr<const DefNode*>(D); - if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef)) - DefRRs.insert(DA.Addr->getRegRef(DFG)); - } - - NodeList RDs = getAllReachingDefs(RefRR, RefA, false, true, DefRRs); - if (RDs.empty()) - return { Defs, true }; - - // Make a copy of the preexisting definitions and add the newly found ones. - NodeSet TmpDefs = Defs; - for (NodeAddr<NodeBase*> R : RDs) - TmpDefs.insert(R.Id); - - NodeSet Result = Defs; - - for (NodeAddr<DefNode*> DA : RDs) { - Result.insert(DA.Id); - if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef)) - continue; - NodeAddr<PhiNode*> PA = DA.Addr->getOwner(DFG); - if (Visited.count(PA.Id)) - continue; - Visited.insert(PA.Id); - // Go over all phi uses and get the reaching defs for each use. - for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) { - const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs, - Nest+1, MaxNest); - if (!T.second) - return { T.first, false }; - Result.insert(T.first.begin(), T.first.end()); - } - } - - return { Result, true }; -} - -/// Find the nearest ref node aliased to RefRR, going upwards in the data -/// flow, starting from the instruction immediately preceding Inst. -NodeAddr<RefNode*> Liveness::getNearestAliasedRef(RegisterRef RefRR, - NodeAddr<InstrNode*> IA) { - NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG); - NodeList Ins = BA.Addr->members(DFG); - NodeId FindId = IA.Id; - auto E = Ins.rend(); - auto B = std::find_if(Ins.rbegin(), E, - [FindId] (const NodeAddr<InstrNode*> T) { - return T.Id == FindId; - }); - // Do not scan IA (which is what B would point to). - if (B != E) - ++B; - - do { - // Process the range of instructions from B to E. - for (NodeAddr<InstrNode*> I : make_range(B, E)) { - NodeList Refs = I.Addr->members(DFG); - NodeAddr<RefNode*> Clob, Use; - // Scan all the refs in I aliased to RefRR, and return the one that - // is the closest to the output of I, i.e. def > clobber > use. - for (NodeAddr<RefNode*> R : Refs) { - if (!PRI.alias(R.Addr->getRegRef(DFG), RefRR)) - continue; - if (DFG.IsDef(R)) { - // If it's a non-clobbering def, just return it. - if (!(R.Addr->getFlags() & NodeAttrs::Clobbering)) - return R; - Clob = R; - } else { - Use = R; - } - } - if (Clob.Id != 0) - return Clob; - if (Use.Id != 0) - return Use; - } - - // Go up to the immediate dominator, if any. - MachineBasicBlock *BB = BA.Addr->getCode(); - BA = NodeAddr<BlockNode*>(); - if (MachineDomTreeNode *N = MDT.getNode(BB)) { - if ((N = N->getIDom())) - BA = DFG.findBlock(N->getBlock()); - } - if (!BA.Id) - break; - - Ins = BA.Addr->members(DFG); - B = Ins.rbegin(); - E = Ins.rend(); - } while (true); - - return NodeAddr<RefNode*>(); -} - -NodeSet Liveness::getAllReachedUses(RegisterRef RefRR, - NodeAddr<DefNode*> DefA, const RegisterAggr &DefRRs) { - NodeSet Uses; - - // If the original register is already covered by all the intervening - // defs, no more uses can be reached. - if (DefRRs.hasCoverOf(RefRR)) - return Uses; - - // Add all directly reached uses. - // If the def is dead, it does not provide a value for any use. - bool IsDead = DefA.Addr->getFlags() & NodeAttrs::Dead; - NodeId U = !IsDead ? 
DefA.Addr->getReachedUse() : 0; - while (U != 0) { - auto UA = DFG.addr<UseNode*>(U); - if (!(UA.Addr->getFlags() & NodeAttrs::Undef)) { - RegisterRef UR = UA.Addr->getRegRef(DFG); - if (PRI.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR)) - Uses.insert(U); - } - U = UA.Addr->getSibling(); - } - - // Traverse all reached defs. This time dead defs cannot be ignored. - for (NodeId D = DefA.Addr->getReachedDef(), NextD; D != 0; D = NextD) { - auto DA = DFG.addr<DefNode*>(D); - NextD = DA.Addr->getSibling(); - RegisterRef DR = DA.Addr->getRegRef(DFG); - // If this def is already covered, it cannot reach anything new. - // Similarly, skip it if it is not aliased to the interesting register. - if (DefRRs.hasCoverOf(DR) || !PRI.alias(RefRR, DR)) - continue; - NodeSet T; - if (DFG.IsPreservingDef(DA)) { - // If it is a preserving def, do not update the set of intervening defs. - T = getAllReachedUses(RefRR, DA, DefRRs); - } else { - RegisterAggr NewDefRRs = DefRRs; - NewDefRRs.insert(DR); - T = getAllReachedUses(RefRR, DA, NewDefRRs); - } - Uses.insert(T.begin(), T.end()); - } - return Uses; -} - -void Liveness::computePhiInfo() { - RealUseMap.clear(); - - NodeList Phis; - NodeAddr<FuncNode*> FA = DFG.getFunc(); - NodeList Blocks = FA.Addr->members(DFG); - for (NodeAddr<BlockNode*> BA : Blocks) { - auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG); - Phis.insert(Phis.end(), Ps.begin(), Ps.end()); - } - - // phi use -> (map: reaching phi -> set of registers defined in between) - std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp; - std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation. - std::map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it. - - // Go over all phis. - for (NodeAddr<PhiNode*> PhiA : Phis) { - // Go over all defs and collect the reached uses that are non-phi uses - // (i.e. the "real uses"). - RefMap &RealUses = RealUseMap[PhiA.Id]; - NodeList PhiRefs = PhiA.Addr->members(DFG); - - // Have a work queue of defs whose reached uses need to be found. - // For each def, add to the queue all reached (non-phi) defs. - SetVector<NodeId> DefQ; - NodeSet PhiDefs; - RegisterAggr DRs(PRI); - for (NodeAddr<RefNode*> R : PhiRefs) { - if (!DFG.IsRef<NodeAttrs::Def>(R)) - continue; - DRs.insert(R.Addr->getRegRef(DFG)); - DefQ.insert(R.Id); - PhiDefs.insert(R.Id); - } - PhiDRs.insert(std::make_pair(PhiA.Id, DRs)); - - // Collect the super-set of all possible reached uses. This set will - // contain all uses reached from this phi, either directly from the - // phi defs, or (recursively) via non-phi defs reached by the phi defs. - // This set of uses will later be trimmed to only contain these uses that - // are actually reached by the phi defs. - for (unsigned i = 0; i < DefQ.size(); ++i) { - NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]); - // Visit all reached uses. Phi defs should not really have the "dead" - // flag set, but check it anyway for consistency. - bool IsDead = DA.Addr->getFlags() & NodeAttrs::Dead; - NodeId UN = !IsDead ? DA.Addr->getReachedUse() : 0; - while (UN != 0) { - NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN); - uint16_t F = A.Addr->getFlags(); - if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) { - RegisterRef R = PRI.normalize(A.Addr->getRegRef(DFG)); - RealUses[R.Reg].insert({A.Id,R.Mask}); - } - UN = A.Addr->getSibling(); - } - // Visit all reached defs, and add them to the queue. These defs may - // override some of the uses collected here, but that will be handled - // later. 
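-      // (The "later" handling is the trimming loop below: it walks each
-      // collected use's reaching defs and keeps only the lane-mask portion
-      // still exposed to this phi's defs.)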
- NodeId DN = DA.Addr->getReachedDef(); - while (DN != 0) { - NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN); - for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) { - uint16_t Flags = NodeAddr<DefNode*>(T).Addr->getFlags(); - // Must traverse the reached-def chain. Consider: - // def(D0) -> def(R0) -> def(R0) -> use(D0) - // The reachable use of D0 passes through a def of R0. - if (!(Flags & NodeAttrs::PhiRef)) - DefQ.insert(T.Id); - } - DN = A.Addr->getSibling(); - } - } - // Filter out these uses that appear to be reachable, but really - // are not. For example: - // - // R1:0 = d1 - // = R1:0 u2 Reached by d1. - // R0 = d3 - // = R1:0 u4 Still reached by d1: indirectly through - // the def d3. - // R1 = d5 - // = R1:0 u6 Not reached by d1 (covered collectively - // by d3 and d5), but following reached - // defs and uses from d1 will lead here. - for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) { - // For each reached register UI->first, there is a set UI->second, of - // uses of it. For each such use, check if it is reached by this phi, - // i.e. check if the set of its reaching uses intersects the set of - // this phi's defs. - NodeRefSet Uses = UI->second; - UI->second.clear(); - for (std::pair<NodeId,LaneBitmask> I : Uses) { - auto UA = DFG.addr<UseNode*>(I.first); - // Undef flag is checked above. - assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0); - RegisterRef R(UI->first, I.second); - // Calculate the exposed part of the reached use. - RegisterAggr Covered(PRI); - for (NodeAddr<DefNode*> DA : getAllReachingDefs(R, UA)) { - if (PhiDefs.count(DA.Id)) - break; - Covered.insert(DA.Addr->getRegRef(DFG)); - } - if (RegisterRef RC = Covered.clearIn(R)) { - // We are updating the map for register UI->first, so we need - // to map RC to be expressed in terms of that register. - RegisterRef S = PRI.mapTo(RC, UI->first); - UI->second.insert({I.first, S.Mask}); - } - } - UI = UI->second.empty() ? RealUses.erase(UI) : std::next(UI); - } - - // If this phi reaches some "real" uses, add it to the queue for upward - // propagation. - if (!RealUses.empty()) - PhiUQ.push_back(PhiA.Id); - - // Go over all phi uses and check if the reaching def is another phi. - // Collect the phis that are among the reaching defs of these uses. - // While traversing the list of reaching defs for each phi use, accumulate - // the set of registers defined between this phi (PhiA) and the owner phi - // of the reaching def. 
- NodeSet SeenUses; - - for (auto I : PhiRefs) { - if (!DFG.IsRef<NodeAttrs::Use>(I) || SeenUses.count(I.Id)) - continue; - NodeAddr<PhiUseNode*> PUA = I; - if (PUA.Addr->getReachingDef() == 0) - continue; - - RegisterRef UR = PUA.Addr->getRegRef(DFG); - NodeList Ds = getAllReachingDefs(UR, PUA, true, false, NoRegs); - RegisterAggr DefRRs(PRI); - - for (NodeAddr<DefNode*> D : Ds) { - if (D.Addr->getFlags() & NodeAttrs::PhiRef) { - NodeId RP = D.Addr->getOwner(DFG).Id; - std::map<NodeId,RegisterAggr> &M = PhiUp[PUA.Id]; - auto F = M.find(RP); - if (F == M.end()) - M.insert(std::make_pair(RP, DefRRs)); - else - F->second.insert(DefRRs); - } - DefRRs.insert(D.Addr->getRegRef(DFG)); - } - - for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PhiA, PUA)) - SeenUses.insert(T.Id); - } - } - - if (Trace) { - dbgs() << "Phi-up-to-phi map with intervening defs:\n"; - for (auto I : PhiUp) { - dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {"; - for (auto R : I.second) - dbgs() << ' ' << Print<NodeId>(R.first, DFG) - << Print<RegisterAggr>(R.second, DFG); - dbgs() << " }\n"; - } - } - - // Propagate the reached registers up in the phi chain. - // - // The following type of situation needs careful handling: - // - // phi d1<R1:0> (1) - // | - // ... d2<R1> - // | - // phi u3<R1:0> (2) - // | - // ... u4<R1> - // - // The phi node (2) defines a register pair R1:0, and reaches a "real" - // use u4 of just R1. The same phi node is also known to reach (upwards) - // the phi node (1). However, the use u4 is not reached by phi (1), - // because of the intervening definition d2 of R1. The data flow between - // phis (1) and (2) is restricted to R1:0 minus R1, i.e. R0. - // - // When propagating uses up the phi chains, get the all reaching defs - // for a given phi use, and traverse the list until the propagated ref - // is covered, or until reaching the final phi. Only assume that the - // reference reaches the phi in the latter case. - - for (unsigned i = 0; i < PhiUQ.size(); ++i) { - auto PA = DFG.addr<PhiNode*>(PhiUQ[i]); - NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG); - RefMap &RUM = RealUseMap[PA.Id]; - - for (NodeAddr<UseNode*> UA : PUs) { - std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id]; - RegisterRef UR = PRI.normalize(UA.Addr->getRegRef(DFG)); - for (const std::pair<const NodeId, RegisterAggr> &P : PUM) { - bool Changed = false; - const RegisterAggr &MidDefs = P.second; - - // Collect the set PropUp of uses that are reached by the current - // phi PA, and are not covered by any intervening def between the - // currently visited use UA and the upward phi P. - - if (MidDefs.hasCoverOf(UR)) - continue; - - // General algorithm: - // for each (R,U) : U is use node of R, U is reached by PA - // if MidDefs does not cover (R,U) - // then add (R-MidDefs,U) to RealUseMap[P] - // - for (const std::pair<const RegisterId, NodeRefSet> &T : RUM) { - RegisterRef R(T.first); - // The current phi (PA) could be a phi for a regmask. It could - // reach a whole variety of uses that are not related to the - // specific upward phi (P.first). 
- const RegisterAggr &DRs = PhiDRs.at(P.first); - if (!DRs.hasAliasOf(R)) - continue; - R = PRI.mapTo(DRs.intersectWith(R), T.first); - for (std::pair<NodeId,LaneBitmask> V : T.second) { - LaneBitmask M = R.Mask & V.second; - if (M.none()) - continue; - if (RegisterRef SS = MidDefs.clearIn(RegisterRef(R.Reg, M))) { - NodeRefSet &RS = RealUseMap[P.first][SS.Reg]; - Changed |= RS.insert({V.first,SS.Mask}).second; - } - } - } - - if (Changed) - PhiUQ.push_back(P.first); - } - } - } - - if (Trace) { - dbgs() << "Real use map:\n"; - for (auto I : RealUseMap) { - dbgs() << "phi " << Print<NodeId>(I.first, DFG); - NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first); - NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG); - if (!Ds.empty()) { - RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(DFG); - dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>'; - } else { - dbgs() << "<noreg>"; - } - dbgs() << " -> " << Print<RefMap>(I.second, DFG) << '\n'; - } - } -} - -void Liveness::computeLiveIns() { - // Populate the node-to-block map. This speeds up the calculations - // significantly. - NBMap.clear(); - for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) { - MachineBasicBlock *BB = BA.Addr->getCode(); - for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) { - for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) - NBMap.insert(std::make_pair(RA.Id, BB)); - NBMap.insert(std::make_pair(IA.Id, BB)); - } - } - - MachineFunction &MF = DFG.getMF(); - - // Compute IDF first, then the inverse. - decltype(IIDF) IDF; - for (MachineBasicBlock &B : MF) { - auto F1 = MDF.find(&B); - if (F1 == MDF.end()) - continue; - SetVector<MachineBasicBlock*> IDFB(F1->second.begin(), F1->second.end()); - for (unsigned i = 0; i < IDFB.size(); ++i) { - auto F2 = MDF.find(IDFB[i]); - if (F2 != MDF.end()) - IDFB.insert(F2->second.begin(), F2->second.end()); - } - // Add B to the IDF(B). This will put B in the IIDF(B). - IDFB.insert(&B); - IDF[&B].insert(IDFB.begin(), IDFB.end()); - } - - for (auto I : IDF) - for (auto S : I.second) - IIDF[S].insert(I.first); - - computePhiInfo(); - - NodeAddr<FuncNode*> FA = DFG.getFunc(); - NodeList Blocks = FA.Addr->members(DFG); - - // Build the phi live-on-entry map. - for (NodeAddr<BlockNode*> BA : Blocks) { - MachineBasicBlock *MB = BA.Addr->getCode(); - RefMap &LON = PhiLON[MB]; - for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG)) - for (const RefMap::value_type &S : RealUseMap[P.Id]) - LON[S.first].insert(S.second.begin(), S.second.end()); - } - - if (Trace) { - dbgs() << "Phi live-on-entry map:\n"; - for (auto &I : PhiLON) - dbgs() << "block #" << I.first->getNumber() << " -> " - << Print<RefMap>(I.second, DFG) << '\n'; - } - - // Build the phi live-on-exit map. Each phi node has some set of reached - // "real" uses. Propagate this set backwards into the block predecessors - // through the reaching defs of the corresponding phi uses. - for (NodeAddr<BlockNode*> BA : Blocks) { - NodeList Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG); - for (NodeAddr<PhiNode*> PA : Phis) { - RefMap &RUs = RealUseMap[PA.Id]; - if (RUs.empty()) - continue; - - NodeSet SeenUses; - for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) { - if (!SeenUses.insert(U.Id).second) - continue; - NodeAddr<PhiUseNode*> PUA = U; - if (PUA.Addr->getReachingDef() == 0) - continue; - - // Each phi has some set (possibly empty) of reached "real" uses, - // that is, uses that are part of the compiled program. 
Such a use - // may be located in some farther block, but following a chain of - // reaching defs will eventually lead to this phi. - // Any chain of reaching defs may fork at a phi node, but there - // will be a path upwards that will lead to this phi. Now, this - // chain will need to fork at this phi, since some of the reached - // uses may have definitions joining in from multiple predecessors. - // For each reached "real" use, identify the set of reaching defs - // coming from each predecessor P, and add them to PhiLOX[P]. - // - auto PrA = DFG.addr<BlockNode*>(PUA.Addr->getPredecessor()); - RefMap &LOX = PhiLOX[PrA.Addr->getCode()]; - - for (const std::pair<const RegisterId, NodeRefSet> &RS : RUs) { - // We need to visit each individual use. - for (std::pair<NodeId,LaneBitmask> P : RS.second) { - // Create a register ref corresponding to the use, and find - // all reaching defs starting from the phi use, and treating - // all related shadows as a single use cluster. - RegisterRef S(RS.first, P.second); - NodeList Ds = getAllReachingDefs(S, PUA, true, false, NoRegs); - for (NodeAddr<DefNode*> D : Ds) { - // Calculate the mask corresponding to the visited def. - RegisterAggr TA(PRI); - TA.insert(D.Addr->getRegRef(DFG)).intersect(S); - LaneBitmask TM = TA.makeRegRef().Mask; - LOX[S.Reg].insert({D.Id, TM}); - } - } - } - - for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PA, PUA)) - SeenUses.insert(T.Id); - } // for U : phi uses - } // for P : Phis - } // for B : Blocks - - if (Trace) { - dbgs() << "Phi live-on-exit map:\n"; - for (auto &I : PhiLOX) - dbgs() << "block #" << I.first->getNumber() << " -> " - << Print<RefMap>(I.second, DFG) << '\n'; - } - - RefMap LiveIn; - traverse(&MF.front(), LiveIn); - - // Add function live-ins to the live-in set of the function entry block. - LiveMap[&MF.front()].insert(DFG.getLiveIns()); - - if (Trace) { - // Dump the liveness map - for (MachineBasicBlock &B : MF) { - std::vector<RegisterRef> LV; - for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) - LV.push_back(RegisterRef(I->PhysReg, I->LaneMask)); - llvm::sort(LV); - dbgs() << printMBBReference(B) << "\t rec = {"; - for (auto I : LV) - dbgs() << ' ' << Print<RegisterRef>(I, DFG); - dbgs() << " }\n"; - //dbgs() << "\tcomp = " << Print<RegisterAggr>(LiveMap[&B], DFG) << '\n'; - - LV.clear(); - const RegisterAggr &LG = LiveMap[&B]; - for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I) - LV.push_back(*I); - llvm::sort(LV); - dbgs() << "\tcomp = {"; - for (auto I : LV) - dbgs() << ' ' << Print<RegisterRef>(I, DFG); - dbgs() << " }\n"; - - } - } -} - -void Liveness::resetLiveIns() { - for (auto &B : DFG.getMF()) { - // Remove all live-ins. - std::vector<unsigned> T; - for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) - T.push_back(I->PhysReg); - for (auto I : T) - B.removeLiveIn(I); - // Add the newly computed live-ins. 
- const RegisterAggr &LiveIns = LiveMap[&B]; - for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) { - RegisterRef R = *I; - B.addLiveIn({MCPhysReg(R.Reg), R.Mask}); - } - } -} - -void Liveness::resetKills() { - for (auto &B : DFG.getMF()) - resetKills(&B); -} - -void Liveness::resetKills(MachineBasicBlock *B) { - auto CopyLiveIns = [this] (MachineBasicBlock *B, BitVector &LV) -> void { - for (auto I : B->liveins()) { - MCSubRegIndexIterator S(I.PhysReg, &TRI); - if (!S.isValid()) { - LV.set(I.PhysReg); - continue; - } - do { - LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex()); - if ((M & I.LaneMask).any()) - LV.set(S.getSubReg()); - ++S; - } while (S.isValid()); - } - }; - - BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs()); - CopyLiveIns(B, LiveIn); - for (auto SI : B->successors()) - CopyLiveIns(SI, Live); - - for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) { - MachineInstr *MI = &*I; - if (MI->isDebugInstr()) - continue; - - MI->clearKillInfo(); - for (auto &Op : MI->operands()) { - // An implicit def of a super-register may not necessarily start a - // live range of it, since an implicit use could be used to keep parts - // of it live. Instead of analyzing the implicit operands, ignore - // implicit defs. - if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) - continue; - Register R = Op.getReg(); - if (!Register::isPhysicalRegister(R)) - continue; - for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) - Live.reset(*SR); - } - for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isUse() || Op.isUndef()) - continue; - Register R = Op.getReg(); - if (!Register::isPhysicalRegister(R)) - continue; - bool IsLive = false; - for (MCRegAliasIterator AR(R, &TRI, true); AR.isValid(); ++AR) { - if (!Live[*AR]) - continue; - IsLive = true; - break; - } - if (!IsLive) - Op.setIsKill(true); - for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) - Live.set(*SR); - } - } -} - -// Helper function to obtain the basic block containing the reaching def -// of the given use. -MachineBasicBlock *Liveness::getBlockWithRef(NodeId RN) const { - auto F = NBMap.find(RN); - if (F != NBMap.end()) - return F->second; - llvm_unreachable("Node id not in map"); -} - -void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { - // The LiveIn map, for each (physical) register, contains the set of live - // reaching defs of that register that are live on entry to the associated - // block. - - // The summary of the traversal algorithm: - // - // R is live-in in B, if there exists a U(R), such that rdef(R) dom B - // and (U \in IDF(B) or B dom U). - // - // for (C : children) { - // LU = {} - // traverse(C, LU) - // LiveUses += LU - // } - // - // LiveUses -= Defs(B); - // LiveUses += UpwardExposedUses(B); - // for (C : IIDF[B]) - // for (U : LiveUses) - // if (Rdef(U) dom C) - // C.addLiveIn(U) - // - - // Go up the dominator tree (depth-first). 
-  MachineDomTreeNode *N = MDT.getNode(B);
-  for (auto I : *N) {
-    RefMap L;
-    MachineBasicBlock *SB = I->getBlock();
-    traverse(SB, L);
-
-    for (auto S : L)
-      LiveIn[S.first].insert(S.second.begin(), S.second.end());
-  }
-
-  if (Trace) {
-    dbgs() << "\n-- " << printMBBReference(*B) << ": " << __func__
-           << " after recursion into: {";
-    for (auto I : *N)
-      dbgs() << ' ' << I->getBlock()->getNumber();
-    dbgs() << " }\n";
-    dbgs() << "  LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
-    dbgs() << "  Local:  " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
-  }
-
-  // Add reaching defs of phi uses that are live on exit from this block.
-  RefMap &PUs = PhiLOX[B];
-  for (auto &S : PUs)
-    LiveIn[S.first].insert(S.second.begin(), S.second.end());
-
-  if (Trace) {
-    dbgs() << "after LOX\n";
-    dbgs() << "  LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
-    dbgs() << "  Local:  " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
-  }
-
-  // The LiveIn map at this point has all defs that are live-on-exit from B,
-  // as if they were live-on-entry to B. First, we need to filter out all
-  // defs that are present in this block. Then we will add reaching defs of
-  // all upward-exposed uses.
-
-  // To filter out the defs, first make a copy of LiveIn, and then re-populate
-  // LiveIn with the defs that should remain.
-  RefMap LiveInCopy = LiveIn;
-  LiveIn.clear();
-
-  for (const std::pair<const RegisterId, NodeRefSet> &LE : LiveInCopy) {
-    RegisterRef LRef(LE.first);
-    NodeRefSet &NewDefs = LiveIn[LRef.Reg]; // To be filled.
-    const NodeRefSet &OldDefs = LE.second;
-    for (NodeRef OR : OldDefs) {
-      // R is a def node that was live-on-exit
-      auto DA = DFG.addr<DefNode*>(OR.first);
-      NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG);
-      NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
-      if (B != BA.Addr->getCode()) {
-        // Defs from a different block need to be preserved. Defs from this
-        // block will need to be processed further, except for phi defs, the
-        // liveness of which is handled through the PhiLON/PhiLOX maps.
-        NewDefs.insert(OR);
-        continue;
-      }
-
-      // Defs from this block need to stop the liveness from being
-      // propagated upwards. This only applies to non-preserving defs,
-      // and to the parts of the register actually covered by those defs.
-      // (Note that phi defs should always be preserving.)
-      RegisterAggr RRs(PRI);
-      LRef.Mask = OR.second;
-
-      if (!DFG.IsPreservingDef(DA)) {
-        assert(!(IA.Addr->getFlags() & NodeAttrs::Phi));
-        // DA is a non-phi def that is live-on-exit from this block, and
-        // that is also located in this block. LRef is a register ref
-        // whose use this def reaches. If DA covers LRef, then no part
-        // of LRef is exposed upwards.
-        if (RRs.insert(DA.Addr->getRegRef(DFG)).hasCoverOf(LRef))
-          continue;
-      }
-
-      // DA itself was not sufficient to cover LRef. In general, it is
-      // the last in a chain of aliased defs before the exit from this block.
-      // There could be other defs in this block that are a part of that
-      // chain. Check that now: accumulate the registers from these defs,
-      // and if they all together cover LRef, it is not live-on-entry.
-      for (NodeAddr<DefNode*> TA : getAllReachingDefs(DA)) {
-        // DefNode -> InstrNode -> BlockNode.
-        NodeAddr<InstrNode*> ITA = TA.Addr->getOwner(DFG);
-        NodeAddr<BlockNode*> BTA = ITA.Addr->getOwner(DFG);
-        // Reaching defs are ordered in the upward direction.
-        if (BTA.Addr->getCode() != B) {
-          // We have reached past the beginning of B, and the accumulated
-          // registers are not covering LRef. The first def from the
-          // upward chain will be live.
-          // Subtract all accumulated defs (RRs) from LRef.
-          RegisterRef T = RRs.clearIn(LRef);
-          assert(T);
-          NewDefs.insert({TA.Id,T.Mask});
-          break;
-        }
-
-        // TA is in B. Only add this def to the accumulated cover if it is
-        // not preserving.
-        if (!(TA.Addr->getFlags() & NodeAttrs::Preserving))
-          RRs.insert(TA.Addr->getRegRef(DFG));
-        // If this is enough to cover LRef, then stop.
-        if (RRs.hasCoverOf(LRef))
-          break;
-      }
-    }
-  }
-
-  emptify(LiveIn);
-
-  if (Trace) {
-    dbgs() << "after defs in block\n";
-    dbgs() << "  LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
-    dbgs() << "  Local:  " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
-  }
-
-  // Scan the block for upward-exposed uses and add them to the tracking set.
-  for (auto I : DFG.getFunc().Addr->findBlock(B, DFG).Addr->members(DFG)) {
-    NodeAddr<InstrNode*> IA = I;
-    if (IA.Addr->getKind() != NodeAttrs::Stmt)
-      continue;
-    for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
-      if (UA.Addr->getFlags() & NodeAttrs::Undef)
-        continue;
-      RegisterRef RR = PRI.normalize(UA.Addr->getRegRef(DFG));
-      for (NodeAddr<DefNode*> D : getAllReachingDefs(UA))
-        if (getBlockWithRef(D.Id) != B)
-          LiveIn[RR.Reg].insert({D.Id,RR.Mask});
-    }
-  }
-
-  if (Trace) {
-    dbgs() << "after uses in block\n";
-    dbgs() << "  LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
-    dbgs() << "  Local:  " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
-  }
-
-  // Phi uses should not be propagated up the dominator tree, since they
-  // are not dominated by their corresponding reaching defs.
-  RegisterAggr &Local = LiveMap[B];
-  RefMap &LON = PhiLON[B];
-  for (auto &R : LON) {
-    LaneBitmask M;
-    for (auto P : R.second)
-      M |= P.second;
-    Local.insert(RegisterRef(R.first,M));
-  }
-
-  if (Trace) {
-    dbgs() << "after phi uses in block\n";
-    dbgs() << "  LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
-    dbgs() << "  Local:  " << Print<RegisterAggr>(Local, DFG) << '\n';
-  }
-
-  for (auto C : IIDF[B]) {
-    RegisterAggr &LiveC = LiveMap[C];
-    for (const std::pair<const RegisterId, NodeRefSet> &S : LiveIn)
-      for (auto R : S.second)
-        if (MDT.properlyDominates(getBlockWithRef(R.first), C))
-          LiveC.insert(RegisterRef(S.first, R.second));
-  }
-}
-
-void Liveness::emptify(RefMap &M) {
-  for (auto I = M.begin(), E = M.end(); I != E; )
-    I = I->second.empty() ? M.erase(I) : std::next(I);
-}
diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.h b/llvm/lib/Target/Hexagon/RDFLiveness.h
deleted file mode 100644
index ea4890271726..000000000000
--- a/llvm/lib/Target/Hexagon/RDFLiveness.h
+++ /dev/null
@@ -1,151 +0,0 @@
-//===- RDFLiveness.h --------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Recalculate the liveness information given a data flow graph.
-// This includes block live-ins and kill flags.
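The Liveness class declared below is driven in three steps: compute the phi information, compute the live-in sets, and then rewrite the block live-ins and kill flags. A minimal caller sketch, assuming a DataFlowGraph G that has already been built for the machine function MF (the variable names are illustrative, not taken from any one in-tree pass):

  // Recompute liveness from an existing RDF graph and re-apply it.
  rdf::Liveness LV(MF.getRegInfo(), G); // G is a built rdf::DataFlowGraph
  LV.trace(false);                      // optional debug tracing
  LV.computeLiveIns();                  // runs computePhiInfo() internally
  LV.resetLiveIns();                    // rewrite MachineBasicBlock live-in lists
  LV.resetKills();                      // recompute kill flags block by block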
- -#ifndef LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H -#define LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H - -#include "RDFGraph.h" -#include "RDFRegisters.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/MC/LaneBitmask.h" -#include <map> -#include <set> -#include <utility> - -namespace llvm { - -class MachineBasicBlock; -class MachineDominanceFrontier; -class MachineDominatorTree; -class MachineRegisterInfo; -class TargetRegisterInfo; - -namespace rdf { - - struct Liveness { - public: - // This is really a std::map, except that it provides a non-trivial - // default constructor to the element accessed via []. - struct LiveMapType { - LiveMapType(const PhysicalRegisterInfo &pri) : Empty(pri) {} - - RegisterAggr &operator[] (MachineBasicBlock *B) { - return Map.emplace(B, Empty).first->second; - } - - private: - RegisterAggr Empty; - std::map<MachineBasicBlock*,RegisterAggr> Map; - }; - - using NodeRef = std::pair<NodeId, LaneBitmask>; - using NodeRefSet = std::set<NodeRef>; - // RegisterId in RefMap must be normalized. - using RefMap = std::map<RegisterId, NodeRefSet>; - - Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g) - : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()), - MDF(g.getDF()), LiveMap(g.getPRI()), Empty(), NoRegs(g.getPRI()) {} - - NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA, - bool TopShadows, bool FullChain, const RegisterAggr &DefRRs); - - NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA) { - return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false, - false, NoRegs); - } - - NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA) { - return getAllReachingDefs(RefRR, RefA, false, false, NoRegs); - } - - NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA, - const RegisterAggr &DefRRs); - - NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA) { - return getAllReachedUses(RefRR, DefA, NoRegs); - } - - std::pair<NodeSet,bool> getAllReachingDefsRec(RegisterRef RefRR, - NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs); - - NodeAddr<RefNode*> getNearestAliasedRef(RegisterRef RefRR, - NodeAddr<InstrNode*> IA); - - LiveMapType &getLiveMap() { return LiveMap; } - const LiveMapType &getLiveMap() const { return LiveMap; } - - const RefMap &getRealUses(NodeId P) const { - auto F = RealUseMap.find(P); - return F == RealUseMap.end() ? Empty : F->second; - } - - void computePhiInfo(); - void computeLiveIns(); - void resetLiveIns(); - void resetKills(); - void resetKills(MachineBasicBlock *B); - - void trace(bool T) { Trace = T; } - - private: - const DataFlowGraph &DFG; - const TargetRegisterInfo &TRI; - const PhysicalRegisterInfo &PRI; - const MachineDominatorTree &MDT; - const MachineDominanceFrontier &MDF; - LiveMapType LiveMap; - const RefMap Empty; - const RegisterAggr NoRegs; - bool Trace = false; - - // Cache of mapping from node ids (for RefNodes) to the containing - // basic blocks. Not computing it each time for each node reduces - // the liveness calculation time by a large fraction. - using NodeBlockMap = DenseMap<NodeId, MachineBasicBlock *>; - NodeBlockMap NBMap; - - // Phi information: - // - // RealUseMap - // map: NodeId -> (map: RegisterId -> NodeRefSet) - // phi id -> (map: register -> set of reached non-phi uses) - std::map<NodeId, RefMap> RealUseMap; - - // Inverse iterated dominance frontier. - std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF; - - // Live on entry. 
- std::map<MachineBasicBlock*,RefMap> PhiLON; - - // Phi uses are considered to be located at the end of the block that - // they are associated with. The reaching def of a phi use dominates the - // block that the use corresponds to, but not the block that contains - // the phi itself. To include these uses in the liveness propagation (up - // the dominator tree), create a map: block -> set of uses live on exit. - std::map<MachineBasicBlock*,RefMap> PhiLOX; - - MachineBasicBlock *getBlockWithRef(NodeId RN) const; - void traverse(MachineBasicBlock *B, RefMap &LiveIn); - void emptify(RefMap &M); - - std::pair<NodeSet,bool> getAllReachingDefsRecImpl(RegisterRef RefRR, - NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs, - unsigned Nest, unsigned MaxNest); - }; - - raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P); - -} // end namespace rdf - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.cpp b/llvm/lib/Target/Hexagon/RDFRegisters.cpp deleted file mode 100644 index b5675784e34b..000000000000 --- a/llvm/lib/Target/Hexagon/RDFRegisters.cpp +++ /dev/null @@ -1,380 +0,0 @@ -//===- RDFRegisters.cpp ---------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "RDFRegisters.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/LaneBitmask.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include <cassert> -#include <cstdint> -#include <set> -#include <utility> - -using namespace llvm; -using namespace rdf; - -PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri, - const MachineFunction &mf) - : TRI(tri) { - RegInfos.resize(TRI.getNumRegs()); - - BitVector BadRC(TRI.getNumRegs()); - for (const TargetRegisterClass *RC : TRI.regclasses()) { - for (MCPhysReg R : *RC) { - RegInfo &RI = RegInfos[R]; - if (RI.RegClass != nullptr && !BadRC[R]) { - if (RC->LaneMask != RI.RegClass->LaneMask) { - BadRC.set(R); - RI.RegClass = nullptr; - } - } else - RI.RegClass = RC; - } - } - - UnitInfos.resize(TRI.getNumRegUnits()); - - for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) { - if (UnitInfos[U].Reg != 0) - continue; - MCRegUnitRootIterator R(U, &TRI); - assert(R.isValid()); - RegisterId F = *R; - ++R; - if (R.isValid()) { - UnitInfos[U].Mask = LaneBitmask::getAll(); - UnitInfos[U].Reg = F; - } else { - for (MCRegUnitMaskIterator I(F, &TRI); I.isValid(); ++I) { - std::pair<uint32_t,LaneBitmask> P = *I; - UnitInfo &UI = UnitInfos[P.first]; - UI.Reg = F; - if (P.second.any()) { - UI.Mask = P.second; - } else { - if (const TargetRegisterClass *RC = RegInfos[F].RegClass) - UI.Mask = RC->LaneMask; - else - UI.Mask = LaneBitmask::getAll(); - } - } - } - } - - for (const uint32_t *RM : TRI.getRegMasks()) - RegMasks.insert(RM); - for (const MachineBasicBlock &B : mf) - for (const MachineInstr &In : B) - for (const MachineOperand &Op : In.operands()) - if (Op.isRegMask()) - RegMasks.insert(Op.getRegMask()); - - 
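A regmask here is the call-preserved mask attached to call instructions: an array of uint32_t words with one bit per physical register, where a set bit means the register's value is preserved across the call. The loop that follows, and aliasRM() further down, both probe it with the same word/bit arithmetic; a hypothetical helper (not present in the source) makes the idiom explicit:

  // Illustrative only: bit R of a regmask lives in word R/32 at position
  // R%32; a set bit means register R is preserved (i.e. not clobbered).
  static bool isPreservedByRegMask(const uint32_t *Mask, unsigned Reg) {
    return Mask[Reg / 32] & (1u << (Reg % 32));
  }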
MaskInfos.resize(RegMasks.size()+1);
-  for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) {
-    BitVector PU(TRI.getNumRegUnits());
-    const uint32_t *MB = RegMasks.get(M);
-    for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
-      if (!(MB[i/32] & (1u << (i%32))))
-        continue;
-      for (MCRegUnitIterator U(i, &TRI); U.isValid(); ++U)
-        PU.set(*U);
-    }
-    MaskInfos[M].Units = PU.flip();
-  }
-}
-
-RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const {
-  return RR;
-}
-
-std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
-  // Do not include RR in the alias set.
-  std::set<RegisterId> AS;
-  assert(isRegMaskId(Reg) || Register::isPhysicalRegister(Reg));
-  if (isRegMaskId(Reg)) {
-    // XXX SLOW
-    const uint32_t *MB = getRegMaskBits(Reg);
-    for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
-      if (MB[i/32] & (1u << (i%32)))
-        continue;
-      AS.insert(i);
-    }
-    for (const uint32_t *RM : RegMasks) {
-      RegisterId MI = getRegMaskId(RM);
-      if (MI != Reg && aliasMM(RegisterRef(Reg), RegisterRef(MI)))
-        AS.insert(MI);
-    }
-    return AS;
-  }
-
-  for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI)
-    AS.insert(*AI);
-  for (const uint32_t *RM : RegMasks) {
-    RegisterId MI = getRegMaskId(RM);
-    if (aliasRM(RegisterRef(Reg), RegisterRef(MI)))
-      AS.insert(MI);
-  }
-  return AS;
-}
-
-bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const {
-  assert(Register::isPhysicalRegister(RA.Reg));
-  assert(Register::isPhysicalRegister(RB.Reg));
-
-  MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
-  MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
-  // Reg units are returned in the numerical order.
-  while (UMA.isValid() && UMB.isValid()) {
-    // Skip units that are masked off in RA.
-    std::pair<RegisterId,LaneBitmask> PA = *UMA;
-    if (PA.second.any() && (PA.second & RA.Mask).none()) {
-      ++UMA;
-      continue;
-    }
-    // Skip units that are masked off in RB.
-    std::pair<RegisterId,LaneBitmask> PB = *UMB;
-    if (PB.second.any() && (PB.second & RB.Mask).none()) {
-      ++UMB;
-      continue;
-    }
-
-    if (PA.first == PB.first)
-      return true;
-    if (PA.first < PB.first)
-      ++UMA;
-    else if (PB.first < PA.first)
-      ++UMB;
-  }
-  return false;
-}
-
-bool PhysicalRegisterInfo::aliasRM(RegisterRef RR, RegisterRef RM) const {
-  assert(Register::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg));
-  const uint32_t *MB = getRegMaskBits(RM.Reg);
-  bool Preserved = MB[RR.Reg/32] & (1u << (RR.Reg%32));
-  // If the lane mask information is "full", e.g. when the given lane mask
-  // is a superset of the lane mask from the register class, check the regmask
-  // bit directly.
-  if (RR.Mask == LaneBitmask::getAll())
-    return !Preserved;
-  const TargetRegisterClass *RC = RegInfos[RR.Reg].RegClass;
-  if (RC != nullptr && (RR.Mask & RC->LaneMask) == RC->LaneMask)
-    return !Preserved;
-
-  // Otherwise, check all subregisters whose lane mask overlaps the given
-  // mask. For each such register, if it is preserved by the regmask, then
-  // clear the corresponding bits in the given mask. If at the end, all
-  // bits have been cleared, the register does not alias the regmask (i.e.
-  // it is preserved by it).
-  LaneBitmask M = RR.Mask;
-  for (MCSubRegIndexIterator SI(RR.Reg, &TRI); SI.isValid(); ++SI) {
-    LaneBitmask SM = TRI.getSubRegIndexLaneMask(SI.getSubRegIndex());
-    if ((SM & RR.Mask).none())
-      continue;
-    unsigned SR = SI.getSubReg();
-    if (!(MB[SR/32] & (1u << (SR%32))))
-      continue;
-    // The subregister SR is preserved.
- M &= ~SM; - if (M.none()) - return false; - } - - return true; -} - -bool PhysicalRegisterInfo::aliasMM(RegisterRef RM, RegisterRef RN) const { - assert(isRegMaskId(RM.Reg) && isRegMaskId(RN.Reg)); - unsigned NumRegs = TRI.getNumRegs(); - const uint32_t *BM = getRegMaskBits(RM.Reg); - const uint32_t *BN = getRegMaskBits(RN.Reg); - - for (unsigned w = 0, nw = NumRegs/32; w != nw; ++w) { - // Intersect the negations of both words. Disregard reg=0, - // i.e. 0th bit in the 0th word. - uint32_t C = ~BM[w] & ~BN[w]; - if (w == 0) - C &= ~1; - if (C) - return true; - } - - // Check the remaining registers in the last word. - unsigned TailRegs = NumRegs % 32; - if (TailRegs == 0) - return false; - unsigned TW = NumRegs / 32; - uint32_t TailMask = (1u << TailRegs) - 1; - if (~BM[TW] & ~BN[TW] & TailMask) - return true; - - return false; -} - -RegisterRef PhysicalRegisterInfo::mapTo(RegisterRef RR, unsigned R) const { - if (RR.Reg == R) - return RR; - if (unsigned Idx = TRI.getSubRegIndex(R, RR.Reg)) - return RegisterRef(R, TRI.composeSubRegIndexLaneMask(Idx, RR.Mask)); - if (unsigned Idx = TRI.getSubRegIndex(RR.Reg, R)) { - const RegInfo &RI = RegInfos[R]; - LaneBitmask RCM = RI.RegClass ? RI.RegClass->LaneMask - : LaneBitmask::getAll(); - LaneBitmask M = TRI.reverseComposeSubRegIndexLaneMask(Idx, RR.Mask); - return RegisterRef(R, M & RCM); - } - llvm_unreachable("Invalid arguments: unrelated registers?"); -} - -bool RegisterAggr::hasAliasOf(RegisterRef RR) const { - if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) - return Units.anyCommon(PRI.getMaskUnits(RR.Reg)); - - for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { - std::pair<uint32_t,LaneBitmask> P = *U; - if (P.second.none() || (P.second & RR.Mask).any()) - if (Units.test(P.first)) - return true; - } - return false; -} - -bool RegisterAggr::hasCoverOf(RegisterRef RR) const { - if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) { - BitVector T(PRI.getMaskUnits(RR.Reg)); - return T.reset(Units).none(); - } - - for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { - std::pair<uint32_t,LaneBitmask> P = *U; - if (P.second.none() || (P.second & RR.Mask).any()) - if (!Units.test(P.first)) - return false; - } - return true; -} - -RegisterAggr &RegisterAggr::insert(RegisterRef RR) { - if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) { - Units |= PRI.getMaskUnits(RR.Reg); - return *this; - } - - for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { - std::pair<uint32_t,LaneBitmask> P = *U; - if (P.second.none() || (P.second & RR.Mask).any()) - Units.set(P.first); - } - return *this; -} - -RegisterAggr &RegisterAggr::insert(const RegisterAggr &RG) { - Units |= RG.Units; - return *this; -} - -RegisterAggr &RegisterAggr::intersect(RegisterRef RR) { - return intersect(RegisterAggr(PRI).insert(RR)); -} - -RegisterAggr &RegisterAggr::intersect(const RegisterAggr &RG) { - Units &= RG.Units; - return *this; -} - -RegisterAggr &RegisterAggr::clear(RegisterRef RR) { - return clear(RegisterAggr(PRI).insert(RR)); -} - -RegisterAggr &RegisterAggr::clear(const RegisterAggr &RG) { - Units.reset(RG.Units); - return *this; -} - -RegisterRef RegisterAggr::intersectWith(RegisterRef RR) const { - RegisterAggr T(PRI); - T.insert(RR).intersect(*this); - if (T.empty()) - return RegisterRef(); - RegisterRef NR = T.makeRegRef(); - assert(NR); - return NR; -} - -RegisterRef RegisterAggr::clearIn(RegisterRef RR) const { - return RegisterAggr(PRI).insert(RR).clear(*this).makeRegRef(); -} - -RegisterRef 
RegisterAggr::makeRegRef() const { - int U = Units.find_first(); - if (U < 0) - return RegisterRef(); - - auto AliasedRegs = [this] (uint32_t Unit, BitVector &Regs) { - for (MCRegUnitRootIterator R(Unit, &PRI.getTRI()); R.isValid(); ++R) - for (MCSuperRegIterator S(*R, &PRI.getTRI(), true); S.isValid(); ++S) - Regs.set(*S); - }; - - // Find the set of all registers that are aliased to all the units - // in this aggregate. - - // Get all the registers aliased to the first unit in the bit vector. - BitVector Regs(PRI.getTRI().getNumRegs()); - AliasedRegs(U, Regs); - U = Units.find_next(U); - - // For each other unit, intersect it with the set of all registers - // aliased that unit. - while (U >= 0) { - BitVector AR(PRI.getTRI().getNumRegs()); - AliasedRegs(U, AR); - Regs &= AR; - U = Units.find_next(U); - } - - // If there is at least one register remaining, pick the first one, - // and consolidate the masks of all of its units contained in this - // aggregate. - - int F = Regs.find_first(); - if (F <= 0) - return RegisterRef(); - - LaneBitmask M; - for (MCRegUnitMaskIterator I(F, &PRI.getTRI()); I.isValid(); ++I) { - std::pair<uint32_t,LaneBitmask> P = *I; - if (Units.test(P.first)) - M |= P.second.none() ? LaneBitmask::getAll() : P.second; - } - return RegisterRef(F, M); -} - -void RegisterAggr::print(raw_ostream &OS) const { - OS << '{'; - for (int U = Units.find_first(); U >= 0; U = Units.find_next(U)) - OS << ' ' << printRegUnit(U, &PRI.getTRI()); - OS << " }"; -} - -RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG, - bool End) - : Owner(&RG) { - for (int U = RG.Units.find_first(); U >= 0; U = RG.Units.find_next(U)) { - RegisterRef R = RG.PRI.getRefForUnit(U); - Masks[R.Reg] |= R.Mask; - } - Pos = End ? Masks.end() : Masks.begin(); - Index = End ? Masks.size() : 0; -} diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.h b/llvm/lib/Target/Hexagon/RDFRegisters.h deleted file mode 100644 index 4afaf80e4659..000000000000 --- a/llvm/lib/Target/Hexagon/RDFRegisters.h +++ /dev/null @@ -1,240 +0,0 @@ -//===- RDFRegisters.h -------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H -#define LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H - -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/LaneBitmask.h" -#include <cassert> -#include <cstdint> -#include <map> -#include <set> -#include <vector> - -namespace llvm { - -class MachineFunction; -class raw_ostream; - -namespace rdf { - - using RegisterId = uint32_t; - - // Template class for a map translating uint32_t into arbitrary types. - // The map will act like an indexed set: upon insertion of a new object, - // it will automatically assign a new index to it. Index of 0 is treated - // as invalid and is never allocated. - template <typename T, unsigned N = 32> - struct IndexedSet { - IndexedSet() { Map.reserve(N); } - - T get(uint32_t Idx) const { - // Index Idx corresponds to Map[Idx-1]. - assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size()); - return Map[Idx-1]; - } - - uint32_t insert(T Val) { - // Linear search. 
- auto F = llvm::find(Map, Val); - if (F != Map.end()) - return F - Map.begin() + 1; - Map.push_back(Val); - return Map.size(); // Return actual_index + 1. - } - - uint32_t find(T Val) const { - auto F = llvm::find(Map, Val); - assert(F != Map.end()); - return F - Map.begin() + 1; - } - - uint32_t size() const { return Map.size(); } - - using const_iterator = typename std::vector<T>::const_iterator; - - const_iterator begin() const { return Map.begin(); } - const_iterator end() const { return Map.end(); } - - private: - std::vector<T> Map; - }; - - struct RegisterRef { - RegisterId Reg = 0; - LaneBitmask Mask = LaneBitmask::getNone(); - - RegisterRef() = default; - explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll()) - : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {} - - operator bool() const { - return Reg != 0 && Mask.any(); - } - - bool operator== (const RegisterRef &RR) const { - return Reg == RR.Reg && Mask == RR.Mask; - } - - bool operator!= (const RegisterRef &RR) const { - return !operator==(RR); - } - - bool operator< (const RegisterRef &RR) const { - return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask); - } - }; - - - struct PhysicalRegisterInfo { - PhysicalRegisterInfo(const TargetRegisterInfo &tri, - const MachineFunction &mf); - - static bool isRegMaskId(RegisterId R) { - return Register::isStackSlot(R); - } - - RegisterId getRegMaskId(const uint32_t *RM) const { - return Register::index2StackSlot(RegMasks.find(RM)); - } - - const uint32_t *getRegMaskBits(RegisterId R) const { - return RegMasks.get(Register::stackSlot2Index(R)); - } - - RegisterRef normalize(RegisterRef RR) const; - - bool alias(RegisterRef RA, RegisterRef RB) const { - if (!isRegMaskId(RA.Reg)) - return !isRegMaskId(RB.Reg) ? aliasRR(RA, RB) : aliasRM(RA, RB); - return !isRegMaskId(RB.Reg) ? 
aliasRM(RB, RA) : aliasMM(RA, RB); - } - - std::set<RegisterId> getAliasSet(RegisterId Reg) const; - - RegisterRef getRefForUnit(uint32_t U) const { - return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask); - } - - const BitVector &getMaskUnits(RegisterId MaskId) const { - return MaskInfos[Register::stackSlot2Index(MaskId)].Units; - } - - RegisterRef mapTo(RegisterRef RR, unsigned R) const; - const TargetRegisterInfo &getTRI() const { return TRI; } - - private: - struct RegInfo { - const TargetRegisterClass *RegClass = nullptr; - }; - struct UnitInfo { - RegisterId Reg = 0; - LaneBitmask Mask; - }; - struct MaskInfo { - BitVector Units; - }; - - const TargetRegisterInfo &TRI; - IndexedSet<const uint32_t*> RegMasks; - std::vector<RegInfo> RegInfos; - std::vector<UnitInfo> UnitInfos; - std::vector<MaskInfo> MaskInfos; - - bool aliasRR(RegisterRef RA, RegisterRef RB) const; - bool aliasRM(RegisterRef RR, RegisterRef RM) const; - bool aliasMM(RegisterRef RM, RegisterRef RN) const; - }; - - struct RegisterAggr { - RegisterAggr(const PhysicalRegisterInfo &pri) - : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {} - RegisterAggr(const RegisterAggr &RG) = default; - - bool empty() const { return Units.none(); } - bool hasAliasOf(RegisterRef RR) const; - bool hasCoverOf(RegisterRef RR) const; - - static bool isCoverOf(RegisterRef RA, RegisterRef RB, - const PhysicalRegisterInfo &PRI) { - return RegisterAggr(PRI).insert(RA).hasCoverOf(RB); - } - - RegisterAggr &insert(RegisterRef RR); - RegisterAggr &insert(const RegisterAggr &RG); - RegisterAggr &intersect(RegisterRef RR); - RegisterAggr &intersect(const RegisterAggr &RG); - RegisterAggr &clear(RegisterRef RR); - RegisterAggr &clear(const RegisterAggr &RG); - - RegisterRef intersectWith(RegisterRef RR) const; - RegisterRef clearIn(RegisterRef RR) const; - RegisterRef makeRegRef() const; - - void print(raw_ostream &OS) const; - - struct rr_iterator { - using MapType = std::map<RegisterId, LaneBitmask>; - - private: - MapType Masks; - MapType::iterator Pos; - unsigned Index; - const RegisterAggr *Owner; - - public: - rr_iterator(const RegisterAggr &RG, bool End); - - RegisterRef operator*() const { - return RegisterRef(Pos->first, Pos->second); - } - - rr_iterator &operator++() { - ++Pos; - ++Index; - return *this; - } - - bool operator==(const rr_iterator &I) const { - assert(Owner == I.Owner); - (void)Owner; - return Index == I.Index; - } - - bool operator!=(const rr_iterator &I) const { - return !(*this == I); - } - }; - - rr_iterator rr_begin() const { - return rr_iterator(*this, false); - } - rr_iterator rr_end() const { - return rr_iterator(*this, true); - } - - private: - BitVector Units; - const PhysicalRegisterInfo &PRI; - }; - - // Optionally print the lane mask, if it is not ~0. 
- struct PrintLaneMaskOpt { - PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {} - LaneBitmask Mask; - }; - raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P); - -} // end namespace rdf - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index 9b3d13989ee2..d7e3519d5539 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], VMSUMSHS, VMSUMUBM, VMSUMUHM, + VMSUMUDM, VMSUMUHS, VMULESB, VMULESH, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 00f59bba52e8..ca1649fae258 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } + if (Subtarget.isISA3_0()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal); + setTruncStoreAction(MVT::f64, MVT::f16, Legal); + setTruncStoreAction(MVT::f32, MVT::f16, Legal); + } else { + // No extending loads from f16 or HW conversions back and forth. + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + } + setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. @@ -677,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } + setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand); if (!Subtarget.hasP8Vector()) { setOperationAction(ISD::SMAX, MVT::v2i64, Expand); setOperationAction(ISD::SMIN, MVT::v2i64, Expand); @@ -10361,6 +10379,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::FP_EXTEND && "Should only be called for ISD::FP_EXTEND"); + // FIXME: handle extends from half precision float vectors on P9. // We only want to custom lower an extend from v2f32 to v2f64. if (Op.getValueType() != MVT::v2f64 || Op.getOperand(0).getValueType() != MVT::v2f32) @@ -10574,6 +10593,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: // Don't handle bitcast here. 
return; + case ISD::FP_EXTEND: + SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG); + if (Lowered) + Results.push_back(Lowered); + return; } } @@ -15255,7 +15279,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, if (!VT.isSimple()) return false; - if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess()) + if (VT.isFloatingPoint() && !VT.isVector() && + !Subtarget.allowsUnalignedFPAccess()) return false; if (VT.getSimpleVT().isVector()) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index e0c381827b87..2e1485373d19 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -637,7 +637,7 @@ namespace llvm { /// then the VPERM for the shuffle. All in all a very slow sequence. TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override { - if (VT.getScalarSizeInBits() % 8 == 0) + if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index f94816a35f79..6e8635f2413c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1342,6 +1342,10 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>; def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">; let Predicates = [HasP9Altivec] in { +// Vector Multiply-Sum +def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm, + v1i128, v2i64, v1i128>; + // i8 element comparisons. def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>; def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 30906a32b00c..d7925befcd37 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2631,6 +2631,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, if (Opc != PPC::ADDI && Opc != PPC::ADDI8) return false; + // The operand may not necessarily be an immediate - it could be a relocation. + if (!ADDIMI.getOperand(2).isImm()) + return false; + Imm = ADDIMI.getOperand(2).getImm(); return true; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index be6b30ffa08b..95e5ff6b130d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; + // Load/convert and convert/store patterns for f16. 
+ def : Pat<(f64 (extloadf16 xoaddr:$src)), + (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>; + def : Pat<(truncstoref16 f64:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>; + def : Pat<(f32 (extloadf16 xoaddr:$src)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; + def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; + def : Pat<(f64 (f16_to_fp i32:$A)), + (f64 (XSCVHPDP (MTVSRWZ $A)))>; + def : Pat<(f32 (f16_to_fp i32:$A)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>; + def : Pat<(i32 (fp_to_f16 f32:$A)), + (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>; + def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>; + let Predicates = [IsBigEndian, HasP9Vector] in { // Scalar stores of i8 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), diff --git a/llvm/lib/Target/X86/ImmutableGraph.h b/llvm/lib/Target/X86/ImmutableGraph.h new file mode 100644 index 000000000000..5833017037a5 --- /dev/null +++ b/llvm/lib/Target/X86/ImmutableGraph.h @@ -0,0 +1,446 @@ +//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: ImmutableGraph is a fast DAG implementation that cannot be +/// modified, except by creating a new ImmutableGraph. ImmutableGraph is +/// implemented as two arrays: one containing nodes, and one containing edges. +/// The advantages to this implementation are two-fold: +/// 1. Iteration and traversal operations benefit from cache locality. +/// 2. Operations on sets of nodes/edges are efficient, and representations of +/// those sets in memory are compact. For instance, a set of edges is +/// implemented as a bit vector, wherein each bit corresponds to one edge in +/// the edge array. This implies a lower bound of 64x spatial improvement +/// over, e.g., an llvm::DenseSet or llvm::SmallSet. It also means that +/// insert/erase/contains operations complete in negligible constant time: +/// insert and erase require one load and one store, and contains requires +/// just one load. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H +#define LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <iterator> +#include <utility> +#include <vector> + +namespace llvm { + +template <typename NodeValueT, typename EdgeValueT> class ImmutableGraph { + using Traits = GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *>; + template <typename> friend class ImmutableGraphBuilder; + +public: + using node_value_type = NodeValueT; + using edge_value_type = EdgeValueT; + using size_type = int; + class Node; + class Edge { + friend class ImmutableGraph; + template <typename> friend class ImmutableGraphBuilder; + + const Node *Dest; + edge_value_type Value; + + public: + const Node *getDest() const { return Dest; }; + const edge_value_type &getValue() const { return Value; } + }; + class Node { + friend class ImmutableGraph; + template <typename> friend class ImmutableGraphBuilder; + + const Edge *Edges; + node_value_type Value; + + public: + const node_value_type &getValue() const { return Value; } + + const Edge *edges_begin() const { return Edges; } + // Nodes are allocated sequentially. Edges for a node are stored together. + // The end of this Node's edges is the beginning of the next node's edges. + // An extra node was allocated to hold the end pointer for the last real + // node. + const Edge *edges_end() const { return (this + 1)->Edges; } + ArrayRef<Edge> edges() const { + return makeArrayRef(edges_begin(), edges_end()); + } + }; + +protected: + ImmutableGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges, + size_type NodesSize, size_type EdgesSize) + : Nodes(std::move(Nodes)), Edges(std::move(Edges)), NodesSize(NodesSize), + EdgesSize(EdgesSize) {} + ImmutableGraph(const ImmutableGraph &) = delete; + ImmutableGraph(ImmutableGraph &&) = delete; + ImmutableGraph &operator=(const ImmutableGraph &) = delete; + ImmutableGraph &operator=(ImmutableGraph &&) = delete; + +public: + ArrayRef<Node> nodes() const { return makeArrayRef(Nodes.get(), NodesSize); } + const Node *nodes_begin() const { return nodes().begin(); } + const Node *nodes_end() const { return nodes().end(); } + + ArrayRef<Edge> edges() const { return makeArrayRef(Edges.get(), EdgesSize); } + const Edge *edges_begin() const { return edges().begin(); } + const Edge *edges_end() const { return edges().end(); } + + size_type nodes_size() const { return NodesSize; } + size_type edges_size() const { return EdgesSize; } + + // Node N must belong to this ImmutableGraph. + size_type getNodeIndex(const Node &N) const { + return std::distance(nodes_begin(), &N); + } + // Edge E must belong to this ImmutableGraph. + size_type getEdgeIndex(const Edge &E) const { + return std::distance(edges_begin(), &E); + } + + // FIXME: Could NodeSet and EdgeSet be templated to share code? 
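The FIXME above invites an obvious refactoring: both NodeSet and EdgeSet are a BitVector keyed by a dense index, differing only in the domain size and the index function. One possible shape, offered purely as a sketch (nothing like it exists in this header):

  // Hypothetical shared base for NodeSet/EdgeSet: a bit-vector set over
  // densely indexed elements. NodeSet would instantiate it with
  // nodes_size()/getNodeIndex, EdgeSet with edges_size()/getEdgeIndex.
  template <typename ElemT, typename IndexFnT> class DenseElemSet {
    IndexFnT Index; // maps an element to its dense index in the graph
    BitVector V;    // one bit per element of the domain

  public:
    DenseElemSet(int DomainSize, IndexFnT Index, bool ContainsAll = false)
        : Index(std::move(Index)),
          V(static_cast<unsigned>(DomainSize), ContainsAll) {}

    bool insert(const ElemT &E) {
      int Idx = Index(E);
      bool AlreadyExists = V.test(Idx);
      V.set(Idx);
      return !AlreadyExists;
    }
    void erase(const ElemT &E) { V.reset(Index(E)); }
    bool contains(const ElemT &E) const { return V.test(Index(E)); }
    void clear() { V.reset(); }
  };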
+ class NodeSet { + const ImmutableGraph &G; + BitVector V; + + public: + NodeSet(const ImmutableGraph &G, bool ContainsAll = false) + : G{G}, V{static_cast<unsigned>(G.nodes_size()), ContainsAll} {} + bool insert(const Node &N) { + size_type Idx = G.getNodeIndex(N); + bool AlreadyExists = V.test(Idx); + V.set(Idx); + return !AlreadyExists; + } + void erase(const Node &N) { + size_type Idx = G.getNodeIndex(N); + V.reset(Idx); + } + bool contains(const Node &N) const { + size_type Idx = G.getNodeIndex(N); + return V.test(Idx); + } + void clear() { V.reset(); } + size_type empty() const { return V.none(); } + /// Return the number of elements in the set + size_type count() const { return V.count(); } + /// Return the size of the set's domain + size_type size() const { return V.size(); } + /// Set union + NodeSet &operator|=(const NodeSet &RHS) { + assert(&this->G == &RHS.G); + V |= RHS.V; + return *this; + } + /// Set intersection + NodeSet &operator&=(const NodeSet &RHS) { + assert(&this->G == &RHS.G); + V &= RHS.V; + return *this; + } + /// Set disjoint union + NodeSet &operator^=(const NodeSet &RHS) { + assert(&this->G == &RHS.G); + V ^= RHS.V; + return *this; + } + + using index_iterator = typename BitVector::const_set_bits_iterator; + index_iterator index_begin() const { return V.set_bits_begin(); } + index_iterator index_end() const { return V.set_bits_end(); } + void set(size_type Idx) { V.set(Idx); } + void reset(size_type Idx) { V.reset(Idx); } + + class iterator { + const NodeSet &Set; + size_type Current; + + void advance() { + assert(Current != -1); + Current = Set.V.find_next(Current); + } + + public: + iterator(const NodeSet &Set, size_type Begin) + : Set{Set}, Current{Begin} {} + iterator operator++(int) { + iterator Tmp = *this; + advance(); + return Tmp; + } + iterator &operator++() { + advance(); + return *this; + } + Node *operator*() const { + assert(Current != -1); + return Set.G.nodes_begin() + Current; + } + bool operator==(const iterator &other) const { + assert(&this->Set == &other.Set); + return this->Current == other.Current; + } + bool operator!=(const iterator &other) const { return !(*this == other); } + }; + + iterator begin() const { return iterator{*this, V.find_first()}; } + iterator end() const { return iterator{*this, -1}; } + }; + + class EdgeSet { + const ImmutableGraph &G; + BitVector V; + + public: + EdgeSet(const ImmutableGraph &G, bool ContainsAll = false) + : G{G}, V{static_cast<unsigned>(G.edges_size()), ContainsAll} {} + bool insert(const Edge &E) { + size_type Idx = G.getEdgeIndex(E); + bool AlreadyExists = V.test(Idx); + V.set(Idx); + return !AlreadyExists; + } + void erase(const Edge &E) { + size_type Idx = G.getEdgeIndex(E); + V.reset(Idx); + } + bool contains(const Edge &E) const { + size_type Idx = G.getEdgeIndex(E); + return V.test(Idx); + } + void clear() { V.reset(); } + bool empty() const { return V.none(); } + /// Return the number of elements in the set + size_type count() const { return V.count(); } + /// Return the size of the set's domain + size_type size() const { return V.size(); } + /// Set union + EdgeSet &operator|=(const EdgeSet &RHS) { + assert(&this->G == &RHS.G); + V |= RHS.V; + return *this; + } + /// Set intersection + EdgeSet &operator&=(const EdgeSet &RHS) { + assert(&this->G == &RHS.G); + V &= RHS.V; + return *this; + } + /// Set disjoint union + EdgeSet &operator^=(const EdgeSet &RHS) { + assert(&this->G == &RHS.G); + V ^= RHS.V; + return *this; + } + + using index_iterator = typename 
BitVector::const_set_bits_iterator; + index_iterator index_begin() const { return V.set_bits_begin(); } + index_iterator index_end() const { return V.set_bits_end(); } + void set(size_type Idx) { V.set(Idx); } + void reset(size_type Idx) { V.reset(Idx); } + + class iterator { + const EdgeSet &Set; + size_type Current; + + void advance() { + assert(Current != -1); + Current = Set.V.find_next(Current); + } + + public: + iterator(const EdgeSet &Set, size_type Begin) + : Set{Set}, Current{Begin} {} + iterator operator++(int) { + iterator Tmp = *this; + advance(); + return Tmp; + } + iterator &operator++() { + advance(); + return *this; + } + Edge *operator*() const { + assert(Current != -1); + return Set.G.edges_begin() + Current; + } + bool operator==(const iterator &other) const { + assert(&this->Set == &other.Set); + return this->Current == other.Current; + } + bool operator!=(const iterator &other) const { return !(*this == other); } + }; + + iterator begin() const { return iterator{*this, V.find_first()}; } + iterator end() const { return iterator{*this, -1}; } + }; + +private: + std::unique_ptr<Node[]> Nodes; + std::unique_ptr<Edge[]> Edges; + size_type NodesSize; + size_type EdgesSize; +}; + +template <typename GraphT> class ImmutableGraphBuilder { + using node_value_type = typename GraphT::node_value_type; + using edge_value_type = typename GraphT::edge_value_type; + static_assert( + std::is_base_of<ImmutableGraph<node_value_type, edge_value_type>, + GraphT>::value, + "Template argument to ImmutableGraphBuilder must derive from " + "ImmutableGraph<>"); + using size_type = typename GraphT::size_type; + using NodeSet = typename GraphT::NodeSet; + using Node = typename GraphT::Node; + using EdgeSet = typename GraphT::EdgeSet; + using Edge = typename GraphT::Edge; + using BuilderEdge = std::pair<edge_value_type, size_type>; + using EdgeList = std::vector<BuilderEdge>; + using BuilderVertex = std::pair<node_value_type, EdgeList>; + using VertexVec = std::vector<BuilderVertex>; + +public: + using BuilderNodeRef = size_type; + + BuilderNodeRef addVertex(const node_value_type &V) { + auto I = AdjList.emplace(AdjList.end(), V, EdgeList{}); + return std::distance(AdjList.begin(), I); + } + + void addEdge(const edge_value_type &E, BuilderNodeRef From, + BuilderNodeRef To) { + AdjList[From].second.emplace_back(E, To); + } + + bool empty() const { return AdjList.empty(); } + + template <typename... ArgT> std::unique_ptr<GraphT> get(ArgT &&... Args) { + size_type VertexSize = AdjList.size(), EdgeSize = 0; + for (const auto &V : AdjList) { + EdgeSize += V.second.size(); + } + auto VertexArray = + std::make_unique<Node[]>(VertexSize + 1 /* terminator node */); + auto EdgeArray = std::make_unique<Edge[]>(EdgeSize); + size_type VI = 0, EI = 0; + for (; VI < VertexSize; ++VI) { + VertexArray[VI].Value = std::move(AdjList[VI].first); + VertexArray[VI].Edges = &EdgeArray[EI]; + auto NumEdges = static_cast<size_type>(AdjList[VI].second.size()); + for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) { + auto &E = AdjList[VI].second[VEI]; + EdgeArray[EI].Value = std::move(E.first); + EdgeArray[EI].Dest = &VertexArray[E.second]; + } + } + assert(VI == VertexSize && EI == EdgeSize && "ImmutableGraph malformed"); + VertexArray[VI].Edges = &EdgeArray[EdgeSize]; // terminator node + return std::make_unique<GraphT>(std::move(VertexArray), + std::move(EdgeArray), VertexSize, EdgeSize, + std::forward<ArgT>(Args)...); + } + + template <typename... 
ArgT> + static std::unique_ptr<GraphT> trim(const GraphT &G, const NodeSet &TrimNodes, + const EdgeSet &TrimEdges, + ArgT &&... Args) { + size_type NewVertexSize = G.nodes_size() - TrimNodes.count(); + size_type NewEdgeSize = G.edges_size() - TrimEdges.count(); + auto NewVertexArray = + std::make_unique<Node[]>(NewVertexSize + 1 /* terminator node */); + auto NewEdgeArray = std::make_unique<Edge[]>(NewEdgeSize); + + // Walk the nodes and determine the new index for each node. + size_type NewNodeIndex = 0; + std::vector<size_type> RemappedNodeIndex(G.nodes_size()); + for (const Node &N : G.nodes()) { + if (TrimNodes.contains(N)) + continue; + RemappedNodeIndex[G.getNodeIndex(N)] = NewNodeIndex++; + } + assert(NewNodeIndex == NewVertexSize && + "Should have assigned NewVertexSize indices"); + + size_type VertexI = 0, EdgeI = 0; + for (const Node &N : G.nodes()) { + if (TrimNodes.contains(N)) + continue; + NewVertexArray[VertexI].Value = N.getValue(); + NewVertexArray[VertexI].Edges = &NewEdgeArray[EdgeI]; + for (const Edge &E : N.edges()) { + if (TrimEdges.contains(E)) + continue; + NewEdgeArray[EdgeI].Value = E.getValue(); + size_type DestIdx = G.getNodeIndex(*E.getDest()); + size_type NewIdx = RemappedNodeIndex[DestIdx]; + assert(NewIdx < NewVertexSize); + NewEdgeArray[EdgeI].Dest = &NewVertexArray[NewIdx]; + ++EdgeI; + } + ++VertexI; + } + assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize && + "Gadget graph malformed"); + NewVertexArray[VertexI].Edges = &NewEdgeArray[NewEdgeSize]; // terminator + return std::make_unique<GraphT>(std::move(NewVertexArray), + std::move(NewEdgeArray), NewVertexSize, + NewEdgeSize, std::forward<ArgT>(Args)...); + } + +private: + VertexVec AdjList; +}; + +template <typename NodeValueT, typename EdgeValueT> +struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *> { + using GraphT = ImmutableGraph<NodeValueT, EdgeValueT>; + using NodeRef = typename GraphT::Node const *; + using EdgeRef = typename GraphT::Edge const &; + + static NodeRef edge_dest(EdgeRef E) { return E.getDest(); } + using ChildIteratorType = + mapped_iterator<typename GraphT::Edge const *, decltype(&edge_dest)>; + + static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); } + static ChildIteratorType child_begin(NodeRef N) { + return {N->edges_begin(), &edge_dest}; + } + static ChildIteratorType child_end(NodeRef N) { + return {N->edges_end(), &edge_dest}; + } + + static NodeRef getNode(typename GraphT::Node const &N) { return NodeRef{&N}; } + using nodes_iterator = + mapped_iterator<typename GraphT::Node const *, decltype(&getNode)>; + static nodes_iterator nodes_begin(GraphT *G) { + return {G->nodes_begin(), &getNode}; + } + static nodes_iterator nodes_end(GraphT *G) { + return {G->nodes_end(), &getNode}; + } + + using ChildEdgeIteratorType = typename GraphT::Edge const *; + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->edges_begin(); + } + static ChildEdgeIteratorType child_edge_end(NodeRef N) { + return N->edges_end(); + } + static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 0481a40d462a..a0ab5c3a5b3c 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -120,7 +120,7 @@ FunctionPass *createX86DomainReassignmentPass(); FunctionPass *createX86EvexToVexInsts(); /// This pass creates the thunks for the retpoline feature. 
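That closes ImmutableGraph.h. Before the X86 pass plumbing below, a minimal usage sketch of the builder defined above; StringGraph and buildExample are hypothetical names for illustration, not code from this patch:

#include "ImmutableGraph.h"
#include <memory>
#include <string>

// A trivial client: nodes carry names, edges carry weights. Deriving from
// ImmutableGraph exposes the protected constructor that the builder's get()
// invokes via std::make_unique<GraphT>.
struct StringGraph : llvm::ImmutableGraph<std::string, int> {
  StringGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
              size_type NodesSize, size_type EdgesSize)
      : ImmutableGraph(std::move(Nodes), std::move(Edges), NodesSize,
                       EdgesSize) {}
};

static std::unique_ptr<StringGraph> buildExample() {
  llvm::ImmutableGraphBuilder<StringGraph> Builder;
  auto A = Builder.addVertex("A"); // BuilderNodeRef is a dense index
  auto B = Builder.addVertex("B");
  Builder.addEdge(/*Value=*/1, A, B);
  return Builder.get(); // freezes the adjacency lists into the two flat arrays
}

Once built, traversal goes through nodes()/edges(), NodeSet/EdgeSet, or the GraphTraits specialization at the bottom of the header, which is what lets WriteGraph and the reachability walks used later in this patch operate on the gadget graph.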
-FunctionPass *createX86RetpolineThunksPass(); +FunctionPass *createX86IndirectThunksPass(); /// This pass ensures instructions featuring a memory operand /// have distinctive <LineNumber, Discriminator> (with respect to eachother) @@ -133,6 +133,9 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &, X86RegisterBankInfo &); +FunctionPass *createX86LoadValueInjectionLoadHardeningPass(); +FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass(); +FunctionPass *createX86LoadValueInjectionRetHardeningPass(); FunctionPass *createX86SpeculativeLoadHardeningPass(); void initializeEvexToVexInstPassPass(PassRegistry &); @@ -148,6 +151,9 @@ void initializeX86DomainReassignmentPass(PassRegistry &); void initializeX86ExecutionDomainFixPass(PassRegistry &); void initializeX86ExpandPseudoPass(PassRegistry &); void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); +void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &); +void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &); +void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &); void initializeX86OptimizeLEAPassPass(PassRegistry &); void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index a2b11d55f650..bb8952f54e3a 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -426,6 +426,22 @@ def FeatureRetpolineExternalThunk "ourselves. Only has effect when combined with some other retpoline " "feature", [FeatureRetpolineIndirectCalls]>; +// Mitigate LVI attacks against indirect calls/branches and call returns +def FeatureLVIControlFlowIntegrity + : SubtargetFeature< + "lvi-cfi", "UseLVIControlFlowIntegrity", "true", + "Prevent indirect calls/branches from using a memory operand, and " + "precede all indirect calls/branches from a register with an " + "LFENCE instruction to serialize control flow. Also decompose RET " + "instructions into a POP+LFENCE+JMP sequence.">; + +// Mitigate LVI attacks against data loads +def FeatureLVILoadHardening + : SubtargetFeature< + "lvi-load-hardening", "UseLVILoadHardening", "true", + "Insert LFENCE instructions to prevent data speculatively injected " + "into loads from being used maliciously.">; + // Direct Move instructions. def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true", "Support movdiri instruction">; diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 1dbf40683564..a1d256ea872d 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3202,8 +3202,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers"))) return false; - // Functions using retpoline for indirect calls need to use SDISel. - if (Subtarget->useRetpolineIndirectCalls()) + // Functions using thunks for indirect calls need to use SDISel. + if (Subtarget->useIndirectThunkCalls()) return false; // Handle only C, fastcc, and webkit_js calling conventions for now. 
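The X86Subtarget accessors behind this rename are not among the hunks shown, but the intended semantics follow from the two new features defined in X86.td above: either mitigation forces indirect control flow through a thunk. A sketch of the assumed shape (the boolean field names match the SubtargetFeature definitions; the class body itself is illustrative, not quoted from the patch):

// Assumed aggregation behind useIndirectThunkCalls()/useIndirectThunkBranches().
struct X86SubtargetSketch {
  bool UseRetpolineIndirectCalls = false;    // retpoline feature (defined elsewhere)
  bool UseRetpolineIndirectBranches = false; // retpoline feature (defined elsewhere)
  bool UseLVIControlFlowIntegrity = false;   // from FeatureLVIControlFlowIntegrity

  bool useIndirectThunkCalls() const {
    return UseRetpolineIndirectCalls || UseLVIControlFlowIntegrity;
  }
  bool useIndirectThunkBranches() const {
    return UseRetpolineIndirectBranches || UseLVIControlFlowIntegrity;
  }
};

This is also why FastISel bails out above: thunked calls need the custom-inserter path that only SelectionDAG ISel reaches.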
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 799c1f5d1285..1da20371caf5 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -765,10 +765,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, bool InProlog) const { bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; - // FIXME: Add retpoline support and remove this. - if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls()) + // FIXME: Add indirect thunk support and remove this. + if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls()) report_fatal_error("Emitting stack probe calls on 64-bit with the large " - "code model and retpoline not yet implemented."); + "code model and indirect thunks not yet implemented."); unsigned CallOp; if (Is64Bit) @@ -2493,9 +2493,9 @@ void X86FrameLowering::adjustForSegmentedStacks( // is laid out within 2^31 bytes of each function body, but this seems // to be sufficient for JIT. // FIXME: Add retpoline support and remove the error here.. - if (STI.useRetpolineIndirectCalls()) + if (STI.useIndirectThunkCalls()) report_fatal_error("Emitting morestack calls on 64-bit with the large " - "code model and retpoline not yet implemented."); + "code model and thunks not yet implemented."); BuildMI(allocMBB, DL, TII.get(X86::CALL64m)) .addReg(X86::RIP) .addImm(0) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index bf33f399db28..88af0ebcfd0e 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -987,7 +987,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (OptLevel != CodeGenOpt::None && // Only do this when the target can fold the load into the call or // jmp. - !Subtarget->useRetpolineIndirectCalls() && + !Subtarget->useIndirectThunkCalls() && ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || (N->getOpcode() == X86ISD::TC_RETURN && (Subtarget->is64Bit() || diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1523d56cc4e7..c8720d9ae3a6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30221,8 +30221,8 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask, } bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { - // If the subtarget is using retpolines, we need to not generate jump tables. - if (Subtarget.useRetpolineIndirectBranches()) + // If the subtarget is using thunks, we need to not generate jump tables. + if (Subtarget.useIndirectThunkBranches()) return false; // Otherwise, fallback on the generic logic. 
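The jump-table restriction in areJTsAllowed() is easy to see with an example: a dense switch normally lowers to a table dispatch, and that dispatch is itself an indirect branch, exactly what the thunk features exist to intercept. Illustrative only; the assembly comment assumes typical x86-64 jump-table lowering:

// With jump tables enabled, a dense switch like this becomes an indirect
// branch through a table (e.g. jmpq *.LJTI0_0(,%rax,8)), which neither a
// retpoline nor an LFENCE-based thunk would guard. Hence areJTsAllowed()
// now returns false whenever useIndirectThunkBranches() is set.
int dispatch(int X) {
  switch (X) {
  case 0: return 10;
  case 1: return 11;
  case 2: return 12;
  case 3: return 13;
  default: return -1;
  }
}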
@@ -31345,22 +31345,22 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI, return BB; } -static unsigned getOpcodeForRetpoline(unsigned RPOpc) { +static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) { switch (RPOpc) { - case X86::RETPOLINE_CALL32: + case X86::INDIRECT_THUNK_CALL32: return X86::CALLpcrel32; - case X86::RETPOLINE_CALL64: + case X86::INDIRECT_THUNK_CALL64: return X86::CALL64pcrel32; - case X86::RETPOLINE_TCRETURN32: + case X86::INDIRECT_THUNK_TCRETURN32: return X86::TCRETURNdi; - case X86::RETPOLINE_TCRETURN64: + case X86::INDIRECT_THUNK_TCRETURN64: return X86::TCRETURNdi64; } - llvm_unreachable("not retpoline opcode"); + llvm_unreachable("not indirect thunk opcode"); } -static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, - unsigned Reg) { +static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget, + unsigned Reg) { if (Subtarget.useRetpolineExternalThunk()) { // When using an external thunk for retpolines, we pick names that match the // names GCC happens to use as well. This helps simplify the implementation @@ -31392,39 +31392,48 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); return "__x86_indirect_thunk_r11"; } + llvm_unreachable("unexpected reg for external indirect thunk"); + } + + if (Subtarget.useRetpolineIndirectCalls() || + Subtarget.useRetpolineIndirectBranches()) { + // When targeting an internal COMDAT thunk use an LLVM-specific name. + switch (Reg) { + case X86::EAX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_eax"; + case X86::ECX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_ecx"; + case X86::EDX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_edx"; + case X86::EDI: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_edi"; + case X86::R11: + assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); + return "__llvm_retpoline_r11"; + } llvm_unreachable("unexpected reg for retpoline"); } - // When targeting an internal COMDAT thunk use an LLVM-specific name. - switch (Reg) { - case X86::EAX: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_eax"; - case X86::ECX: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_ecx"; - case X86::EDX: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_edx"; - case X86::EDI: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_edi"; - case X86::R11: + if (Subtarget.useLVIControlFlowIntegrity()) { assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); - return "__llvm_retpoline_r11"; + return "__llvm_lvi_thunk_r11"; } - llvm_unreachable("unexpected reg for retpoline"); + llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature"); } MachineBasicBlock * -X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, - MachineBasicBlock *BB) const { +X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI, + MachineBasicBlock *BB) const { // Copy the virtual register into the R11 physical register and // call the retpoline thunk. 
DebugLoc DL = MI.getDebugLoc(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); Register CalleeVReg = MI.getOperand(0).getReg(); - unsigned Opc = getOpcodeForRetpoline(MI.getOpcode()); + unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode()); // Find an available scratch register to hold the callee. On 64-bit, we can // just use R11, but we scan for uses anyway to ensure we don't generate @@ -31458,7 +31467,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, report_fatal_error("calling convention incompatible with retpoline, no " "available registers"); - const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg); + const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg) .addReg(CalleeVReg); @@ -32234,11 +32243,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::TLS_base_addr32: case X86::TLS_base_addr64: return EmitLoweredTLSAddr(MI, BB); - case X86::RETPOLINE_CALL32: - case X86::RETPOLINE_CALL64: - case X86::RETPOLINE_TCRETURN32: - case X86::RETPOLINE_TCRETURN64: - return EmitLoweredRetpoline(MI, BB); + case X86::INDIRECT_THUNK_CALL32: + case X86::INDIRECT_THUNK_CALL64: + case X86::INDIRECT_THUNK_TCRETURN32: + case X86::INDIRECT_THUNK_TCRETURN64: + return EmitLoweredIndirectThunk(MI, BB); case X86::CATCHRET: return EmitLoweredCatchRet(MI, BB); case X86::CATCHPAD: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3a17099da38f..830cdfc79c0a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1482,8 +1482,8 @@ namespace llvm { MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI, - MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI, + MachineBasicBlock *BB) const; MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp new file mode 100644 index 000000000000..36b9c3ccc959 --- /dev/null +++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp @@ -0,0 +1,364 @@ +//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Pass that injects an MI thunk that is used to lower indirect calls in a way +/// that prevents speculation on some x86 processors and can be used to mitigate +/// security vulnerabilities due to targeted speculative execution and side +/// channels such as CVE-2017-5715. +/// +/// Currently supported thunks include: +/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls +/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization +/// before making an indirect call/jump +/// +/// Note that the reason that this is implemented as a MachineFunctionPass and +/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline +/// serialize all transformations, which can consume lots of memory. +/// +/// TODO(chandlerc): All of this code could use better comments and +/// documentation. 
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "x86-retpoline-thunks" + +static const char RetpolineNamePrefix[] = "__llvm_retpoline_"; +static const char R11RetpolineName[] = "__llvm_retpoline_r11"; +static const char EAXRetpolineName[] = "__llvm_retpoline_eax"; +static const char ECXRetpolineName[] = "__llvm_retpoline_ecx"; +static const char EDXRetpolineName[] = "__llvm_retpoline_edx"; +static const char EDIRetpolineName[] = "__llvm_retpoline_edi"; + +static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_"; +static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11"; + +namespace { +template <typename Derived> class ThunkInserter { + Derived &getDerived() { return *static_cast<Derived *>(this); } + +protected: + bool InsertedThunks; + void doInitialization(Module &M) {} + void createThunkFunction(MachineModuleInfo &MMI, StringRef Name); + +public: + void init(Module &M) { + InsertedThunks = false; + getDerived().doInitialization(M); + } + // return `true` if `MMI` or `MF` was modified + bool run(MachineModuleInfo &MMI, MachineFunction &MF); +}; + +struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> { + const char *getThunkPrefix() { return RetpolineNamePrefix; } + bool mayUseThunk(const MachineFunction &MF) { + const auto &STI = MF.getSubtarget<X86Subtarget>(); + return (STI.useRetpolineIndirectCalls() || + STI.useRetpolineIndirectBranches()) && + !STI.useRetpolineExternalThunk(); + } + void insertThunks(MachineModuleInfo &MMI); + void populateThunk(MachineFunction &MF); +}; + +struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> { + const char *getThunkPrefix() { return LVIThunkNamePrefix; } + bool mayUseThunk(const MachineFunction &MF) { + return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity(); + } + void insertThunks(MachineModuleInfo &MMI) { + createThunkFunction(MMI, R11LVIThunkName); + } + void populateThunk(MachineFunction &MF) { + // Grab the entry MBB and erase any other blocks. O0 codegen appears to + // generate two bbs for the entry block. + MachineBasicBlock *Entry = &MF.front(); + Entry->clear(); + while (MF.size() > 1) + MF.erase(std::next(MF.begin())); + + // This code mitigates LVI by replacing each indirect call/jump with a + // direct call/jump to a thunk that looks like: + // ``` + // lfence + // jmpq *%r11 + // ``` + // This ensures that if the value in register %r11 was loaded from memory, + // then the value in %r11 is (architecturally) correct prior to the jump. 
+ const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11); + MF.front().addLiveIn(X86::R11); + return; + } +}; + +class X86IndirectThunks : public MachineFunctionPass { +public: + static char ID; + + X86IndirectThunks() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { return "X86 Indirect Thunks"; } + + bool doInitialization(Module &M) override; + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addPreserved<MachineModuleInfoWrapperPass>(); + } + +private: + std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs; + + // FIXME: When LLVM moves to C++17, these can become folds + template <typename... ThunkInserterT> + static void initTIs(Module &M, + std::tuple<ThunkInserterT...> &ThunkInserters) { + (void)std::initializer_list<int>{ + (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...}; + } + template <typename... ThunkInserterT> + static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF, + std::tuple<ThunkInserterT...> &ThunkInserters) { + bool Modified = false; + (void)std::initializer_list<int>{ + Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...}; + return Modified; + } +}; + +} // end anonymous namespace + +void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) { + if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64) + createThunkFunction(MMI, R11RetpolineName); + else + for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName, + EDIRetpolineName}) + createThunkFunction(MMI, Name); +} + +void RetpolineThunkInserter::populateThunk(MachineFunction &MF) { + bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64; + Register ThunkReg; + if (Is64Bit) { + assert(MF.getName() == "__llvm_retpoline_r11" && + "Should only have an r11 thunk on 64-bit targets"); + + // __llvm_retpoline_r11: + // callq .Lr11_call_target + // .Lr11_capture_spec: + // pause + // lfence + // jmp .Lr11_capture_spec + // .align 16 + // .Lr11_call_target: + // movq %r11, (%rsp) + // retq + ThunkReg = X86::R11; + } else { + // For 32-bit targets we need to emit a collection of thunks for various + // possible scratch registers as well as a fallback that uses EDI, which is + // normally callee saved. + // __llvm_retpoline_eax: + // calll .Leax_call_target + // .Leax_capture_spec: + // pause + // jmp .Leax_capture_spec + // .align 16 + // .Leax_call_target: + // movl %eax, (%esp) # Clobber return addr + // retl + // + // __llvm_retpoline_ecx: + // ... # Same setup + // movl %ecx, (%esp) + // retl + // + // __llvm_retpoline_edx: + // ... # Same setup + // movl %edx, (%esp) + // retl + // + // __llvm_retpoline_edi: + // ... # Same setup + // movl %edi, (%esp) + // retl + if (MF.getName() == EAXRetpolineName) + ThunkReg = X86::EAX; + else if (MF.getName() == ECXRetpolineName) + ThunkReg = X86::ECX; + else if (MF.getName() == EDXRetpolineName) + ThunkReg = X86::EDX; + else if (MF.getName() == EDIRetpolineName) + ThunkReg = X86::EDI; + else + llvm_unreachable("Invalid thunk name on x86-32!"); + } + + const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + // Grab the entry MBB and erase any other blocks. O0 codegen appears to + // generate two bbs for the entry block. 
+ MachineBasicBlock *Entry = &MF.front(); + Entry->clear(); + while (MF.size() > 1) + MF.erase(std::next(MF.begin())); + + MachineBasicBlock *CaptureSpec = + MF.CreateMachineBasicBlock(Entry->getBasicBlock()); + MachineBasicBlock *CallTarget = + MF.CreateMachineBasicBlock(Entry->getBasicBlock()); + MCSymbol *TargetSym = MF.getContext().createTempSymbol(); + MF.push_back(CaptureSpec); + MF.push_back(CallTarget); + + const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; + const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL; + + Entry->addLiveIn(ThunkReg); + BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym); + + // The MIR verifier thinks that the CALL in the entry block will fall through + // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is + // the successor, but the MIR verifier doesn't know how to cope with that. + Entry->addSuccessor(CaptureSpec); + + // In the capture loop for speculation, we want to stop the processor from + // speculating as fast as possible. On Intel processors, the PAUSE instruction + // will block speculation without consuming any execution resources. On AMD + // processors, the PAUSE instruction is (essentially) a nop, so we also use an + // LFENCE instruction which they have advised will stop speculation as well + // with minimal resource utilization. We still end the capture with a jump to + // form an infinite loop to fully guarantee that no matter what implementation + // of the x86 ISA, speculating this code path never escapes. + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE)); + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec); + CaptureSpec->setHasAddressTaken(); + CaptureSpec->addSuccessor(CaptureSpec); + + CallTarget->addLiveIn(ThunkReg); + CallTarget->setHasAddressTaken(); + CallTarget->setAlignment(Align(16)); + + // Insert return address clobber + const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr; + const Register SPReg = Is64Bit ? X86::RSP : X86::ESP; + addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false, + 0) + .addReg(ThunkReg); + + CallTarget->back().setPreInstrSymbol(MF, TargetSym); + BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); +} + +template <typename Derived> +void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI, + StringRef Name) { + assert(Name.startswith(getDerived().getThunkPrefix()) && + "Created a thunk with an unexpected prefix!"); + + Module &M = const_cast<Module &>(*MMI.getModule()); + LLVMContext &Ctx = M.getContext(); + auto Type = FunctionType::get(Type::getVoidTy(Ctx), false); + Function *F = + Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M); + F->setVisibility(GlobalValue::HiddenVisibility); + F->setComdat(M.getOrInsertComdat(Name)); + + // Add Attributes so that we don't create a frame, unwind information, or + // inline. + AttrBuilder B; + B.addAttribute(llvm::Attribute::NoUnwind); + B.addAttribute(llvm::Attribute::Naked); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); + + // Populate our function a bit so that we can verify. + BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); + IRBuilder<> Builder(Entry); + + Builder.CreateRetVoid(); + + // MachineFunctions/MachineBasicBlocks aren't created automatically for the + // IR-level constructs we already made. Create them and insert them into the + // module. 
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); + MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry); + + // Insert EntryMBB into MF. It's not in the module until we do this. + MF.insert(MF.end(), EntryMBB); + // Set MF properties. We never use vregs... + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); +} + +template <typename Derived> +bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) { + // If MF is not a thunk, check to see if we need to insert a thunk. + if (!MF.getName().startswith(getDerived().getThunkPrefix())) { + // If we've already inserted a thunk, nothing else to do. + if (InsertedThunks) + return false; + + // Only add a thunk if one of the functions has the corresponding feature + // enabled in its subtarget, and doesn't enable external thunks. + // FIXME: Conditionalize on indirect calls so we don't emit a thunk when + // nothing will end up calling it. + // FIXME: It's a little silly to look at every function just to enumerate + // the subtargets, but eventually we'll want to look at them for indirect + // calls, so maybe this is OK. + if (!getDerived().mayUseThunk(MF)) + return false; + + getDerived().insertThunks(MMI); + InsertedThunks = true; + return true; + } + + // If this *is* a thunk function, we need to populate it with the correct MI. + getDerived().populateThunk(MF); + return true; +} + +FunctionPass *llvm::createX86IndirectThunksPass() { + return new X86IndirectThunks(); +} + +char X86IndirectThunks::ID = 0; + +bool X86IndirectThunks::doInitialization(Module &M) { + initTIs(M, TIs); + return false; +} + +bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << getPassName() << '\n'); + auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); + return runTIs(MMI, MF, TIs); +} diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 78d8dd3c0d03..1fdac104cb73 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1213,14 +1213,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>; + Requires<[Not64BitMode, NotUseIndirectThunkCalls]>; // FIXME: This is disabled for 32-bit PIC mode because the global base // register which is part of the address mode may be assigned a // callee-saved register. def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>; + Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi tglobaladdr:$dst, imm:$off)>, @@ -1232,21 +1232,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>; + Requires<[In64BitMode, NotUseIndirectThunkCalls]>; // Don't fold loads into X86tcret requiring more than 6 regs. // There wouldn't be enough scratch registers for base+index. 
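Returning to the FIXME in X86IndirectThunks.cpp above ("When LLVM moves to C++17, these can become folds"): a sketch of what initTIs/runTIs would look like as fold expressions, assuming the same tuple-of-inserters layout. The bitwise | is deliberate, so that no inserter is skipped by short-circuit evaluation:

// C++17 fold-expression form of the two helpers (sketch, not in this patch).
template <typename... ThunkInserterT>
static void initTIs(Module &M, std::tuple<ThunkInserterT...> &Inserters) {
  (std::get<ThunkInserterT>(Inserters).init(M), ...);
}

template <typename... ThunkInserterT>
static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
                   std::tuple<ThunkInserterT...> &Inserters) {
  return (false | ... | std::get<ThunkInserterT>(Inserters).run(MMI, MF));
}

The X86InstrCompiler.td tail-call patterns continue below.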
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, - Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>; + Requires<[In64BitMode, NotUseIndirectThunkCalls]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), - (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In64BitMode, UseRetpolineIndirectCalls]>; + (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>, + Requires<[In64BitMode, UseIndirectThunkCalls]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), - (RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[Not64BitMode, UseRetpolineIndirectCalls]>; + (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>, + Requires<[Not64BitMode, UseIndirectThunkCalls]>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td index 32faeb1a86f2..1842dc19ec2e 100644 --- a/llvm/lib/Target/X86/X86InstrControl.td +++ b/llvm/lib/Target/X86/X86InstrControl.td @@ -237,13 +237,13 @@ let isCall = 1 in Sched<[WriteJumpLd]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32, - Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>, + Requires<[Not64BitMode,NotUseIndirectThunkCalls]>, Sched<[WriteJump]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>, OpSize32, Requires<[Not64BitMode,FavorMemIndirectCall, - NotUseRetpolineIndirectCalls]>, + NotUseIndirectThunkCalls]>, Sched<[WriteJumpLd]>; // Non-tracking calls for IBT, use with caution. @@ -334,11 +334,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { Requires<[In64BitMode]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst), "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, - Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>; + Requires<[In64BitMode,NotUseIndirectThunkCalls]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, Requires<[In64BitMode,FavorMemIndirectCall, - NotUseRetpolineIndirectCalls]>; + NotUseIndirectThunkCalls]>; // Non-tracking calls for IBT, use with caution. let isCodeGenOnly = 1 in { @@ -393,19 +393,19 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1, Uses = [RSP, SSP], usesCustomInserter = 1, SchedRW = [WriteJump] in { - def RETPOLINE_CALL32 : + def INDIRECT_THUNK_CALL32 : PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>, - Requires<[Not64BitMode,UseRetpolineIndirectCalls]>; + Requires<[Not64BitMode,UseIndirectThunkCalls]>; - def RETPOLINE_CALL64 : + def INDIRECT_THUNK_CALL64 : PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>, - Requires<[In64BitMode,UseRetpolineIndirectCalls]>; + Requires<[In64BitMode,UseIndirectThunkCalls]>; - // Retpoline variant of indirect tail calls. + // Indirect thunk variant of indirect tail calls. 
let isTerminator = 1, isReturn = 1, isBarrier = 1 in { - def RETPOLINE_TCRETURN64 : + def INDIRECT_THUNK_TCRETURN64 : PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>; - def RETPOLINE_TCRETURN32 : + def INDIRECT_THUNK_TCRETURN32 : PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>; } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index ca5425e8b89f..93f40c8ec996 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -996,8 +996,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; def HasERMSB : Predicate<"Subtarget->hasERMSB()">; def HasMFence : Predicate<"Subtarget->hasMFence()">; -def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">; -def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">; +def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">; +def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp new file mode 100644 index 000000000000..35fc439998f9 --- /dev/null +++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -0,0 +1,900 @@ +//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: This pass finds Load Value Injection (LVI) gadgets consisting +/// of a load from memory (i.e., SOURCE), and any operation that may transmit +/// the value loaded from memory over a covert channel, or use the value loaded +/// from memory to determine a branch/call target (i.e., SINK). After finding +/// all such gadgets in a given function, the pass minimally inserts LFENCE +/// instructions in such a manner that the following property is satisfied: for +/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at +/// least one LFENCE instruction. The algorithm that implements this minimal +/// insertion is influenced by an academic paper that minimally inserts memory +/// fences for high-performance concurrent programs: +/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf +/// The algorithm implemented in this pass is as follows: +/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the +/// following components: +/// - SOURCE instructions (also includes function arguments) +/// - SINK instructions +/// - Basic block entry points +/// - Basic block terminators +/// - LFENCE instructions +/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e., +/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been +/// mitigated, go to step 6. +/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion. +/// 4. Insert one LFENCE along each CFG edge that was cut in step 3. +/// 5. Go to step 2. +/// 6. If any LFENCEs were inserted, return `true` from runOnMachineFunction() +/// to tell LLVM that the function was modified. 
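A concrete instance of the property those steps enforce (illustrative; the function and names are made up):

// One LVI gadget at the machine level. The load of the function pointer is
// the SOURCE; the call through it is the SINK. After this pass, every CFG
// path from the movq to the callq carries an LFENCE:
//
//   movq   (%rdi), %rax      # SOURCE: value loaded from memory
//   lfence                   # inserted by step 4 above
//   callq  *%rax             # SINK: branch target depends on the load
using Callback = int (*)();
int callThrough(Callback *Slot) { return (*Slot)(); }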
+/// +//===----------------------------------------------------------------------===// + +#include "ImmutableGraph.h" +#include "X86.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DOTGraphTraits.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define PASS_KEY "x86-lvi-load" +#define DEBUG_TYPE PASS_KEY + +STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); +STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); +STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " + "were deployed"); +STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis"); + +static cl::opt<std::string> OptimizePluginPath( + PASS_KEY "-opt-plugin", + cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden); + +static cl::opt<bool> NoConditionalBranches( + PASS_KEY "-no-cbranch", + cl::desc("Don't treat conditional branches as disclosure gadgets. 
This " + "may improve performance, at the cost of security."), + cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDot( + PASS_KEY "-dot", + cl::desc( + "For each function, emit a dot graph depicting potential LVI gadgets"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDotOnly( + PASS_KEY "-dot-only", + cl::desc("For each function, emit a dot graph depicting potential LVI " + "gadgets, and do not insert any fences"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDotVerify( + PASS_KEY "-dot-verify", + cl::desc("For each function, emit a dot graph to stdout depicting " + "potential LVI gadgets, used for testing purposes only"), + cl::init(false), cl::Hidden); + +static llvm::sys::DynamicLibrary OptimizeDL; +typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size, + unsigned int *edges, int *edge_values, + int *cut_edges /* out */, unsigned int edges_size); +static OptimizeCutT OptimizeCut = nullptr; + +namespace { + +struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> { + static constexpr int GadgetEdgeSentinel = -1; + static constexpr MachineInstr *const ArgNodeSentinel = nullptr; + + using GraphT = ImmutableGraph<MachineInstr *, int>; + using Node = typename GraphT::Node; + using Edge = typename GraphT::Edge; + using size_type = typename GraphT::size_type; + MachineGadgetGraph(std::unique_ptr<Node[]> Nodes, + std::unique_ptr<Edge[]> Edges, size_type NodesSize, + size_type EdgesSize, int NumFences = 0, int NumGadgets = 0) + : GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize), + NumFences(NumFences), NumGadgets(NumGadgets) {} + static inline bool isCFGEdge(const Edge &E) { + return E.getValue() != GadgetEdgeSentinel; + } + static inline bool isGadgetEdge(const Edge &E) { + return E.getValue() == GadgetEdgeSentinel; + } + int NumFences; + int NumGadgets; +}; + +class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass { +public: + X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "X86 Load Value Injection (LVI) Load Hardening"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + +private: + using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>; + using EdgeSet = MachineGadgetGraph::EdgeSet; + using NodeSet = MachineGadgetGraph::NodeSet; + using Gadget = std::pair<MachineInstr *, MachineInstr *>; + + const X86Subtarget *STI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + + std::unique_ptr<MachineGadgetGraph> + getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, + const MachineDominanceFrontier &MDF) const; + int hardenLoadsWithPlugin(MachineFunction &MF, + std::unique_ptr<MachineGadgetGraph> Graph) const; + int hardenLoadsWithGreedyHeuristic( + MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const; + int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G, + EdgeSet &ElimEdges /* in, out */, + NodeSet &ElimNodes /* in, out */) const; + std::unique_ptr<MachineGadgetGraph> + trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const; + void findAndCutEdges(MachineGadgetGraph &G, + EdgeSet &CutEdges /* out */) const; + int insertFences(MachineFunction &MF, MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const; + bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const; + bool instrUsesRegToBranch(const MachineInstr 
&I, unsigned Reg) const; + inline bool isFence(const MachineInstr *MI) const { + return MI && (MI->getOpcode() == X86::LFENCE || + (STI->useLVIControlFlowIntegrity() && MI->isCall())); + } +}; + +} // end anonymous namespace + +namespace llvm { + +template <> +struct GraphTraits<MachineGadgetGraph *> + : GraphTraits<ImmutableGraph<MachineInstr *, int> *> {}; + +template <> +struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits { + using GraphType = MachineGadgetGraph; + using Traits = llvm::GraphTraits<GraphType *>; + using NodeRef = typename Traits::NodeRef; + using EdgeRef = typename Traits::EdgeRef; + using ChildIteratorType = typename Traits::ChildIteratorType; + using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType; + + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(NodeRef Node, GraphType *) { + if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel) + return "ARGS"; + + std::string Str; + raw_string_ostream OS(Str); + OS << *Node->getValue(); + return OS.str(); + } + + static std::string getNodeAttributes(NodeRef Node, GraphType *) { + MachineInstr *MI = Node->getValue(); + if (MI == MachineGadgetGraph::ArgNodeSentinel) + return "color = blue"; + if (MI->getOpcode() == X86::LFENCE) + return "color = green"; + return ""; + } + + static std::string getEdgeAttributes(NodeRef, ChildIteratorType E, + GraphType *) { + int EdgeVal = (*E.getCurrent()).getValue(); + return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal) + : "color = red, style = \"dashed\""; + } +}; + +} // end namespace llvm + +constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel; +constexpr int MachineGadgetGraph::GadgetEdgeSentinel; + +char X86LoadValueInjectionLoadHardeningPass::ID = 0; + +void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage( + AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineDominanceFrontier>(); + AU.setPreservesCFG(); +} + +static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF, + MachineGadgetGraph *G) { + WriteGraph(OS, G, /*ShortNames*/ false, + "Speculative gadgets for \"" + MF.getName() + "\" function"); +} + +bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() + << " *****\n"); + STI = &MF.getSubtarget<X86Subtarget>(); + if (!STI->useLVILoadHardening()) + return false; + + // FIXME: support 32-bit + if (!STI->is64Bit()) + report_fatal_error("LVI load hardening is only supported on 64-bit", false); + + // Don't skip functions with the "optnone" attr but participate in opt-bisect. + const Function &F = MF.getFunction(); + if (!F.hasOptNone() && skipFunction(F)) + return false; + + ++NumFunctionsConsidered; + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); + LLVM_DEBUG(dbgs() << "Building gadget graph...\n"); + const auto &MLI = getAnalysis<MachineLoopInfo>(); + const auto &MDT = getAnalysis<MachineDominatorTree>(); + const auto &MDF = getAnalysis<MachineDominanceFrontier>(); + std::unique_ptr<MachineGadgetGraph> Graph = getGadgetGraph(MF, MLI, MDT, MDF); + LLVM_DEBUG(dbgs() << "Building gadget graph... 
Done\n"); + if (Graph == nullptr) + return false; // didn't find any gadgets + + if (EmitDotVerify) { + WriteGadgetGraph(outs(), MF, Graph.get()); + return false; + } + + if (EmitDot || EmitDotOnly) { + LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n"); + std::error_code FileError; + std::string FileName = "lvi."; + FileName += MF.getName(); + FileName += ".dot"; + raw_fd_ostream FileOut(FileName, FileError); + if (FileError) + errs() << FileError.message(); + WriteGadgetGraph(FileOut, MF, Graph.get()); + FileOut.close(); + LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n"); + if (EmitDotOnly) + return false; + } + + int FencesInserted; + if (!OptimizePluginPath.empty()) { + if (!OptimizeDL.isValid()) { + std::string ErrorMsg; + OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary( + OptimizePluginPath.c_str(), &ErrorMsg); + if (!ErrorMsg.empty()) + report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"'); + OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut"); + if (!OptimizeCut) + report_fatal_error("Invalid optimization plugin"); + } + FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph)); + } else { // Use the default greedy heuristic + FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph)); + } + + if (FencesInserted > 0) + ++NumFunctionsMitigated; + NumFences += FencesInserted; + return (FencesInserted > 0); +} + +std::unique_ptr<MachineGadgetGraph> +X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( + MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, + const MachineDominanceFrontier &MDF) const { + using namespace rdf; + + // Build the Register Dataflow Graph using the RDF framework + TargetOperandInfo TOI{*TII}; + DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI}; + DFG.build(); + Liveness L{MF.getRegInfo(), DFG}; + L.computePhiInfo(); + + GraphBuilder Builder; + using GraphIter = typename GraphBuilder::BuilderNodeRef; + DenseMap<MachineInstr *, GraphIter> NodeMap; + int FenceCount = 0, GadgetCount = 0; + auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) { + auto Ref = NodeMap.find(MI); + if (Ref == NodeMap.end()) { + auto I = Builder.addVertex(MI); + NodeMap[MI] = I; + return std::pair<GraphIter, bool>{I, true}; + } + return std::pair<GraphIter, bool>{Ref->getSecond(), false}; + }; + + // The `Transmitters` map memoizes transmitters found for each def. If a def + // has not yet been analyzed, then it will not appear in the map. If a def + // has been analyzed and was determined not to have any transmitters, then + // its list of transmitters will be empty. 
+ DenseMap<NodeId, std::vector<NodeId>> Transmitters; + + // Analyze all machine instructions to find gadgets and LFENCEs, adding + // each interesting value to `Nodes` + auto AnalyzeDef = [&](NodeAddr<DefNode *> SourceDef) { + SmallSet<NodeId, 8> UsesVisited, DefsVisited; + std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain = + [&](NodeAddr<DefNode *> Def) { + if (Transmitters.find(Def.Id) != Transmitters.end()) + return; // Already analyzed `Def` + + // Use RDF to find all the uses of `Def` + rdf::NodeSet Uses; + RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG)); + for (auto UseID : L.getAllReachedUses(DefReg, Def)) { + auto Use = DFG.addr<UseNode *>(UseID); + if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node + NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG); + for (auto I : L.getRealUses(Phi.Id)) { + if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) { + for (auto UA : I.second) + Uses.emplace(UA.first); + } + } + } else { // not a phi node + Uses.emplace(UseID); + } + } + + // For each use of `Def`, we want to know whether: + // (1) The use can leak the Def'ed value, + // (2) The use can further propagate the Def'ed value to more defs + for (auto UseID : Uses) { + if (!UsesVisited.insert(UseID).second) + continue; // Already visited this use of `Def` + + auto Use = DFG.addr<UseNode *>(UseID); + assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef)); + MachineOperand &UseMO = Use.Addr->getOp(); + MachineInstr &UseMI = *UseMO.getParent(); + assert(UseMO.isReg()); + + // We naively assume that an instruction propagates any loaded + // uses to all defs unless the instruction is a call, in which + // case all arguments will be treated as gadget sources during + // analysis of the callee function. + if (UseMI.isCall()) + continue; + + // Check whether this use can transmit (leak) its value. + if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) || + (!NoConditionalBranches && + instrUsesRegToBranch(UseMI, UseMO.getReg()))) { + Transmitters[Def.Id].push_back(Use.Addr->getOwner(DFG).Id); + if (UseMI.mayLoad()) + continue; // Found a transmitting load -- no need to continue + // traversing its defs (i.e., this load will become + // a new gadget source anyways). + } + + // Check whether the use propagates to more defs. + NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)}; + rdf::NodeList AnalyzedChildDefs; + for (auto &ChildDef : + Owner.Addr->members_if(DataFlowGraph::IsDef, DFG)) { + if (!DefsVisited.insert(ChildDef.Id).second) + continue; // Already visited this def + if (Def.Addr->getAttrs() & NodeAttrs::Dead) + continue; + if (Def.Id == ChildDef.Id) + continue; // `Def` uses itself (e.g., increment loop counter) + + AnalyzeDefUseChain(ChildDef); + + // `Def` inherits all of its child defs' transmitters. + for (auto TransmitterId : Transmitters[ChildDef.Id]) + Transmitters[Def.Id].push_back(TransmitterId); + } + } + + // Note that this statement adds `Def.Id` to the map if no + // transmitters were found for `Def`. + auto &DefTransmitters = Transmitters[Def.Id]; + + // Remove duplicate transmitters + llvm::sort(DefTransmitters); + DefTransmitters.erase( + std::unique(DefTransmitters.begin(), DefTransmitters.end()), + DefTransmitters.end()); + }; + + // Find all of the transmitters + AnalyzeDefUseChain(SourceDef); + auto &SourceDefTransmitters = Transmitters[SourceDef.Id]; + if (SourceDefTransmitters.empty()) + return; // No transmitters for `SourceDef` + + MachineInstr *Source = SourceDef.Addr->getFlags() & NodeAttrs::PhiRef + ? 
MachineGadgetGraph::ArgNodeSentinel + : SourceDef.Addr->getOp().getParent(); + auto GadgetSource = MaybeAddNode(Source); + // Each transmitter is a sink for `SourceDef`. + for (auto TransmitterId : SourceDefTransmitters) { + MachineInstr *Sink = DFG.addr<StmtNode *>(TransmitterId).Addr->getCode(); + auto GadgetSink = MaybeAddNode(Sink); + // Add the gadget edge to the graph. + Builder.addEdge(MachineGadgetGraph::GadgetEdgeSentinel, + GadgetSource.first, GadgetSink.first); + ++GadgetCount; + } + }; + + LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n"); + // Analyze function arguments + NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG); + for (NodeAddr<PhiNode *> ArgPhi : + EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) { + NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG); + llvm::for_each(Defs, AnalyzeDef); + } + // Analyze every instruction in MF + for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) { + for (NodeAddr<StmtNode *> SA : + BA.Addr->members_if(DataFlowGraph::IsCode<NodeAttrs::Stmt>, DFG)) { + MachineInstr *MI = SA.Addr->getCode(); + if (isFence(MI)) { + MaybeAddNode(MI); + ++FenceCount; + } else if (MI->mayLoad()) { + NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG); + llvm::for_each(Defs, AnalyzeDef); + } + } + } + LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n"); + LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n"); + if (GadgetCount == 0) + return nullptr; + NumGadgets += GadgetCount; + + // Traverse CFG to build the rest of the graph + SmallSet<MachineBasicBlock *, 8> BlocksVisited; + std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG = + [&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) { + unsigned LoopDepth = MLI.getLoopDepth(MBB); + if (!MBB->empty()) { + // Always add the first instruction in each block + auto NI = MBB->begin(); + auto BeginBB = MaybeAddNode(&*NI); + Builder.addEdge(ParentDepth, GI, BeginBB.first); + if (!BlocksVisited.insert(MBB).second) + return; + + // Add any instructions within the block that are gadget components + GI = BeginBB.first; + while (++NI != MBB->end()) { + auto Ref = NodeMap.find(&*NI); + if (Ref != NodeMap.end()) { + Builder.addEdge(LoopDepth, GI, Ref->getSecond()); + GI = Ref->getSecond(); + } + } + + // Always add the terminator instruction, if one exists + auto T = MBB->getFirstTerminator(); + if (T != MBB->end()) { + auto EndBB = MaybeAddNode(&*T); + if (EndBB.second) + Builder.addEdge(LoopDepth, GI, EndBB.first); + GI = EndBB.first; + } + } + for (MachineBasicBlock *Succ : MBB->successors()) + TraverseCFG(Succ, GI, LoopDepth); + }; + // ArgNodeSentinel is a pseudo-instruction that represents MF args in the + // GadgetGraph + GraphIter ArgNode = MaybeAddNode(MachineGadgetGraph::ArgNodeSentinel).first; + TraverseCFG(&MF.front(), ArgNode, 0); + std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)}; + LLVM_DEBUG(dbgs() << "Found " << G->nodes_size() << " nodes\n"); + return G; +} + +// Returns the number of remaining gadget edges that could not be eliminated +int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes( + MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */, + MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const { + if (G.NumFences > 0) { + // Eliminate fences and CFG edges that ingress and egress the fence, as + // they are trivially mitigated. 
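Pictorially, what "trivially mitigated" means here (sketch):

//  Before trimming:                           After trimming the fence:
//
//   [SOURCE] --CFG--> [LFENCE] --CFG--> [SINK]      [SOURCE]        [SINK]
//       \_______________gadget______________/           \___gadget____/
//
// Removing the fence node together with its ingress and egress CFG edges
// leaves the sink CFG-unreachable from the source, so the reachability walk
// that follows counts the surviving gadget edge as mitigated.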
+ for (const auto &E : G.edges()) { + const MachineGadgetGraph::Node *Dest = E.getDest(); + if (isFence(Dest->getValue())) { + ElimNodes.insert(*Dest); + ElimEdges.insert(E); + for (const auto &DE : Dest->edges()) + ElimEdges.insert(DE); + } + } + } + + // Find and eliminate gadget edges that have been mitigated. + int MitigatedGadgets = 0, RemainingGadgets = 0; + MachineGadgetGraph::NodeSet ReachableNodes{G}; + for (const auto &RootN : G.nodes()) { + if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge)) + continue; // skip this node if it isn't a gadget source + + // Find all of the nodes that are CFG-reachable from RootN using DFS + ReachableNodes.clear(); + std::function<void(const MachineGadgetGraph::Node *, bool)> + FindReachableNodes = + [&](const MachineGadgetGraph::Node *N, bool FirstNode) { + if (!FirstNode) + ReachableNodes.insert(*N); + for (const auto &E : N->edges()) { + const MachineGadgetGraph::Node *Dest = E.getDest(); + if (MachineGadgetGraph::isCFGEdge(E) && + !ElimEdges.contains(E) && !ReachableNodes.contains(*Dest)) + FindReachableNodes(Dest, false); + } + }; + FindReachableNodes(&RootN, true); + + // Any gadget whose sink is unreachable has been mitigated + for (const auto &E : RootN.edges()) { + if (MachineGadgetGraph::isGadgetEdge(E)) { + if (ReachableNodes.contains(*E.getDest())) { + // This gadget's sink is reachable + ++RemainingGadgets; + } else { // This gadget's sink is unreachable, and therefore mitigated + ++MitigatedGadgets; + ElimEdges.insert(E); + } + } + } + } + return RemainingGadgets; +} + +std::unique_ptr<MachineGadgetGraph> +X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges( + std::unique_ptr<MachineGadgetGraph> Graph) const { + MachineGadgetGraph::NodeSet ElimNodes{*Graph}; + MachineGadgetGraph::EdgeSet ElimEdges{*Graph}; + int RemainingGadgets = + elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes); + if (ElimEdges.empty() && ElimNodes.empty()) { + Graph->NumFences = 0; + Graph->NumGadgets = RemainingGadgets; + } else { + Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */, + RemainingGadgets); + } + return Graph; +} + +int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin( + MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const { + int FencesInserted = 0; + + do { + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); + Graph = trimMitigatedEdges(std::move(Graph)); + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); + if (Graph->NumGadgets == 0) + break; + + LLVM_DEBUG(dbgs() << "Cutting edges...\n"); + EdgeSet CutEdges{*Graph}; + auto Nodes = std::make_unique<unsigned int[]>(Graph->nodes_size() + + 1 /* terminator node */); + auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size()); + auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size()); + auto EdgeValues = std::make_unique<int[]>(Graph->edges_size()); + for (const auto &N : Graph->nodes()) { + Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin()); + } + Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node + for (const auto &E : Graph->edges()) { + Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest()); + EdgeValues[Graph->getEdgeIndex(E)] = E.getValue(); + } + OptimizeCut(Nodes.get(), Graph->nodes_size(), Edges.get(), EdgeValues.get(), + EdgeCuts.get(), Graph->edges_size()); + for (int I = 0; I < Graph->edges_size(); ++I) + if (EdgeCuts[I]) + CutEdges.set(I); + LLVM_DEBUG(dbgs() << "Cutting edges... 
Done\n"); + LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); + + LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); + FencesInserted += insertFences(MF, *Graph, CutEdges); + LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n"); + LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); + + Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph}, + CutEdges); + } while (true); + + return FencesInserted; +} + +int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic( + MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const { + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); + Graph = trimMitigatedEdges(std::move(Graph)); + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); + if (Graph->NumGadgets == 0) + return 0; + + LLVM_DEBUG(dbgs() << "Cutting edges...\n"); + MachineGadgetGraph::NodeSet ElimNodes{*Graph}, GadgetSinks{*Graph}; + MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph}; + auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) { + return !ElimEdges.contains(E) && !CutEdges.contains(E) && + MachineGadgetGraph::isCFGEdge(E); + }; + auto IsGadgetEdge = [&ElimEdges, + &CutEdges](const MachineGadgetGraph::Edge &E) { + return !ElimEdges.contains(E) && !CutEdges.contains(E) && + MachineGadgetGraph::isGadgetEdge(E); + }; + + // FIXME: this is O(E^2), we could probably do better. + do { + // Find the cheapest CFG edge that will eliminate a gadget (by being + // egress from a SOURCE node or ingress to a SINK node), and cut it. + const MachineGadgetGraph::Edge *CheapestSoFar = nullptr; + + // First, collect all gadget source and sink nodes. + MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph}; + for (const auto &N : Graph->nodes()) { + if (ElimNodes.contains(N)) + continue; + for (const auto &E : N.edges()) { + if (IsGadgetEdge(E)) { + GadgetSources.insert(N); + GadgetSinks.insert(*E.getDest()); + } + } + } + + // Next, look for the cheapest CFG edge which, when cut, is guaranteed to + // mitigate at least one gadget by either: + // (a) being egress from a gadget source, or + // (b) being ingress to a gadget sink. + for (const auto &N : Graph->nodes()) { + if (ElimNodes.contains(N)) + continue; + for (const auto &E : N.edges()) { + if (IsCFGEdge(E)) { + if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) { + if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue()) + CheapestSoFar = &E; + } + } + } + } + + assert(CheapestSoFar && "Failed to cut an edge"); + CutEdges.insert(*CheapestSoFar); + ElimEdges.insert(*CheapestSoFar); + } while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes)); + LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); + LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); + + LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); + int FencesInserted = insertFences(MF, *Graph, CutEdges); + LLVM_DEBUG(dbgs() << "Inserting LFENCEs... 
Done\n"); + LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); + + return FencesInserted; +} + +int X86LoadValueInjectionLoadHardeningPass::insertFences( + MachineFunction &MF, MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const { + int FencesInserted = 0; + for (const auto &N : G.nodes()) { + for (const auto &E : N.edges()) { + if (CutEdges.contains(E)) { + MachineInstr *MI = N.getValue(), *Prev; + MachineBasicBlock *MBB; // Insert an LFENCE in this MBB + MachineBasicBlock::iterator InsertionPt; // ...at this point + if (MI == MachineGadgetGraph::ArgNodeSentinel) { + // insert LFENCE at beginning of entry block + MBB = &MF.front(); + InsertionPt = MBB->begin(); + Prev = nullptr; + } else if (MI->isBranch()) { // insert the LFENCE before the branch + MBB = MI->getParent(); + InsertionPt = MI; + Prev = MI->getPrevNode(); + // Remove all egress CFG edges from this branch because the inserted + // LFENCE prevents gadgets from crossing the branch. + for (const auto &E : N.edges()) { + if (MachineGadgetGraph::isCFGEdge(E)) + CutEdges.insert(E); + } + } else { // insert the LFENCE after the instruction + MBB = MI->getParent(); + InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end(); + Prev = InsertionPt == MBB->end() + ? (MBB->empty() ? nullptr : &MBB->back()) + : InsertionPt->getPrevNode(); + } + // Ensure this insertion is not redundant (two LFENCEs in sequence). + if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) && + (!Prev || !isFence(Prev))) { + BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); + ++FencesInserted; + } + } + } + } + return FencesInserted; +} + +bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory( + const MachineInstr &MI, unsigned Reg) const { + if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE || + MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE) + return false; + + // FIXME: This does not handle pseudo loading instruction like TCRETURN* + const MCInstrDesc &Desc = MI.getDesc(); + int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags); + if (MemRefBeginIdx < 0) { + LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading " + "instruction:\n"; + MI.print(dbgs()); dbgs() << '\n';); + return false; + } + MemRefBeginIdx += X86II::getOperandBias(Desc); + + const MachineOperand &BaseMO = + MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); + const MachineOperand &IndexMO = + MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); + return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister && + TRI->regsOverlap(BaseMO.getReg(), Reg)) || + (IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister && + TRI->regsOverlap(IndexMO.getReg(), Reg)); +} + +bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch( + const MachineInstr &MI, unsigned Reg) const { + if (!MI.isConditionalBranch()) + return false; + for (const MachineOperand &Use : MI.uses()) + if (Use.isReg() && Use.getReg() == Reg) + return true; + return false; +} + +INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY, + "X86 LVI load hardening", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) +INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY, + "X86 LVI load hardening", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() { + return new X86LoadValueInjectionLoadHardeningPass(); +} + +namespace { + +/// The 
`X86LoadValueInjectionLoadHardeningPass` above depends on expensive
+/// analysis passes that add complexity to the pipeline. This complexity
+/// can cause noticeable overhead when no optimizations are enabled, i.e., -O0.
+/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to
+/// provide the same security as the optimized pass, but without adding
+/// unnecessary complexity to the LLVM pipeline.
+///
+/// The behavior of this pass is simply to insert an LFENCE after every load
+/// instruction.
+class X86LoadValueInjectionLoadHardeningUnoptimizedPass
+    : public MachineFunctionPass {
+public:
+  X86LoadValueInjectionLoadHardeningUnoptimizedPass()
+      : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)";
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  static char ID;
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0;
+
+bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction(
+    MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+                    << " *****\n");
+  const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>();
+  if (!STI->useLVILoadHardening())
+    return false;
+
+  // FIXME: support 32-bit
+  if (!STI->is64Bit())
+    report_fatal_error("LVI load hardening is only supported on 64-bit", false);
+
+  // Don't skip functions with the "optnone" attr but participate in opt-bisect.
+  const Function &F = MF.getFunction();
+  if (!F.hasOptNone() && skipFunction(F))
+    return false;
+
+  bool Modified = false;
+  ++NumFunctionsConsidered;
+
+  const TargetInstrInfo *TII = STI->getInstrInfo();
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE ||
+          MI.getOpcode() == X86::MFENCE)
+        continue;
+
+      MachineBasicBlock::iterator InsertionPt =
+          MI.getNextNode() ? MI.getNextNode() : MBB.end();
+      BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+      ++NumFences;
+      Modified = true;
+    }
+  }
+
+  if (Modified)
+    ++NumFunctionsMitigated;
+
+  return Modified;
+}
+
+INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY,
+                "X86 LVI load hardening", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() {
+  return new X86LoadValueInjectionLoadHardeningUnoptimizedPass();
+}
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
new file mode 100644
index 000000000000..6e1134a25950
--- /dev/null
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -0,0 +1,143 @@
+//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: Replaces every `ret` instruction with the sequence:
+/// ```
+/// pop <scratch-reg>
+/// lfence
+/// jmp *<scratch-reg>
+/// ```
+/// where `<scratch-reg>` is some available scratch register, according to the
+/// calling convention of the function being mitigated.
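+///
+/// For example, if `%rcx` happens to be the scratch register chosen, a bare
+/// `retq` becomes:
+/// ```
+/// popq %rcx
+/// lfence
+/// jmpq *%rcx
+/// ```
+/// which performs the same control transfer, but with speculation on the
+/// loaded return address blocked by the LFENCE.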
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include <bitset> + +using namespace llvm; + +#define PASS_KEY "x86-lvi-ret" +#define DEBUG_TYPE PASS_KEY + +STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); +STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); +STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " + "were deployed"); + +namespace { + +class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass { +public: + X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {} + StringRef getPassName() const override { + return "X86 Load Value Injection (LVI) Ret-Hardening"; + } + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; +}; + +} // end anonymous namespace + +char X86LoadValueInjectionRetHardeningPass::ID = 0; + +bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() + << " *****\n"); + const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>(); + if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit()) + return false; // FIXME: support 32-bit + + // Don't skip functions with the "optnone" attr but participate in opt-bisect. + const Function &F = MF.getFunction(); + if (!F.hasOptNone() && skipFunction(F)) + return false; + + ++NumFunctionsConsidered; + const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const X86InstrInfo *TII = Subtarget->getInstrInfo(); + unsigned ClobberReg = X86::NoRegister; + std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s; + UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer + UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer + UnclobberableGR64s.set(X86::RAX); // used for function return + UnclobberableGR64s.set(X86::RDX); // used for function return + + // We can clobber any register allowed by the function's calling convention. + for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR) + UnclobberableGR64s.set(Reg); + for (auto &Reg : X86::GR64RegClass) { + if (!UnclobberableGR64s.test(Reg)) { + ClobberReg = Reg; + break; + } + } + + if (ClobberReg != X86::NoRegister) { + LLVM_DEBUG(dbgs() << "Selected register " + << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg) + << " to clobber\n"); + } else { + LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n"); + } + + bool Modified = false; + for (auto &MBB : MF) { + if (MBB.empty()) + continue; + + MachineInstr &MI = MBB.back(); + if (MI.getOpcode() != X86::RETQ) + continue; + + if (ClobberReg != X86::NoRegister) { + MBB.erase_instr(&MI); + BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r)) + .addReg(ClobberReg, RegState::Define) + .setMIFlag(MachineInstr::FrameDestroy); + BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r)) + .addReg(ClobberReg); + } else { + // In case there is no available scratch register, we can still read from + // RSP to assert that RSP points to a valid page. 
The write to RSP is + // also helpful because it verifies that the stack's write permissions + // are intact. + MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE)); + addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)), + X86::RSP, false, 0) + .addImm(0) + ->addRegisterDead(X86::EFLAGS, TRI); + } + + ++NumFences; + Modified = true; + } + + if (Modified) + ++NumFunctionsMitigated; + return Modified; +} + +INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY, + "X86 LVI ret hardener", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() { + return new X86LoadValueInjectionRetHardeningPass(); +} diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 7f49c6e861d4..f5caaaae4d84 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1220,8 +1220,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, break; case MachineOperand::MO_Register: // FIXME: Add retpoline support and remove this. - if (Subtarget->useRetpolineIndirectCalls()) - report_fatal_error("Lowering register statepoints with retpoline not " + if (Subtarget->useIndirectThunkCalls()) + report_fatal_error("Lowering register statepoints with thunks not " "yet implemented."); CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); CallOpcode = X86::CALL64r; @@ -1399,9 +1399,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, EmitAndCountInstruction( MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); // FIXME: Add retpoline support and remove this. - if (Subtarget->useRetpolineIndirectCalls()) + if (Subtarget->useIndirectThunkCalls()) report_fatal_error( - "Lowering patchpoint with retpoline not yet implemented."); + "Lowering patchpoint with thunks not yet implemented."); EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); } diff --git a/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/llvm/lib/Target/X86/X86RetpolineThunks.cpp deleted file mode 100644 index 9085d7f068ac..000000000000 --- a/llvm/lib/Target/X86/X86RetpolineThunks.cpp +++ /dev/null @@ -1,286 +0,0 @@ -//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// Pass that injects an MI thunk implementing a "retpoline". This is -/// a RET-implemented trampoline that is used to lower indirect calls in a way -/// that prevents speculation on some x86 processors and can be used to mitigate -/// security vulnerabilities due to targeted speculative execution and side -/// channels such as CVE-2017-5715. -/// -/// TODO(chandlerc): All of this code could use better comments and -/// documentation. 
-/// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86InstrBuilder.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "x86-retpoline-thunks" - -static const char ThunkNamePrefix[] = "__llvm_retpoline_"; -static const char R11ThunkName[] = "__llvm_retpoline_r11"; -static const char EAXThunkName[] = "__llvm_retpoline_eax"; -static const char ECXThunkName[] = "__llvm_retpoline_ecx"; -static const char EDXThunkName[] = "__llvm_retpoline_edx"; -static const char EDIThunkName[] = "__llvm_retpoline_edi"; - -namespace { -class X86RetpolineThunks : public MachineFunctionPass { -public: - static char ID; - - X86RetpolineThunks() : MachineFunctionPass(ID) {} - - StringRef getPassName() const override { return "X86 Retpoline Thunks"; } - - bool doInitialization(Module &M) override; - bool runOnMachineFunction(MachineFunction &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<MachineModuleInfoWrapperPass>(); - AU.addPreserved<MachineModuleInfoWrapperPass>(); - } - -private: - MachineModuleInfo *MMI = nullptr; - const TargetMachine *TM = nullptr; - bool Is64Bit = false; - const X86Subtarget *STI = nullptr; - const X86InstrInfo *TII = nullptr; - - bool InsertedThunks = false; - - void createThunkFunction(Module &M, StringRef Name); - void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg); - void populateThunk(MachineFunction &MF, unsigned Reg); -}; - -} // end anonymous namespace - -FunctionPass *llvm::createX86RetpolineThunksPass() { - return new X86RetpolineThunks(); -} - -char X86RetpolineThunks::ID = 0; - -bool X86RetpolineThunks::doInitialization(Module &M) { - InsertedThunks = false; - return false; -} - -bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) { - LLVM_DEBUG(dbgs() << getPassName() << '\n'); - - TM = &MF.getTarget();; - STI = &MF.getSubtarget<X86Subtarget>(); - TII = STI->getInstrInfo(); - Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64; - - MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); - Module &M = const_cast<Module &>(*MMI->getModule()); - - // If this function is not a thunk, check to see if we need to insert - // a thunk. - if (!MF.getName().startswith(ThunkNamePrefix)) { - // If we've already inserted a thunk, nothing else to do. - if (InsertedThunks) - return false; - - // Only add a thunk if one of the functions has the retpoline feature - // enabled in its subtarget, and doesn't enable external thunks. - // FIXME: Conditionalize on indirect calls so we don't emit a thunk when - // nothing will end up calling it. - // FIXME: It's a little silly to look at every function just to enumerate - // the subtargets, but eventually we'll want to look at them for indirect - // calls, so maybe this is OK. - if ((!STI->useRetpolineIndirectCalls() && - !STI->useRetpolineIndirectBranches()) || - STI->useRetpolineExternalThunk()) - return false; - - // Otherwise, we need to insert the thunk. 
- // WARNING: This is not really a well behaving thing to do in a function - // pass. We extract the module and insert a new function (and machine - // function) directly into the module. - if (Is64Bit) - createThunkFunction(M, R11ThunkName); - else - for (StringRef Name : - {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName}) - createThunkFunction(M, Name); - InsertedThunks = true; - return true; - } - - // If this *is* a thunk function, we need to populate it with the correct MI. - if (Is64Bit) { - assert(MF.getName() == "__llvm_retpoline_r11" && - "Should only have an r11 thunk on 64-bit targets"); - - // __llvm_retpoline_r11: - // callq .Lr11_call_target - // .Lr11_capture_spec: - // pause - // lfence - // jmp .Lr11_capture_spec - // .align 16 - // .Lr11_call_target: - // movq %r11, (%rsp) - // retq - populateThunk(MF, X86::R11); - } else { - // For 32-bit targets we need to emit a collection of thunks for various - // possible scratch registers as well as a fallback that uses EDI, which is - // normally callee saved. - // __llvm_retpoline_eax: - // calll .Leax_call_target - // .Leax_capture_spec: - // pause - // jmp .Leax_capture_spec - // .align 16 - // .Leax_call_target: - // movl %eax, (%esp) # Clobber return addr - // retl - // - // __llvm_retpoline_ecx: - // ... # Same setup - // movl %ecx, (%esp) - // retl - // - // __llvm_retpoline_edx: - // ... # Same setup - // movl %edx, (%esp) - // retl - // - // __llvm_retpoline_edi: - // ... # Same setup - // movl %edi, (%esp) - // retl - if (MF.getName() == EAXThunkName) - populateThunk(MF, X86::EAX); - else if (MF.getName() == ECXThunkName) - populateThunk(MF, X86::ECX); - else if (MF.getName() == EDXThunkName) - populateThunk(MF, X86::EDX); - else if (MF.getName() == EDIThunkName) - populateThunk(MF, X86::EDI); - else - llvm_unreachable("Invalid thunk name on x86-32!"); - } - - return true; -} - -void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) { - assert(Name.startswith(ThunkNamePrefix) && - "Created a thunk with an unexpected prefix!"); - - LLVMContext &Ctx = M.getContext(); - auto Type = FunctionType::get(Type::getVoidTy(Ctx), false); - Function *F = - Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M); - F->setVisibility(GlobalValue::HiddenVisibility); - F->setComdat(M.getOrInsertComdat(Name)); - - // Add Attributes so that we don't create a frame, unwind information, or - // inline. - AttrBuilder B; - B.addAttribute(llvm::Attribute::NoUnwind); - B.addAttribute(llvm::Attribute::Naked); - F->addAttributes(llvm::AttributeList::FunctionIndex, B); - - // Populate our function a bit so that we can verify. - BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); - IRBuilder<> Builder(Entry); - - Builder.CreateRetVoid(); - - // MachineFunctions/MachineBasicBlocks aren't created automatically for the - // IR-level constructs we already made. Create them and insert them into the - // module. - MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); - MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry); - - // Insert EntryMBB into MF. It's not in the module until we do this. - MF.insert(MF.end(), EntryMBB); -} - -void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB, - unsigned Reg) { - const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr; - const unsigned SPReg = Is64Bit ? 
X86::RSP : X86::ESP; - addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0) - .addReg(Reg); -} - -void X86RetpolineThunks::populateThunk(MachineFunction &MF, - unsigned Reg) { - // Set MF properties. We never use vregs... - MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); - - // Grab the entry MBB and erase any other blocks. O0 codegen appears to - // generate two bbs for the entry block. - MachineBasicBlock *Entry = &MF.front(); - Entry->clear(); - while (MF.size() > 1) - MF.erase(std::next(MF.begin())); - - MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock()); - MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock()); - MCSymbol *TargetSym = MF.getContext().createTempSymbol(); - MF.push_back(CaptureSpec); - MF.push_back(CallTarget); - - const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; - const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL; - - Entry->addLiveIn(Reg); - BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym); - - // The MIR verifier thinks that the CALL in the entry block will fall through - // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is - // the successor, but the MIR verifier doesn't know how to cope with that. - Entry->addSuccessor(CaptureSpec); - - // In the capture loop for speculation, we want to stop the processor from - // speculating as fast as possible. On Intel processors, the PAUSE instruction - // will block speculation without consuming any execution resources. On AMD - // processors, the PAUSE instruction is (essentially) a nop, so we also use an - // LFENCE instruction which they have advised will stop speculation as well - // with minimal resource utilization. We still end the capture with a jump to - // form an infinite loop to fully guarantee that no matter what implementation - // of the x86 ISA, speculating this code path never escapes. - BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE)); - BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE)); - BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec); - CaptureSpec->setHasAddressTaken(); - CaptureSpec->addSuccessor(CaptureSpec); - - CallTarget->addLiveIn(Reg); - CallTarget->setHasAddressTaken(); - CallTarget->setAlignment(Align(16)); - insertRegReturnAddrClobber(*CallTarget, Reg); - CallTarget->back().setPreInstrSymbol(MF, TargetSym); - BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); -} diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index f4e8d30328ca..af5153243c8b 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -421,6 +421,16 @@ protected: /// than emitting one inside the compiler. bool UseRetpolineExternalThunk = false; + /// Prevent generation of indirect call/branch instructions from memory, + /// and force all indirect call/branch instructions from a register to be + /// preceded by an LFENCE. Also decompose RET instructions into a + /// POP+LFENCE+JMP sequence. + bool UseLVIControlFlowIntegrity = false; + + /// Insert LFENCE instructions to prevent data speculatively injected into + /// loads from being used maliciously. + bool UseLVILoadHardening = false; + /// Use software floating point for code generation. 
bool UseSoftFloat = false;
@@ -707,8 +717,21 @@ public:
     return UseRetpolineIndirectBranches;
   }
   bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
+
+  // These are generic getters that OR together all of the thunk types
+  // supported by the subtarget. Therefore useIndirectThunk*() will return true
+  // if any respective thunk feature is enabled.
+  bool useIndirectThunkCalls() const {
+    return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
+  }
+  bool useIndirectThunkBranches() const {
+    return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
+  }
+
   bool preferMaskRegisters() const { return PreferMaskRegisters; }
   bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
+  bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
+  bool useLVILoadHardening() const { return UseLVILoadHardening; }
 
   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
   unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -853,10 +876,10 @@ public:
   /// Return true if the subtarget allows calls to immediate address.
   bool isLegalToCallImmediateAddr() const;
 
-  /// If we are using retpolines, we need to expand indirectbr to avoid it
+  /// If we are using indirect thunks, we need to expand indirectbr to avoid it
   /// lowering to an actual indirect jump.
   bool enableIndirectBrExpand() const override {
-    return useRetpolineIndirectBranches();
+    return useIndirectThunkBranches();
   }
 
   /// Enable the MachineScheduler pass for all X86 subtargets.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 7176e46f07b1..9f639ffa22ec 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -82,6 +82,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
   initializeX86SpeculativeLoadHardeningPassPass(PR);
   initializeX86FlagsCopyLoweringPassPass(PR);
   initializeX86CondBrFoldingPassPass(PR);
+  initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
+  initializeX86LoadValueInjectionRetHardeningPassPass(PR);
   initializeX86OptimizeLEAPassPass(PR);
 }
 
@@ -496,6 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() {
 
 void X86PassConfig::addPostRegAlloc() {
   addPass(createX86FloatingPointStackifierPass());
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createX86LoadValueInjectionLoadHardeningPass());
+  else
+    addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
 }
 
 void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
@@ -525,7 +531,7 @@ void X86PassConfig::addPreEmitPass2() {
   const Triple &TT = TM->getTargetTriple();
   const MCAsmInfo *MAI = TM->getMCAsmInfo();
 
-  addPass(createX86RetpolineThunksPass());
+  addPass(createX86IndirectThunksPass());
 
   // Insert extra int3 instructions after trailing call instructions to avoid
   // issues in the unwinder.
@@ -542,6 +548,7 @@ void X86PassConfig::addPreEmitPass2() {
   // Identify valid longjmp targets for Windows Control Flow Guard.
   if (TT.isOSWindows())
     addPass(createCFGuardLongjmpPass());
+  addPass(createX86LoadValueInjectionRetHardeningPass());
 }
 
 std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {