aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-05-30 17:37:31 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-05-30 17:37:31 +0000
commitee2f195dd3e40f49698ca4dc2666ec09c770e80d (patch)
tree66fa9a69e5789356dfe844991e64bac9222f3a35
parentab44ce3d598882e51a25eb82eb7ae6308de85ae6 (diff)
downloadsrc-ee2f195dd3e40f49698ca4dc2666ec09c770e80d.tar.gz
src-ee2f195dd3e40f49698ca4dc2666ec09c770e80d.zip
Vendor import of llvm trunk r304222:vendor/llvm/llvm-trunk-r304222
Notes
Notes: svn path=/vendor/llvm/dist/; revision=319230 svn path=/vendor/llvm/llvm-trunk-r304222/; revision=319231; tag=vendor/llvm/llvm-trunk-r304222
-rw-r--r--docs/Proposals/VectorizationPlan.rst182
-rw-r--r--docs/Vectorizers.rst11
-rw-r--r--docs/index.rst3
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h3
-rw-r--r--include/llvm/CodeGen/DIE.h4
-rw-r--r--include/llvm/DebugInfo/CodeView/CodeView.h22
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h (renamed from include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h)37
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h59
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h (renamed from include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h)37
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h (renamed from include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h)32
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h (renamed from include/llvm/DebugInfo/CodeView/StringTable.h)39
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSubsection.h52
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h (renamed from include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h)39
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h (renamed from include/llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h)34
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h53
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h (renamed from include/llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h)11
-rw-r--r--include/llvm/DebugInfo/CodeView/ModuleDebugFragment.h48
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h4
-rw-r--r--include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h23
-rw-r--r--include/llvm/DebugInfo/PDB/Native/DbiStream.h4
-rw-r--r--include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/PDBStringTable.h4
-rw-r--r--include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h10
-rw-r--r--include/llvm/MC/ConstantPools.h3
-rw-r--r--include/llvm/Support/ManagedStatic.h20
-rw-r--r--include/llvm/TableGen/Record.h18
-rw-r--r--include/llvm/Transforms/Scalar/GVNExpression.h11
-rw-r--r--lib/Analysis/ScalarEvolution.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/CodeViewDebug.cpp17
-rw-r--r--lib/CodeGen/AsmPrinter/CodeViewDebug.h2
-rw-r--r--lib/CodeGen/GlobalISel/Localizer.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp5
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp1
-rw-r--r--lib/DebugInfo/CodeView/CMakeLists.txt16
-rw-r--r--lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp (renamed from lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp)33
-rw-r--r--lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp44
-rw-r--r--lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp (renamed from lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp)38
-rw-r--r--lib/DebugInfo/CodeView/DebugLinesSubsection.cpp (renamed from lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp)52
-rw-r--r--lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp (renamed from lib/DebugInfo/CodeView/StringTable.cpp)27
-rw-r--r--lib/DebugInfo/CodeView/DebugSubsection.cpp (renamed from lib/DebugInfo/CodeView/ModuleDebugFragment.cpp)8
-rw-r--r--lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp81
-rw-r--r--lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp52
-rw-r--r--lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp34
-rw-r--r--lib/DebugInfo/CodeView/EnumTables.cpp28
-rw-r--r--lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp84
-rw-r--r--lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp52
-rw-r--r--lib/DebugInfo/CodeView/SymbolDumper.cpp6
-rw-r--r--lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp18
-rw-r--r--lib/MC/MCCodeView.cpp6
-rw-r--r--lib/Support/APFloat.cpp2
-rw-r--r--lib/Support/Timer.cpp13
-rw-r--r--lib/TableGen/Record.cpp28
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp35
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp3
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp3
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp7
-rw-r--r--lib/Target/AMDGPU/SIPeepholeSDWA.cpp49
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp2
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp387
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp1
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td23
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td27
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp3
-rw-r--r--lib/Target/SystemZ/SystemZ.td2
-rw-r--r--lib/Target/SystemZ/SystemZFeatures.td14
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp60
-rw-r--r--lib/Target/SystemZ/SystemZInstrDFP.td231
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td19
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td97
-rw-r--r--lib/Target/SystemZ/SystemZInstrHFP.td240
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ13.td232
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ196.td219
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZEC12.td225
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h8
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.cpp4
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp81
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp55
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp21
-rw-r--r--test/CodeGen/AArch64/reg-scavenge-frame.mir52
-rw-r--r--test/CodeGen/AMDGPU/add.v2i16.ll30
-rw-r--r--test/CodeGen/AMDGPU/bfe-combine.ll10
-rw-r--r--test/CodeGen/AMDGPU/commute-compares.ll2
-rw-r--r--test/CodeGen/AMDGPU/commute_modifiers.ll2
-rw-r--r--test/CodeGen/AMDGPU/copy-illegal-type.ll2
-rw-r--r--test/CodeGen/AMDGPU/cvt_f32_ubyte.ll1
-rw-r--r--test/CodeGen/AMDGPU/fabs.f64.ll4
-rw-r--r--test/CodeGen/AMDGPU/fabs.ll4
-rw-r--r--test/CodeGen/AMDGPU/fadd.f16.ll18
-rw-r--r--test/CodeGen/AMDGPU/fadd64.ll2
-rw-r--r--test/CodeGen/AMDGPU/fcanonicalize.f16.ll21
-rw-r--r--test/CodeGen/AMDGPU/fmul.f16.ll18
-rw-r--r--test/CodeGen/AMDGPU/fneg-fabs.f16.ll17
-rw-r--r--test/CodeGen/AMDGPU/fneg-fabs.f64.ll4
-rw-r--r--test/CodeGen/AMDGPU/fneg-fabs.ll4
-rw-r--r--test/CodeGen/AMDGPU/fneg.f16.ll8
-rw-r--r--test/CodeGen/AMDGPU/fract.f64.ll6
-rw-r--r--test/CodeGen/AMDGPU/fsub.f16.ll18
-rw-r--r--test/CodeGen/AMDGPU/fsub64.ll2
-rw-r--r--test/CodeGen/AMDGPU/immv216.ll57
-rw-r--r--test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll32
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll6
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll2
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll4
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll2
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll3
-rw-r--r--test/CodeGen/AMDGPU/llvm.fma.f16.ll6
-rw-r--r--test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll4
-rw-r--r--test/CodeGen/AMDGPU/llvm.maxnum.f16.ll18
-rw-r--r--test/CodeGen/AMDGPU/llvm.minnum.f16.ll17
-rw-r--r--test/CodeGen/AMDGPU/mad24-get-global-id.ll2
-rw-r--r--test/CodeGen/AMDGPU/madak.ll4
-rw-r--r--test/CodeGen/AMDGPU/madmk.ll4
-rw-r--r--test/CodeGen/AMDGPU/mul.ll8
-rw-r--r--test/CodeGen/AMDGPU/scratch-simple.ll6
-rw-r--r--test/CodeGen/AMDGPU/sdiv.ll2
-rw-r--r--test/CodeGen/AMDGPU/sdwa-peephole.ll5
-rw-r--r--test/CodeGen/AMDGPU/sdwa-scalar-ops.mir410
-rw-r--r--test/CodeGen/AMDGPU/select.f16.ll12
-rw-r--r--test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll21
-rw-r--r--test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll43
-rw-r--r--test/CodeGen/AMDGPU/sminmax.v2i16.ll5
-rw-r--r--test/CodeGen/AMDGPU/srem.ll2
-rw-r--r--test/CodeGen/AMDGPU/sub.v2i16.ll30
-rw-r--r--test/CodeGen/AMDGPU/udiv.ll6
-rw-r--r--test/CodeGen/AMDGPU/urem.ll2
-rw-r--r--test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll12
-rw-r--r--test/CodeGen/AMDGPU/v_mac_f16.ll11
-rw-r--r--test/CodeGen/AMDGPU/wqm.ll2
-rw-r--r--test/CodeGen/WebAssembly/negative-base-reg.ll2
-rw-r--r--test/CodeGen/X86/bitcast-and-setcc-128.ll1155
-rw-r--r--test/CodeGen/X86/bitcast-and-setcc-256.ll403
-rw-r--r--test/CodeGen/X86/mul-constant-i16.ll141
-rw-r--r--test/CodeGen/X86/mul-constant-i32.ll1589
-rw-r--r--test/CodeGen/X86/mul-constant-i64.ll1612
-rw-r--r--test/CodeGen/X86/setcc-lowering.ll65
-rw-r--r--test/CodeGen/X86/vector-sext.ll56
-rw-r--r--test/CodeGen/X86/xchg-nofold.ll37
-rw-r--r--test/MC/AArch64/ldr-pseudo.s12
-rw-r--r--test/MC/Disassembler/SystemZ/insns-z13.txt108
-rw-r--r--test/MC/Disassembler/SystemZ/insns.txt3178
-rw-r--r--test/MC/Mips/macro-li.d.s443
-rw-r--r--test/MC/Mips/macro-li.s.s198
-rw-r--r--test/MC/SystemZ/insn-bad-z13.s146
-rw-r--r--test/MC/SystemZ/insn-bad-z196.s368
-rw-r--r--test/MC/SystemZ/insn-bad-zEC12.s166
-rw-r--r--test/MC/SystemZ/insn-bad.s998
-rw-r--r--test/MC/SystemZ/insn-good-z13.s80
-rw-r--r--test/MC/SystemZ/insn-good-z196.s336
-rw-r--r--test/MC/SystemZ/insn-good-zEC12.s80
-rw-r--r--test/MC/SystemZ/insn-good.s2135
-rw-r--r--test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll26
-rw-r--r--tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp14
-rw-r--r--tools/llvm-pdbdump/C13DebugFragmentVisitor.h22
-rw-r--r--tools/llvm-pdbdump/LLVMOutputStyle.cpp14
-rw-r--r--tools/llvm-pdbdump/YAMLOutputStyle.cpp16
-rw-r--r--tools/llvm-pdbdump/llvm-pdbdump.cpp13
-rw-r--r--tools/llvm-readobj/COFFDumper.cpp113
-rw-r--r--tools/llvm-readobj/CodeView.h54
-rw-r--r--unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp12
-rw-r--r--unittests/Support/ManagedStatic.cpp41
-rw-r--r--utils/TableGen/GlobalISelEmitter.cpp2
-rw-r--r--utils/TableGen/X86FoldTablesEmitter.cpp9
166 files changed, 16653 insertions, 1439 deletions
diff --git a/docs/Proposals/VectorizationPlan.rst b/docs/Proposals/VectorizationPlan.rst
new file mode 100644
index 000000000000..82ce4b2de17a
--- /dev/null
+++ b/docs/Proposals/VectorizationPlan.rst
@@ -0,0 +1,182 @@
+==================
+Vectorization Plan
+==================
+
+.. contents::
+ :local:
+
+Abstract
+========
+The vectorization transformation can be rather complicated, involving several
+potential alternatives, especially for outer-loops [1]_ but also possibly for
+innermost loops. These alternatives may have significant performance impact,
+both positive and negative. A cost model is therefore employed to identify the
+best alternative, including the alternative of avoiding any transformation
+altogether.
+
+The Vectorization Plan is an explicit model for describing vectorization
+candidates. It serves for both optimizing candidates including estimating their
+cost reliably, and for performing their final translation into IR. This
+facilitates dealing with multiple vectorization candidates.
+
+High-level Design
+=================
+
+Vectorization Workflow
+----------------------
+VPlan-based vectorization involves three major steps, taking a "scenario-based
+approach" to vectorization planning:
+
+1. Legal Step: check if a loop can be legally vectorized; encode contraints and
+ artifacts if so.
+2. Plan Step:
+
+ a. Build initial VPlans following the constraints and decisions taken by
+ Legal Step 1, and compute their cost.
+ b. Apply optimizations to the VPlans, possibly forking additional VPlans.
+ Prune sub-optimal VPlans having relatively high cost.
+3. Execute Step: materialize the best VPlan. Note that this is the only step
+ that modifies the IR.
+
+Design Guidelines
+-----------------
+In what follows, the term "input IR" refers to code that is fed into the
+vectorizer whereas the term "output IR" refers to code that is generated by the
+vectorizer. The output IR contains code that has been vectorized or "widened"
+according to a loop Vectorization Factor (VF), and/or loop unroll-and-jammed
+according to an Unroll Factor (UF).
+The design of VPlan follows several high-level guidelines:
+
+1. Analysis-like: building and manipulating VPlans must not modify the input IR.
+ In particular, if the best option is not to vectorize at all, the
+ vectorization process terminates before reaching Step 3, and compilation
+ should proceed as if VPlans had not been built.
+
+2. Align Cost & Execute: each VPlan must support both estimating the cost and
+ generating the output IR code, such that the cost estimation evaluates the
+ to-be-generated code reliably.
+
+3. Support vectorizing additional constructs:
+
+ a. Outer-loop vectorization. In particular, VPlan must be able to model the
+ control-flow of the output IR which may include multiple basic-blocks and
+ nested loops.
+ b. SLP vectorization.
+ c. Combinations of the above, including nested vectorization: vectorizing
+ both an inner loop and an outer-loop at the same time (each with its own
+ VF and UF), mixed vectorization: vectorizing a loop with SLP patterns
+ inside [4]_, (re)vectorizing input IR containing vector code.
+ d. Function vectorization [2]_.
+
+4. Support multiple candidates efficiently. In particular, similar candidates
+ related to a range of possible VF's and UF's must be represented efficiently.
+ Potential versioning needs to be supported efficiently.
+
+5. Support vectorizing idioms, such as interleaved groups of strided loads or
+ stores. This is achieved by modeling a sequence of output instructions using
+ a "Recipe", which is responsible for computing its cost and generating its
+ code.
+
+6. Encapsulate Single-Entry Single-Exit regions (SESE). During vectorization
+ such regions may need to be, for example, predicated and linearized, or
+ replicated VF*UF times to handle scalarized and predicated instructions.
+ Innerloops are also modelled as SESE regions.
+
+Low-level Design
+================
+The low-level design of VPlan comprises of the following classes.
+
+:LoopVectorizationPlanner:
+ A LoopVectorizationPlanner is designed to handle the vectorization of a loop
+ or a loop nest. It can construct, optimize and discard one or more VPlans,
+ each VPlan modelling a distinct way to vectorize the loop or the loop nest.
+ Once the best VPlan is determined, including the best VF and UF, this VPlan
+ drives the generation of output IR.
+
+:VPlan:
+ A model of a vectorized candidate for a given input IR loop or loop nest. This
+ candidate is represented using a Hierarchical CFG. VPlan supports estimating
+ the cost and driving the generation of the output IR code it represents.
+
+:Hierarchical CFG:
+ A control-flow graph whose nodes are basic-blocks or Hierarchical CFG's. The
+ Hierarchical CFG data structure is similar to the Tile Tree [5]_, where
+ cross-Tile edges are lifted to connect Tiles instead of the original
+ basic-blocks as in Sharir [6]_, promoting the Tile encapsulation. The terms
+ Region and Block are used rather than Tile [5]_ to avoid confusion with loop
+ tiling.
+
+:VPBlockBase:
+ The building block of the Hierarchical CFG. A pure-virtual base-class of
+ VPBasicBlock and VPRegionBlock, see below. VPBlockBase models the hierarchical
+ control-flow relations with other VPBlocks. Note that in contrast to the IR
+ BasicBlock, a VPBlockBase models its control-flow successors and predecessors
+ directly, rather than through a Terminator branch or through predecessor
+ branches that "use" the VPBlockBase.
+
+:VPBasicBlock:
+ VPBasicBlock is a subclass of VPBlockBase, and serves as the leaves of the
+ Hierarchical CFG. It represents a sequence of output IR instructions that will
+ appear consecutively in an output IR basic-block. The instructions of this
+ basic-block originate from one or more VPBasicBlocks. VPBasicBlock holds a
+ sequence of zero or more VPRecipes that model the cost and generation of the
+ output IR instructions.
+
+:VPRegionBlock:
+ VPRegionBlock is a subclass of VPBlockBase. It models a collection of
+ VPBasicBlocks and VPRegionBlocks which form a SESE subgraph of the output IR
+ CFG. A VPRegionBlock may indicate that its contents are to be replicated a
+ constant number of times when output IR is generated, effectively representing
+ a loop with constant trip-count that will be completely unrolled. This is used
+ to support scalarized and predicated instructions with a single model for
+ multiple candidate VF's and UF's.
+
+:VPRecipeBase:
+ A pure-virtual base class modeling a sequence of one or more output IR
+ instructions, possibly based on one or more input IR instructions. These
+ input IR instructions are referred to as "Ingredients" of the Recipe. A Recipe
+ may specify how its ingredients are to be transformed to produce the output IR
+ instructions; e.g., cloned once, replicated multiple times or widened
+ according to selected VF.
+
+:VPTransformState:
+ Stores information used for generating output IR, passed from
+ LoopVectorizationPlanner to its selected VPlan for execution, and used to pass
+ additional information down to VPBlocks and VPRecipes.
+
+Related LLVM components
+-----------------------
+1. SLP Vectorizer: one can compare the VPlan model with LLVM's existing SLP
+ tree, where TSLP [3]_ adds Plan Step 2.b.
+
+2. RegionInfo: one can compare VPlan's H-CFG with the Region Analysis as used by
+ Polly [7]_.
+
+References
+----------
+.. [1] "Outer-loop vectorization: revisited for short SIMD architectures", Dorit
+ Nuzman and Ayal Zaks, PACT 2008.
+
+.. [2] "Proposal for function vectorization and loop vectorization with function
+ calls", Xinmin Tian, [`cfe-dev
+ <http://lists.llvm.org/pipermail/cfe-dev/2016-March/047732.html>`_].,
+ March 2, 2016.
+ See also `review <https://reviews.llvm.org/D22792>`_.
+
+.. [3] "Throttling Automatic Vectorization: When Less is More", Vasileios
+ Porpodas and Tim Jones, PACT 2015 and LLVM Developers' Meeting 2015.
+
+.. [4] "Exploiting mixed SIMD parallelism by reducing data reorganization
+ overhead", Hao Zhou and Jingling Xue, CGO 2016.
+
+.. [5] "Register Allocation via Hierarchical Graph Coloring", David Callahan and
+ Brian Koblenz, PLDI 1991
+
+.. [6] "Structural analysis: A new approach to flow analysis in optimizing
+ compilers", M. Sharir, Journal of Computer Languages, Jan. 1980
+
+.. [7] "Enabling Polyhedral Optimizations in LLVM", Tobias Grosser, Diploma
+ thesis, 2011.
+
+.. [8] "Introducing VPlan to the Loop Vectorizer", Gil Rapaport and Ayal Zaks,
+ European LLVM Developers' Meeting 2017.
diff --git a/docs/Vectorizers.rst b/docs/Vectorizers.rst
index a909d458c317..317271af4032 100644
--- a/docs/Vectorizers.rst
+++ b/docs/Vectorizers.rst
@@ -382,6 +382,17 @@ And Linpack-pc with the same configuration. Result is Mflops, higher is better.
.. image:: linpack-pc.png
+Ongoing Development Directions
+------------------------------
+
+.. toctree::
+ :hidden:
+
+ Proposals/VectorizationPlan
+
+:doc:`Proposals/VectorizationPlan`
+ Modeling the process and upgrading the infrastructure of LLVM's Loop Vectorizer.
+
.. _slp-vectorizer:
The SLP Vectorizer
diff --git a/docs/index.rst b/docs/index.rst
index becbe48e7ec7..220df1566bd5 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -528,6 +528,7 @@ can be better.
CodeOfConduct
Proposals/GitHubMove
+ Proposals/VectorizationPlan
:doc:`CodeOfConduct`
Proposal to adopt a code of conduct on the LLVM social spaces (lists, events,
@@ -536,6 +537,8 @@ can be better.
:doc:`Proposals/GitHubMove`
Proposal to move from SVN/Git to GitHub.
+:doc:`Proposals/VectorizationPlan`
+ Proposal to model the process and upgrade the infrastructure of LLVM's Loop Vectorizer.
Indices and tables
==================
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 4a6fc245c225..1d715b590ab7 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -1536,8 +1536,7 @@ public:
/// Determine if the SCEV can be evaluated at loop's entry. It is true if it
/// doesn't depend on a SCEVUnknown of an instruction which is dominated by
/// the header of loop L.
- bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L, DominatorTree &DT,
- LoopInfo &LI);
+ bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L);
/// Return true if the given SCEV changes value in a known way in the
/// specified loop. This property being true implies that the value is
diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h
index 4be44e62fa92..4f47ba6e3852 100644
--- a/include/llvm/CodeGen/DIE.h
+++ b/include/llvm/CodeGen/DIE.h
@@ -383,11 +383,11 @@ private:
return;
#define HANDLE_DIEVALUE_SMALL(T) \
case is##T: \
- destruct<DIE##T>();
+ destruct<DIE##T>(); \
return;
#define HANDLE_DIEVALUE_LARGE(T) \
case is##T: \
- destruct<const DIE##T *>();
+ destruct<const DIE##T *>(); \
return;
#include "llvm/CodeGen/DIEValue.def"
}
diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h
index f881ad0c9d80..3316e71916ed 100644
--- a/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -13,6 +13,8 @@
#include <cinttypes>
#include <type_traits>
+#include "llvm/Support/Endian.h"
+
namespace llvm {
namespace codeview {
@@ -291,7 +293,7 @@ enum class ModifierOptions : uint16_t {
};
CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(ModifierOptions)
-enum class ModuleDebugFragmentKind : uint32_t {
+enum class DebugSubsectionKind : uint32_t {
None = 0,
Symbols = 0xf1,
Lines = 0xf2,
@@ -550,6 +552,24 @@ enum LineFlags : uint16_t {
LF_None = 0,
LF_HaveColumns = 1, // CV_LINES_HAVE_COLUMNS
};
+
+/// Data in the the SUBSEC_FRAMEDATA subection.
+struct FrameData {
+ support::ulittle32_t RvaStart;
+ support::ulittle32_t CodeSize;
+ support::ulittle32_t LocalSize;
+ support::ulittle32_t ParamsSize;
+ support::ulittle32_t MaxStackSize;
+ support::ulittle32_t FrameFunc;
+ support::ulittle16_t PrologSize;
+ support::ulittle16_t SavedRegsSize;
+ support::ulittle32_t Flags;
+ enum : uint32_t {
+ HasSEH = 1 << 0,
+ HasEH = 1 << 1,
+ IsFunctionStart = 1 << 2,
+ };
+};
}
}
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
index 6c08c9aa2137..e7036033d2d9 100644
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h
+++ b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
@@ -1,4 +1,4 @@
-//===- ModuleDebugFileChecksumFragment.h ------------------------*- C++ -*-===//
+//===- DebugChecksumsSubsection.h -------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFILECHECKSUMFRAGMENT_H
-#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFILECHECKSUMFRAGMENT_H
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCHECKSUMSSUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCHECKSUMSSUBSECTION_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -21,7 +21,7 @@
namespace llvm {
namespace codeview {
-class StringTable;
+class DebugStringTableSubsection;
struct FileChecksumEntry {
uint32_t FileNameOffset; // Byte offset of filename in global stringtable.
@@ -43,19 +43,22 @@ public:
namespace llvm {
namespace codeview {
-class ModuleDebugFileChecksumFragmentRef final : public ModuleDebugFragmentRef {
+class DebugChecksumsSubsectionRef final : public DebugSubsectionRef {
typedef VarStreamArray<codeview::FileChecksumEntry> FileChecksumArray;
typedef FileChecksumArray::Iterator Iterator;
public:
- ModuleDebugFileChecksumFragmentRef()
- : ModuleDebugFragmentRef(ModuleDebugFragmentKind::FileChecksums) {}
+ DebugChecksumsSubsectionRef()
+ : DebugSubsectionRef(DebugSubsectionKind::FileChecksums) {}
- static bool classof(const ModuleDebugFragmentRef *S) {
- return S->kind() == ModuleDebugFragmentKind::FileChecksums;
+ static bool classof(const DebugSubsectionRef *S) {
+ return S->kind() == DebugSubsectionKind::FileChecksums;
}
+ bool valid() const { return Checksums.valid(); }
+
Error initialize(BinaryStreamReader Reader);
+ Error initialize(BinaryStreamRef Stream);
Iterator begin() { return Checksums.begin(); }
Iterator end() { return Checksums.end(); }
@@ -66,23 +69,23 @@ private:
FileChecksumArray Checksums;
};
-class ModuleDebugFileChecksumFragment final : public ModuleDebugFragment {
+class DebugChecksumsSubsection final : public DebugSubsection {
public:
- explicit ModuleDebugFileChecksumFragment(StringTable &Strings);
+ explicit DebugChecksumsSubsection(DebugStringTableSubsection &Strings);
- static bool classof(const ModuleDebugFragment *S) {
- return S->kind() == ModuleDebugFragmentKind::FileChecksums;
+ static bool classof(const DebugSubsection *S) {
+ return S->kind() == DebugSubsectionKind::FileChecksums;
}
void addChecksum(StringRef FileName, FileChecksumKind Kind,
ArrayRef<uint8_t> Bytes);
- uint32_t calculateSerializedLength() override;
- Error commit(BinaryStreamWriter &Writer) override;
+ uint32_t calculateSerializedSize() const override;
+ Error commit(BinaryStreamWriter &Writer) const override;
uint32_t mapChecksumOffset(StringRef FileName) const;
private:
- StringTable &Strings;
+ DebugStringTableSubsection &Strings;
DenseMap<uint32_t, uint32_t> OffsetMap;
uint32_t SerializedSize = 0;
diff --git a/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h b/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
new file mode 100644
index 000000000000..686b5c4f242e
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
@@ -0,0 +1,59 @@
+//===- DebugFrameDataSubsection.h ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGFRAMEDATASUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGFRAMEDATASUBSECTION_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace codeview {
+class DebugFrameDataSubsectionRef final : public DebugSubsectionRef {
+public:
+ DebugFrameDataSubsectionRef()
+ : DebugSubsectionRef(DebugSubsectionKind::FrameData) {}
+ static bool classof(const DebugSubsection *S) {
+ return S->kind() == DebugSubsectionKind::FrameData;
+ }
+
+ Error initialize(BinaryStreamReader Reader);
+
+ FixedStreamArray<FrameData>::Iterator begin() const { return Frames.begin(); }
+ FixedStreamArray<FrameData>::Iterator end() const { return Frames.end(); }
+
+ const void *getRelocPtr() const { return RelocPtr; }
+
+private:
+ const uint32_t *RelocPtr = nullptr;
+ FixedStreamArray<FrameData> Frames;
+};
+
+class DebugFrameDataSubsection final : public DebugSubsection {
+public:
+ DebugFrameDataSubsection()
+ : DebugSubsection(DebugSubsectionKind::FrameData) {}
+ static bool classof(const DebugSubsection *S) {
+ return S->kind() == DebugSubsectionKind::FrameData;
+ }
+
+ uint32_t calculateSerializedSize() const override;
+ Error commit(BinaryStreamWriter &Writer) const override;
+
+ void addFrameData(const FrameData &Frame);
+
+private:
+ std::vector<FrameData> Frames;
+};
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
index 348497cbf7f2..e2cfc3c99233 100644
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h
+++ b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
@@ -1,4 +1,4 @@
-//===- ModuleDebugInlineeLinesFragment.h ------------------------*- C++ -*-===//
+//===- DebugInlineeLinesSubsection.h ----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,11 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGINLINEELINESFRAGMENT_H
-#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGINLINEELINESFRAGMENT_H
+#ifndef LLVM_DEBUGINFO_CODEVIEW_BUGINLINEELINESSUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_BUGINLINEELINESSUBSECTION_H
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/DebugInfo/CodeView/Line.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
@@ -19,9 +19,8 @@
namespace llvm {
namespace codeview {
-class ModuleDebugInlineeLineFragmentRef;
-class ModuleDebugFileChecksumFragment;
-class StringTable;
+class DebugInlineeLinesSubsectionsRef;
+class DebugChecksumsSubsection;
enum class InlineeLinesSignature : uint32_t {
Normal, // CV_INLINEE_SOURCE_LINE_SIGNATURE
@@ -51,15 +50,15 @@ template <> struct VarStreamArrayExtractor<codeview::InlineeSourceLine> {
};
namespace codeview {
-class ModuleDebugInlineeLineFragmentRef final : public ModuleDebugFragmentRef {
+class DebugInlineeLinesSubsectionRef final : public DebugSubsectionRef {
typedef VarStreamArray<InlineeSourceLine> LinesArray;
typedef LinesArray::Iterator Iterator;
public:
- ModuleDebugInlineeLineFragmentRef();
+ DebugInlineeLinesSubsectionRef();
- static bool classof(const ModuleDebugFragmentRef *S) {
- return S->kind() == ModuleDebugFragmentKind::InlineeLines;
+ static bool classof(const DebugSubsectionRef *S) {
+ return S->kind() == DebugSubsectionKind::InlineeLines;
}
Error initialize(BinaryStreamReader Reader);
@@ -73,23 +72,23 @@ private:
VarStreamArray<InlineeSourceLine> Lines;
};
-class ModuleDebugInlineeLineFragment final : public ModuleDebugFragment {
+class DebugInlineeLinesSubsection final : public DebugSubsection {
public:
- ModuleDebugInlineeLineFragment(ModuleDebugFileChecksumFragment &Checksums,
- bool HasExtraFiles);
+ DebugInlineeLinesSubsection(DebugChecksumsSubsection &Checksums,
+ bool HasExtraFiles);
- static bool classof(const ModuleDebugFragment *S) {
- return S->kind() == ModuleDebugFragmentKind::InlineeLines;
+ static bool classof(const DebugSubsection *S) {
+ return S->kind() == DebugSubsectionKind::InlineeLines;
}
- Error commit(BinaryStreamWriter &Writer) override;
- uint32_t calculateSerializedLength() override;
+ Error commit(BinaryStreamWriter &Writer) const override;
+ uint32_t calculateSerializedSize() const override;
void addInlineSite(TypeIndex FuncId, StringRef FileName, uint32_t SourceLine);
void addExtraFile(StringRef FileName);
private:
- ModuleDebugFileChecksumFragment &Checksums;
+ DebugChecksumsSubsection &Checksums;
bool HasExtraFiles = false;
uint32_t ExtraFileCount = 0;
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
index 3124236b8fb1..1b63af59c2ed 100644
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h
+++ b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
@@ -1,4 +1,4 @@
-//===- ModuleDebugLineFragment.h --------------------------------*- C++ -*-===//
+//===- DebugLinesSubsection.h --------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,8 +10,8 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGLINEFRAGMENT_H
#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGLINEFRAGMENT_H
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/DebugInfo/CodeView/Line.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
@@ -19,8 +19,8 @@
namespace llvm {
namespace codeview {
-class ModuleDebugFileChecksumFragment;
-class StringTable;
+class DebugChecksumsSubsection;
+class DebugStringTableSubsection;
// Corresponds to the `CV_DebugSLinesHeader_t` structure.
struct LineFragmentHeader {
@@ -70,16 +70,16 @@ public:
LineColumnEntry &Item, const LineFragmentHeader *Ctx);
};
-class ModuleDebugLineFragmentRef final : public ModuleDebugFragmentRef {
+class DebugLinesSubsectionRef final : public DebugSubsectionRef {
friend class LineColumnExtractor;
typedef VarStreamArray<LineColumnEntry, LineColumnExtractor> LineInfoArray;
typedef LineInfoArray::Iterator Iterator;
public:
- ModuleDebugLineFragmentRef();
+ DebugLinesSubsectionRef();
- static bool classof(const ModuleDebugFragmentRef *S) {
- return S->kind() == ModuleDebugFragmentKind::Lines;
+ static bool classof(const DebugSubsectionRef *S) {
+ return S->kind() == DebugSubsectionKind::Lines;
}
Error initialize(BinaryStreamReader Reader);
@@ -96,7 +96,7 @@ private:
LineInfoArray LinesAndColumns;
};
-class ModuleDebugLineFragment final : public ModuleDebugFragment {
+class DebugLinesSubsection final : public DebugSubsection {
struct Block {
Block(uint32_t ChecksumBufferOffset)
: ChecksumBufferOffset(ChecksumBufferOffset) {}
@@ -107,11 +107,11 @@ class ModuleDebugLineFragment final : public ModuleDebugFragment {
};
public:
- ModuleDebugLineFragment(ModuleDebugFileChecksumFragment &Checksums,
- StringTable &Strings);
+ DebugLinesSubsection(DebugChecksumsSubsection &Checksums,
+ DebugStringTableSubsection &Strings);
- static bool classof(const ModuleDebugFragment *S) {
- return S->kind() == ModuleDebugFragmentKind::Lines;
+ static bool classof(const DebugSubsection *S) {
+ return S->kind() == DebugSubsectionKind::Lines;
}
void createBlock(StringRef FileName);
@@ -119,8 +119,8 @@ public:
void addLineAndColumnInfo(uint32_t Offset, const LineInfo &Line,
uint32_t ColStart, uint32_t ColEnd);
- uint32_t calculateSerializedLength() override;
- Error commit(BinaryStreamWriter &Writer) override;
+ uint32_t calculateSerializedSize() const override;
+ Error commit(BinaryStreamWriter &Writer) const override;
void setRelocationAddress(uint16_t Segment, uint16_t Offset);
void setCodeSize(uint32_t Size);
@@ -129,7 +129,7 @@ public:
bool hasColumnInfo() const;
private:
- ModuleDebugFileChecksumFragment &Checksums;
+ DebugChecksumsSubsection &Checksums;
uint16_t RelocOffset = 0;
uint16_t RelocSegment = 0;
diff --git a/include/llvm/DebugInfo/CodeView/StringTable.h b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
index 05dc02ee849f..fbe39cb16f09 100644
--- a/include/llvm/DebugInfo/CodeView/StringTable.h
+++ b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
@@ -1,4 +1,4 @@
-//===- StringTable.h - CodeView String Table Reader/Writer ------*- C++ -*-===//
+//===- DebugStringTableSubsection.h - CodeView String Table -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_STRINGTABLE_H
-#define LLVM_DEBUGINFO_CODEVIEW_STRINGTABLE_H
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSTRINGTABLESUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSTRINGTABLESUBSECTION_H
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
@@ -28,11 +28,15 @@ namespace codeview {
/// Represents a read-only view of a CodeView string table. This is a very
/// simple flat buffer consisting of null-terminated strings, where strings
-/// are retrieved by their offset in the buffer. StringTableRef does not own
-/// the underlying storage for the buffer.
-class StringTableRef {
+/// are retrieved by their offset in the buffer. DebugStringTableSubsectionRef
+/// does not own the underlying storage for the buffer.
+class DebugStringTableSubsectionRef : public DebugSubsectionRef {
public:
- StringTableRef();
+ DebugStringTableSubsectionRef();
+
+ static bool classof(const DebugSubsectionRef *S) {
+ return S->kind() == DebugSubsectionKind::StringTable;
+ }
Error initialize(BinaryStreamRef Contents);
@@ -44,11 +48,18 @@ private:
BinaryStreamRef Stream;
};
-/// Represents a read-write view of a CodeView string table. StringTable owns
-/// the underlying storage for the table, and is capable of serializing the
-/// string table into a format understood by StringTableRef.
-class StringTable {
+/// Represents a read-write view of a CodeView string table.
+/// DebugStringTableSubsection owns the underlying storage for the table, and is
+/// capable of serializing the string table into a format understood by
+/// DebugStringTableSubsectionRef.
+class DebugStringTableSubsection : public DebugSubsection {
public:
+ DebugStringTableSubsection();
+
+ static bool classof(const DebugSubsection *S) {
+ return S->kind() == DebugSubsectionKind::StringTable;
+ }
+
// If string S does not exist in the string table, insert it.
// Returns the ID for S.
uint32_t insert(StringRef S);
@@ -56,8 +67,8 @@ public:
// Return the ID for string S. Assumes S exists in the table.
uint32_t getStringId(StringRef S) const;
- uint32_t calculateSerializedSize() const;
- Error commit(BinaryStreamWriter &Writer) const;
+ uint32_t calculateSerializedSize() const override;
+ Error commit(BinaryStreamWriter &Writer) const override;
uint32_t size() const;
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsection.h b/include/llvm/DebugInfo/CodeView/DebugSubsection.h
new file mode 100644
index 000000000000..e427e0006a55
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsection.h
@@ -0,0 +1,52 @@
+//===- DebugSubsection.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H
+#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+namespace codeview {
+
+class DebugSubsectionRef {
+public:
+ explicit DebugSubsectionRef(DebugSubsectionKind Kind) : Kind(Kind) {}
+ virtual ~DebugSubsectionRef();
+
+ static bool classof(const DebugSubsectionRef *S) { return true; }
+
+ DebugSubsectionKind kind() const { return Kind; }
+
+protected:
+ DebugSubsectionKind Kind;
+};
+
+class DebugSubsection {
+public:
+ explicit DebugSubsection(DebugSubsectionKind Kind) : Kind(Kind) {}
+ virtual ~DebugSubsection();
+
+ static bool classof(const DebugSubsection *S) { return true; }
+
+ DebugSubsectionKind kind() const { return Kind; }
+
+ virtual Error commit(BinaryStreamWriter &Writer) const = 0;
+ virtual uint32_t calculateSerializedSize() const = 0;
+
+protected:
+ DebugSubsectionKind Kind;
+};
+
+} // namespace codeview
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
index f68f21b224f1..b2e1131e5968 100644
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
@@ -1,4 +1,4 @@
-//===- ModuleDebugFragment.h ------------------------------------*- C++ -*-===//
+//===- DebugSubsection.h ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,52 +20,49 @@
namespace llvm {
namespace codeview {
-class ModuleDebugFragment;
+class DebugSubsection;
// Corresponds to the `CV_DebugSSubsectionHeader_t` structure.
-struct ModuleDebugFragmentHeader {
- support::ulittle32_t Kind; // codeview::ModuleDebugFragmentKind enum
+struct DebugSubsectionHeader {
+ support::ulittle32_t Kind; // codeview::DebugSubsectionKind enum
support::ulittle32_t Length; // number of bytes occupied by this record.
};
-class ModuleDebugFragmentRecord {
+class DebugSubsectionRecord {
public:
- ModuleDebugFragmentRecord();
- ModuleDebugFragmentRecord(ModuleDebugFragmentKind Kind, BinaryStreamRef Data);
+ DebugSubsectionRecord();
+ DebugSubsectionRecord(DebugSubsectionKind Kind, BinaryStreamRef Data);
- static Error initialize(BinaryStreamRef Stream,
- ModuleDebugFragmentRecord &Info);
+ static Error initialize(BinaryStreamRef Stream, DebugSubsectionRecord &Info);
uint32_t getRecordLength() const;
- ModuleDebugFragmentKind kind() const;
+ DebugSubsectionKind kind() const;
BinaryStreamRef getRecordData() const;
private:
- ModuleDebugFragmentKind Kind;
+ DebugSubsectionKind Kind;
BinaryStreamRef Data;
};
-class ModuleDebugFragmentRecordBuilder {
+class DebugSubsectionRecordBuilder {
public:
- ModuleDebugFragmentRecordBuilder(ModuleDebugFragmentKind Kind,
- ModuleDebugFragment &Frag);
+ DebugSubsectionRecordBuilder(DebugSubsectionKind Kind, DebugSubsection &Frag);
uint32_t calculateSerializedLength();
Error commit(BinaryStreamWriter &Writer);
private:
- ModuleDebugFragmentKind Kind;
- ModuleDebugFragment &Frag;
+ DebugSubsectionKind Kind;
+ DebugSubsection &Frag;
};
} // namespace codeview
-template <>
-struct VarStreamArrayExtractor<codeview::ModuleDebugFragmentRecord> {
+template <> struct VarStreamArrayExtractor<codeview::DebugSubsectionRecord> {
typedef void ContextType;
static Error extract(BinaryStreamRef Stream, uint32_t &Length,
- codeview::ModuleDebugFragmentRecord &Info) {
- if (auto EC = codeview::ModuleDebugFragmentRecord::initialize(Stream, Info))
+ codeview::DebugSubsectionRecord &Info) {
+ if (auto EC = codeview::DebugSubsectionRecord::initialize(Stream, Info))
return EC;
Length = Info.getRecordLength();
return Error::success();
@@ -73,7 +70,7 @@ struct VarStreamArrayExtractor<codeview::ModuleDebugFragmentRecord> {
};
namespace codeview {
-typedef VarStreamArray<ModuleDebugFragmentRecord> ModuleDebugFragmentArray;
+typedef VarStreamArray<DebugSubsectionRecord> DebugSubsectionArray;
}
} // namespace llvm
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
index 1f55d2024203..55bef491c97e 100644
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
@@ -1,4 +1,4 @@
-//===- ModuleDebugFragmentVisitor.h -----------------------------*- C++ -*-===//
+//===- DebugSubsectionVisitor.h -----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,43 +17,41 @@ namespace llvm {
namespace codeview {
-class ModuleDebugFileChecksumFragmentRef;
-class ModuleDebugFragmentRecord;
-class ModuleDebugInlineeLineFragmentRef;
-class ModuleDebugLineFragmentRef;
-class ModuleDebugUnknownFragmentRef;
+class DebugChecksumsSubsectionRef;
+class DebugSubsectionRecord;
+class DebugInlineeLinesSubsectionRef;
+class DebugLinesSubsectionRef;
+class DebugUnknownSubsectionRef;
-class ModuleDebugFragmentVisitor {
+class DebugSubsectionVisitor {
public:
- virtual ~ModuleDebugFragmentVisitor() = default;
+ virtual ~DebugSubsectionVisitor() = default;
- virtual Error visitUnknown(ModuleDebugUnknownFragmentRef &Unknown) {
+ virtual Error visitUnknown(DebugUnknownSubsectionRef &Unknown) {
return Error::success();
}
- virtual Error visitLines(ModuleDebugLineFragmentRef &Lines) {
+ virtual Error visitLines(DebugLinesSubsectionRef &Lines) {
return Error::success();
}
- virtual Error
- visitFileChecksums(ModuleDebugFileChecksumFragmentRef &Checksums) {
+ virtual Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums) {
return Error::success();
}
- virtual Error visitInlineeLines(ModuleDebugInlineeLineFragmentRef &Inlinees) {
+ virtual Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees) {
return Error::success();
}
virtual Error finished() { return Error::success(); }
};
-Error visitModuleDebugFragment(const ModuleDebugFragmentRecord &R,
- ModuleDebugFragmentVisitor &V);
+Error visitDebugSubsection(const DebugSubsectionRecord &R,
+ DebugSubsectionVisitor &V);
template <typename T>
-Error visitModuleDebugFragments(T &&FragmentRange,
- ModuleDebugFragmentVisitor &V) {
+Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V) {
for (const auto &L : FragmentRange) {
- if (auto EC = visitModuleDebugFragment(L, V))
+ if (auto EC = visitDebugSubsection(L, V))
return EC;
}
if (auto EC = V.finished())
diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
new file mode 100644
index 000000000000..3d1eb27ba270
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
@@ -0,0 +1,53 @@
+//===- DebugSymbolsSubsection.h --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H
+
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace codeview {
+class DebugSymbolsSubsectionRef final : public DebugSubsectionRef {
+public:
+ DebugSymbolsSubsectionRef()
+ : DebugSubsectionRef(DebugSubsectionKind::Symbols) {}
+
+ static bool classof(const DebugSubsectionRef *S) {
+ return S->kind() == DebugSubsectionKind::Symbols;
+ }
+
+ Error initialize(BinaryStreamReader Reader);
+
+private:
+ CVSymbolArray Records;
+};
+
+class DebugSymbolsSubsection final : public DebugSubsection {
+public:
+ DebugSymbolsSubsection() : DebugSubsection(DebugSubsectionKind::Symbols) {}
+ static bool classof(const DebugSubsection *S) {
+ return S->kind() == DebugSubsectionKind::Symbols;
+ }
+
+ uint32_t calculateSerializedSize() const override;
+ Error commit(BinaryStreamWriter &Writer) const override;
+
+ void addSymbol(CVSymbol Symbol);
+
+private:
+ uint32_t Length = 0;
+ std::vector<CVSymbol> Records;
+};
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h b/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h
index b8c1c02e5cf1..ea9a96ca8d68 100644
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h
+++ b/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h
@@ -1,4 +1,4 @@
-//===- ModuleDebugUnknownFragment.h -----------------------------*- C++ -*-===//
+//===- DebugUnknownSubsection.h -----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,17 +10,16 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGUNKNOWNFRAGMENT_H
#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGUNKNOWNFRAGMENT_H
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/BinaryStreamRef.h"
namespace llvm {
namespace codeview {
-class ModuleDebugUnknownFragmentRef final : public ModuleDebugFragmentRef {
+class DebugUnknownSubsectionRef final : public DebugSubsectionRef {
public:
- ModuleDebugUnknownFragmentRef(ModuleDebugFragmentKind Kind,
- BinaryStreamRef Data)
- : ModuleDebugFragmentRef(Kind), Data(Data) {}
+ DebugUnknownSubsectionRef(DebugSubsectionKind Kind, BinaryStreamRef Data)
+ : DebugSubsectionRef(Kind), Data(Data) {}
BinaryStreamRef getData() const { return Data; }
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugFragment.h
deleted file mode 100644
index a5311cae9480..000000000000
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugFragment.h
+++ /dev/null
@@ -1,48 +0,0 @@
-//===- ModuleDebugFragment.h ------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H
-#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H
-
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/Casting.h"
-
-namespace llvm {
-namespace codeview {
-
-class ModuleDebugFragmentRef {
-public:
- explicit ModuleDebugFragmentRef(ModuleDebugFragmentKind Kind) : Kind(Kind) {}
- virtual ~ModuleDebugFragmentRef();
-
- ModuleDebugFragmentKind kind() const { return Kind; }
-
-protected:
- ModuleDebugFragmentKind Kind;
-};
-
-class ModuleDebugFragment {
-public:
- explicit ModuleDebugFragment(ModuleDebugFragmentKind Kind) : Kind(Kind) {}
- virtual ~ModuleDebugFragment();
-
- ModuleDebugFragmentKind kind() const { return Kind; }
-
- virtual Error commit(BinaryStreamWriter &Writer) = 0;
- virtual uint32_t calculateSerializedLength() = 0;
-
-protected:
- ModuleDebugFragmentKind Kind;
-};
-
-} // namespace codeview
-} // namespace llvm
-
-#endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H
diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
index 96c8a47a3669..a2a3c6f18fba 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
@@ -19,7 +19,7 @@ class BinaryStreamReader;
namespace codeview {
-class StringTableRef;
+class DebugStringTableSubsectionRef;
class SymbolVisitorDelegate {
public:
@@ -27,7 +27,7 @@ public:
virtual uint32_t getRecordOffset(BinaryStreamReader Reader) = 0;
virtual StringRef getFileNameForFileOffset(uint32_t FileOffset) = 0;
- virtual StringTableRef getStringTable() = 0;
+ virtual DebugStringTableSubsectionRef getStringTable() = 0;
};
} // end namespace codeview
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
index 8cc5db981f56..e5858d0f45e3 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
@@ -11,9 +11,9 @@
#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTORBUILDER_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/Error.h"
@@ -25,7 +25,7 @@ namespace llvm {
class BinaryStreamWriter;
namespace codeview {
-class ModuleDebugFragmentRecordBuilder;
+class DebugSubsectionRecordBuilder;
}
namespace msf {
@@ -49,11 +49,11 @@ public:
void setObjFileName(StringRef Name);
void addSymbol(codeview::CVSymbol Symbol);
- void addC13Fragment(std::unique_ptr<codeview::ModuleDebugLineFragment> Lines);
+ void addC13Fragment(std::unique_ptr<codeview::DebugLinesSubsection> Lines);
void addC13Fragment(
- std::unique_ptr<codeview::ModuleDebugInlineeLineFragment> Inlinees);
+ std::unique_ptr<codeview::DebugInlineeLinesSubsection> Inlinees);
void setC13FileChecksums(
- std::unique_ptr<codeview::ModuleDebugFileChecksumFragment> Checksums);
+ std::unique_ptr<codeview::DebugChecksumsSubsection> Checksums);
uint16_t getStreamIndex() const;
StringRef getModuleName() const { return ModuleName; }
@@ -83,12 +83,11 @@ private:
std::vector<std::string> SourceFiles;
std::vector<codeview::CVSymbol> Symbols;
- std::unique_ptr<codeview::ModuleDebugFileChecksumFragment> ChecksumInfo;
- std::vector<std::unique_ptr<codeview::ModuleDebugLineFragment>> LineInfo;
- std::vector<std::unique_ptr<codeview::ModuleDebugInlineeLineFragment>>
- Inlinees;
+ std::unique_ptr<codeview::DebugChecksumsSubsection> ChecksumInfo;
+ std::vector<std::unique_ptr<codeview::DebugLinesSubsection>> LineInfo;
+ std::vector<std::unique_ptr<codeview::DebugInlineeLinesSubsection>> Inlinees;
- std::vector<std::unique_ptr<codeview::ModuleDebugFragmentRecordBuilder>>
+ std::vector<std::unique_ptr<codeview::DebugSubsectionRecordBuilder>>
C13Builders;
ModuleInfoHeader Layout;
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
index 8f95481f4152..dc35f8c72cd9 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
@@ -10,7 +10,7 @@
#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H
#define LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
@@ -19,8 +19,6 @@
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
diff --git a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
index 2c95690ed580..822ce3ce13d3 100644
--- a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
@@ -12,7 +12,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/Support/BinaryStreamArray.h"
@@ -25,8 +25,7 @@ class PDBFile;
class DbiModuleDescriptor;
class ModuleDebugStreamRef {
- typedef codeview::ModuleDebugFragmentArray::Iterator
- LinesAndChecksumsIterator;
+ typedef codeview::DebugSubsectionArray::Iterator LinesAndChecksumsIterator;
public:
ModuleDebugStreamRef(const DbiModuleDescriptor &Module,
@@ -58,7 +57,7 @@ private:
BinaryStreamRef C13LinesSubstream;
BinaryStreamRef GlobalRefsSubstream;
- codeview::ModuleDebugFragmentArray LinesAndChecksums;
+ codeview::DebugSubsectionArray LinesAndChecksums;
};
}
}
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
index 7c7f16bd1c73..6aeb0a5479cb 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
@@ -12,7 +12,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/StringTable.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
@@ -52,7 +52,7 @@ private:
Error readEpilogue(BinaryStreamReader &Reader);
const PDBStringTableHeader *Header = nullptr;
- codeview::StringTableRef Strings;
+ codeview::DebugStringTableSubsectionRef Strings;
FixedStreamArray<support::ulittle32_t> IDs;
uint32_t ByteSize = 0;
uint32_t NameCount = 0;
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
index 6f85e7a4a074..0faa02dc4525 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/StringTable.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/Support/Error.h"
#include <vector>
@@ -41,8 +41,10 @@ public:
uint32_t calculateSerializedSize() const;
Error commit(BinaryStreamWriter &Writer) const;
- codeview::StringTable &getStrings() { return Strings; }
- const codeview::StringTable &getStrings() const { return Strings; }
+ codeview::DebugStringTableSubsection &getStrings() { return Strings; }
+ const codeview::DebugStringTableSubsection &getStrings() const {
+ return Strings;
+ }
private:
uint32_t calculateHashTableSize() const;
@@ -51,7 +53,7 @@ private:
Error writeHashTable(BinaryStreamWriter &Writer) const;
Error writeEpilogue(BinaryStreamWriter &Writer) const;
- codeview::StringTable Strings;
+ codeview::DebugStringTableSubsection Strings;
};
} // end namespace pdb
diff --git a/include/llvm/MC/ConstantPools.h b/include/llvm/MC/ConstantPools.h
index 5d4e32a672dd..ef33250204ec 100644
--- a/include/llvm/MC/ConstantPools.h
+++ b/include/llvm/MC/ConstantPools.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/SMLoc.h"
#include <cstdint>
+#include <map>
namespace llvm {
@@ -44,7 +45,7 @@ struct ConstantPoolEntry {
class ConstantPool {
using EntryVecTy = SmallVector<ConstantPoolEntry, 4>;
EntryVecTy Entries;
- DenseMap<int64_t, const MCSymbolRefExpr *> CachedEntries;
+ std::map<int64_t, const MCSymbolRefExpr *> CachedEntries;
public:
// Initialize a new empty constant pool
diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h
index 7ce86eee95d2..b4bf3210cc73 100644
--- a/include/llvm/Support/ManagedStatic.h
+++ b/include/llvm/Support/ManagedStatic.h
@@ -14,25 +14,22 @@
#ifndef LLVM_SUPPORT_MANAGEDSTATIC_H
#define LLVM_SUPPORT_MANAGEDSTATIC_H
-#include "llvm/Support/Compiler.h"
#include <atomic>
#include <cstddef>
namespace llvm {
/// object_creator - Helper method for ManagedStatic.
-template<class C>
-LLVM_LIBRARY_VISIBILITY void* object_creator() {
- return new C();
-}
+template <class C> struct object_creator {
+ static void *call() { return new C(); }
+};
/// object_deleter - Helper method for ManagedStatic.
///
-template <typename T> struct LLVM_LIBRARY_VISIBILITY object_deleter {
+template <typename T> struct object_deleter {
static void call(void *Ptr) { delete (T *)Ptr; }
};
-template <typename T, size_t N>
-struct LLVM_LIBRARY_VISIBILITY object_deleter<T[N]> {
+template <typename T, size_t N> struct object_deleter<T[N]> {
static void call(void *Ptr) { delete[](T *)Ptr; }
};
@@ -59,14 +56,15 @@ public:
/// libraries that link in LLVM components) and for making destruction be
/// explicit through the llvm_shutdown() function call.
///
-template<class C>
+template <class C, class Creator = object_creator<C>,
+ class Deleter = object_deleter<C>>
class ManagedStatic : public ManagedStaticBase {
public:
// Accessors.
C &operator*() {
void *Tmp = Ptr.load(std::memory_order_acquire);
if (!Tmp)
- RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+ RegisterManagedStatic(Creator::call, Deleter::call);
return *static_cast<C *>(Ptr.load(std::memory_order_relaxed));
}
@@ -76,7 +74,7 @@ public:
const C &operator*() const {
void *Tmp = Ptr.load(std::memory_order_acquire);
if (!Tmp)
- RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+ RegisterManagedStatic(Creator::call, Deleter::call);
return *static_cast<C *>(Ptr.load(std::memory_order_relaxed));
}
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index d14a56cb87e0..437f68b24e57 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -1189,6 +1189,9 @@ public:
return Init ? Init->getValue() : StringRef();
}
+ ArrayRef<Init *> getArgs() const {
+ return makeArrayRef(getTrailingObjects<Init *>(), NumArgs);
+ }
ArrayRef<StringInit *> getArgNames() const {
return makeArrayRef(getTrailingObjects<StringInit *>(), NumArgNames);
}
@@ -1200,19 +1203,16 @@ public:
typedef SmallVectorImpl<Init*>::const_iterator const_arg_iterator;
typedef SmallVectorImpl<StringInit*>::const_iterator const_name_iterator;
- inline const_arg_iterator arg_begin() const { return getTrailingObjects<Init *>(); }
- inline const_arg_iterator arg_end () const { return arg_begin() + NumArgs; }
- inline iterator_range<const_arg_iterator> args() const {
- return llvm::make_range(arg_begin(), arg_end());
- }
+ inline const_arg_iterator arg_begin() const { return getArgs().begin(); }
+ inline const_arg_iterator arg_end () const { return getArgs().end(); }
- inline size_t arg_size () const { return NumArgs; }
+ inline size_t arg_size () const { return NumArgs; }
inline bool arg_empty() const { return NumArgs == 0; }
- inline const_name_iterator name_begin() const { return getTrailingObjects<StringInit *>(); }
- inline const_name_iterator name_end () const { return name_begin() + NumArgNames; }
+ inline const_name_iterator name_begin() const { return getArgNames().begin();}
+ inline const_name_iterator name_end () const { return getArgNames().end(); }
- inline size_t name_size () const { return NumArgNames; }
+ inline size_t name_size () const { return NumArgNames; }
inline bool name_empty() const { return NumArgNames == 0; }
Init *getBit(unsigned Bit) const override {
diff --git a/include/llvm/Transforms/Scalar/GVNExpression.h b/include/llvm/Transforms/Scalar/GVNExpression.h
index a971df975b6f..324ebca46de2 100644
--- a/include/llvm/Transforms/Scalar/GVNExpression.h
+++ b/include/llvm/Transforms/Scalar/GVNExpression.h
@@ -58,10 +58,11 @@ class Expression {
private:
ExpressionType EType;
unsigned Opcode;
+ mutable hash_code HashVal;
public:
Expression(ExpressionType ET = ET_Base, unsigned O = ~2U)
- : EType(ET), Opcode(O) {}
+ : EType(ET), Opcode(O), HashVal(0) {}
Expression(const Expression &) = delete;
Expression &operator=(const Expression &) = delete;
virtual ~Expression();
@@ -82,6 +83,14 @@ public:
return equals(Other);
}
+ hash_code getComputedHash() const {
+ // It's theoretically possible for a thing to hash to zero. In that case,
+ // we will just compute the hash a few extra times, which is no worse that
+ // we did before, which was to compute it always.
+ if (static_cast<unsigned>(HashVal) == 0)
+ HashVal = getHashValue();
+ return HashVal;
+ }
virtual bool equals(const Expression &Other) const { return true; }
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index d280fda0a162..f55ce202bcbb 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2178,8 +2178,7 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
return Flags;
}
-bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L,
- DominatorTree &DT, LoopInfo &LI) {
+bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) {
if (!isLoopInvariant(S, L))
return false;
// If a value depends on a SCEVUnknown which is defined after the loop, we
@@ -2516,7 +2515,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) {
+ if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -2791,7 +2790,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) {
+ if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 114aea391a86..385c78bbccef 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -15,8 +15,8 @@
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/Line.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
@@ -393,7 +393,7 @@ void CodeViewDebug::endModule() {
// subprograms.
switchToDebugSectionForSymbol(nullptr);
- MCSymbol *CompilerInfo = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
+ MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols);
emitCompilerInformation();
endCVSubsection(CompilerInfo);
@@ -417,7 +417,7 @@ void CodeViewDebug::endModule() {
// Emit UDT records for any types used by global variables.
if (!GlobalUDTs.empty()) {
- MCSymbol *SymbolsEnd = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
+ MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
emitDebugInfoForUDTs(GlobalUDTs);
endCVSubsection(SymbolsEnd);
}
@@ -630,8 +630,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
return;
OS.AddComment("Inlinee lines subsection");
- MCSymbol *InlineEnd =
- beginCVSubsection(ModuleDebugFragmentKind::InlineeLines);
+ MCSymbol *InlineEnd = beginCVSubsection(DebugSubsectionKind::InlineeLines);
// We don't provide any extra file info.
// FIXME: Find out if debuggers use this info.
@@ -756,7 +755,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
// Emit a symbol subsection, required by VS2012+ to find function boundaries.
OS.AddComment("Symbol subsection for " + Twine(FuncName));
- MCSymbol *SymbolsEnd = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
+ MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
{
MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(),
*ProcRecordEnd = MMI->getContext().createTempSymbol();
@@ -2111,7 +2110,7 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
maybeRecordLocation(DL, Asm->MF);
}
-MCSymbol *CodeViewDebug::beginCVSubsection(ModuleDebugFragmentKind Kind) {
+MCSymbol *CodeViewDebug::beginCVSubsection(DebugSubsectionKind Kind) {
MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
*EndLabel = MMI->getContext().createTempSymbol();
OS.EmitIntValue(unsigned(Kind), 4);
@@ -2171,7 +2170,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
if (!GV->hasComdat() && !GV->isDeclarationForLinker()) {
if (!EndLabel) {
OS.AddComment("Symbol subsection for globals");
- EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
+ EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
}
// FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV));
@@ -2189,7 +2188,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
OS.AddComment("Symbol subsection for " +
Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
switchToDebugSectionForSymbol(GVSym);
- EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
+ EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
// FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym);
endCVSubsection(EndLabel);
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 46b2daa1e007..1c0c1644edaf 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -216,7 +216,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Opens a subsection of the given kind in a .debug$S codeview section.
/// Returns an end label for use with endCVSubsection when the subsection is
/// finished.
- MCSymbol *beginCVSubsection(codeview::ModuleDebugFragmentKind Kind);
+ MCSymbol *beginCVSubsection(codeview::DebugSubsectionKind Kind);
void endCVSubsection(MCSymbol *EndLabel);
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
index bdca732b4e33..c2a568e4b452 100644
--- a/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
char Localizer::ID = 0;
INITIALIZE_PASS(Localizer, DEBUG_TYPE,
"Move/duplicate certain instructions close to their use", false,
- false);
+ false)
Localizer::Localizer() : MachineFunctionPass(ID) {
initializeLocalizerPass(*PassRegistry::getPassRegistry());
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 23a302f3e561..ab36bc1417ae 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14567,7 +14567,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// extract instead or remove that condition entirely.
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || !Ld->hasOneUse() || Ld->isVolatile() || !ExtIdx)
+ if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
+ !ExtIdx)
return SDValue();
// The narrow load will be offset from the base address of the old load if
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d0a8b34c69c6..da2fb72bec45 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -925,10 +925,6 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
if (Action != TargetLowering::Legal)
Action = TargetLowering::Expand;
- // ISD::FPOWI returns 'Legal' even though it should be expanded.
- if (Opcode == ISD::STRICT_FPOWI && Action == TargetLowering::Legal)
- Action = TargetLowering::Expand;
-
return Action;
}
@@ -1027,7 +1023,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
break;
case ISD::EXTRACT_ELEMENT:
case ISD::FLT_ROUNDS_:
- case ISD::FPOWI:
case ISD::MERGE_VALUES:
case ISD::EH_RETURN:
case ISD::FRAME_TO_ARGS_OFFSET:
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 5f63fd4320bb..0def5ae6d0d0 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -935,6 +935,7 @@ void TargetLoweringBase::initActions() {
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
// These operations default to expand for vector types.
if (VT.isVector()) {
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt
index 90193d07b95d..410b89bc949e 100644
--- a/lib/DebugInfo/CodeView/CMakeLists.txt
+++ b/lib/DebugInfo/CodeView/CMakeLists.txt
@@ -7,14 +7,16 @@ add_llvm_library(LLVMDebugInfoCodeView
Formatters.cpp
LazyRandomTypeCollection.cpp
Line.cpp
- ModuleDebugFileChecksumFragment.cpp
- ModuleDebugFragment.cpp
- ModuleDebugFragmentRecord.cpp
- ModuleDebugFragmentVisitor.cpp
- ModuleDebugInlineeLinesFragment.cpp
- ModuleDebugLineFragment.cpp
+ DebugChecksumsSubsection.cpp
+ DebugFrameDataSubsection.cpp
+ DebugInlineeLinesSubsection.cpp
+ DebugLinesSubsection.cpp
+ DebugStringTableSubsection.cpp
+ DebugSubsection.cpp
+ DebugSubsectionRecord.cpp
+ DebugSubsectionVisitor.cpp
+ DebugSymbolsSubsection.cpp
RecordSerialization.cpp
- StringTable.cpp
SymbolRecordMapping.cpp
SymbolDumper.cpp
SymbolSerializer.cpp
diff --git a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
index 42f0afc3e2d7..1a85a339f8c3 100644
--- a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp
+++ b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
@@ -1,4 +1,4 @@
-//===- ModuleDebugFileChecksumFragment.cpp ----------------------*- C++ -*-===//
+//===- DebugChecksumsSubsection.cpp ----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/StringTable.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/Support/BinaryStreamReader.h"
using namespace llvm;
@@ -42,22 +42,24 @@ Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::extract(
return Error::success();
}
-Error ModuleDebugFileChecksumFragmentRef::initialize(
- BinaryStreamReader Reader) {
+Error DebugChecksumsSubsectionRef::initialize(BinaryStreamReader Reader) {
if (auto EC = Reader.readArray(Checksums, Reader.bytesRemaining()))
return EC;
return Error::success();
}
+Error DebugChecksumsSubsectionRef::initialize(BinaryStreamRef Section) {
+ BinaryStreamReader Reader(Section);
+ return initialize(Reader);
+}
-ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment(
- StringTable &Strings)
- : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums),
- Strings(Strings) {}
+DebugChecksumsSubsection::DebugChecksumsSubsection(
+ DebugStringTableSubsection &Strings)
+ : DebugSubsection(DebugSubsectionKind::FileChecksums), Strings(Strings) {}
-void ModuleDebugFileChecksumFragment::addChecksum(StringRef FileName,
- FileChecksumKind Kind,
- ArrayRef<uint8_t> Bytes) {
+void DebugChecksumsSubsection::addChecksum(StringRef FileName,
+ FileChecksumKind Kind,
+ ArrayRef<uint8_t> Bytes) {
FileChecksumEntry Entry;
if (!Bytes.empty()) {
uint8_t *Copy = Storage.Allocate<uint8_t>(Bytes.size());
@@ -78,11 +80,11 @@ void ModuleDebugFileChecksumFragment::addChecksum(StringRef FileName,
SerializedSize += Len;
}
-uint32_t ModuleDebugFileChecksumFragment::calculateSerializedLength() {
+uint32_t DebugChecksumsSubsection::calculateSerializedSize() const {
return SerializedSize;
}
-Error ModuleDebugFileChecksumFragment::commit(BinaryStreamWriter &Writer) {
+Error DebugChecksumsSubsection::commit(BinaryStreamWriter &Writer) const {
for (const auto &FC : Checksums) {
FileChecksumEntryHeader Header;
Header.ChecksumKind = uint8_t(FC.Kind);
@@ -98,8 +100,7 @@ Error ModuleDebugFileChecksumFragment::commit(BinaryStreamWriter &Writer) {
return Error::success();
}
-uint32_t
-ModuleDebugFileChecksumFragment::mapChecksumOffset(StringRef FileName) const {
+uint32_t DebugChecksumsSubsection::mapChecksumOffset(StringRef FileName) const {
uint32_t Offset = Strings.getStringId(FileName);
auto Iter = OffsetMap.find(Offset);
assert(Iter != OffsetMap.end());
diff --git a/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp b/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
new file mode 100644
index 000000000000..fd558aa9cc8a
--- /dev/null
+++ b/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
@@ -0,0 +1,44 @@
+//===- DebugFrameDataSubsection.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+Error DebugFrameDataSubsectionRef::initialize(BinaryStreamReader Reader) {
+ if (auto EC = Reader.readObject(RelocPtr))
+ return EC;
+ if (Reader.bytesRemaining() % sizeof(FrameData) != 0)
+ return make_error<CodeViewError>(cv_error_code::corrupt_record,
+ "Invalid frame data record format!");
+
+ uint32_t Count = Reader.bytesRemaining() / sizeof(FrameData);
+ if (auto EC = Reader.readArray(Frames, Count))
+ return EC;
+ return Error::success();
+}
+
+uint32_t DebugFrameDataSubsection::calculateSerializedSize() const {
+ return 4 + sizeof(FrameData) * Frames.size();
+}
+
+Error DebugFrameDataSubsection::commit(BinaryStreamWriter &Writer) const {
+ if (auto EC = Writer.writeInteger<uint32_t>(0))
+ return EC;
+
+ if (auto EC = Writer.writeArray(makeArrayRef(Frames)))
+ return EC;
+ return Error::success();
+}
+
+void DebugFrameDataSubsection::addFrameData(const FrameData &Frame) {
+ Frames.push_back(Frame);
+}
diff --git a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
index cb6a8478797f..520a0ee4454f 100644
--- a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp
+++ b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
@@ -1,4 +1,4 @@
-//===- ModuleDebugInlineeLineFragment.cpp ------------------------*- C++-*-===//
+//===- DebugInlineeLinesSubsection.cpp ------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
-#include "llvm/DebugInfo/CodeView/StringTable.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -37,10 +37,10 @@ Error VarStreamArrayExtractor<InlineeSourceLine>::extract(
return Error::success();
}
-ModuleDebugInlineeLineFragmentRef::ModuleDebugInlineeLineFragmentRef()
- : ModuleDebugFragmentRef(ModuleDebugFragmentKind::InlineeLines) {}
+DebugInlineeLinesSubsectionRef::DebugInlineeLinesSubsectionRef()
+ : DebugSubsectionRef(DebugSubsectionKind::InlineeLines) {}
-Error ModuleDebugInlineeLineFragmentRef::initialize(BinaryStreamReader Reader) {
+Error DebugInlineeLinesSubsectionRef::initialize(BinaryStreamReader Reader) {
if (auto EC = Reader.readEnum(Signature))
return EC;
@@ -52,16 +52,16 @@ Error ModuleDebugInlineeLineFragmentRef::initialize(BinaryStreamReader Reader) {
return Error::success();
}
-bool ModuleDebugInlineeLineFragmentRef::hasExtraFiles() const {
+bool DebugInlineeLinesSubsectionRef::hasExtraFiles() const {
return Signature == InlineeLinesSignature::ExtraFiles;
}
-ModuleDebugInlineeLineFragment::ModuleDebugInlineeLineFragment(
- ModuleDebugFileChecksumFragment &Checksums, bool HasExtraFiles)
- : ModuleDebugFragment(ModuleDebugFragmentKind::InlineeLines),
- Checksums(Checksums), HasExtraFiles(HasExtraFiles) {}
+DebugInlineeLinesSubsection::DebugInlineeLinesSubsection(
+ DebugChecksumsSubsection &Checksums, bool HasExtraFiles)
+ : DebugSubsection(DebugSubsectionKind::InlineeLines), Checksums(Checksums),
+ HasExtraFiles(HasExtraFiles) {}
-uint32_t ModuleDebugInlineeLineFragment::calculateSerializedLength() {
+uint32_t DebugInlineeLinesSubsection::calculateSerializedSize() const {
// 4 bytes for the signature
uint32_t Size = sizeof(InlineeLinesSignature);
@@ -78,7 +78,7 @@ uint32_t ModuleDebugInlineeLineFragment::calculateSerializedLength() {
return Size;
}
-Error ModuleDebugInlineeLineFragment::commit(BinaryStreamWriter &Writer) {
+Error DebugInlineeLinesSubsection::commit(BinaryStreamWriter &Writer) const {
InlineeLinesSignature Sig = InlineeLinesSignature::Normal;
if (HasExtraFiles)
Sig = InlineeLinesSignature::ExtraFiles;
@@ -102,7 +102,7 @@ Error ModuleDebugInlineeLineFragment::commit(BinaryStreamWriter &Writer) {
return Error::success();
}
-void ModuleDebugInlineeLineFragment::addExtraFile(StringRef FileName) {
+void DebugInlineeLinesSubsection::addExtraFile(StringRef FileName) {
uint32_t Offset = Checksums.mapChecksumOffset(FileName);
auto &Entry = Entries.back();
@@ -110,9 +110,9 @@ void ModuleDebugInlineeLineFragment::addExtraFile(StringRef FileName) {
++ExtraFileCount;
}
-void ModuleDebugInlineeLineFragment::addInlineSite(TypeIndex FuncId,
- StringRef FileName,
- uint32_t SourceLine) {
+void DebugInlineeLinesSubsection::addInlineSite(TypeIndex FuncId,
+ StringRef FileName,
+ uint32_t SourceLine) {
uint32_t Offset = Checksums.mapChecksumOffset(FileName);
Entries.emplace_back();
diff --git a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
index e0ee934709ba..2fce06ca2a17 100644
--- a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp
+++ b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
@@ -1,4 +1,4 @@
-//===- ModuleDebugLineFragment.cpp -------------------------------*- C++-*-===//
+//===- DebugLinesSubsection.cpp -------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
+#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
-#include "llvm/DebugInfo/CodeView/StringTable.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -49,10 +49,10 @@ Error LineColumnExtractor::extract(BinaryStreamRef Stream, uint32_t &Len,
return Error::success();
}
-ModuleDebugLineFragmentRef::ModuleDebugLineFragmentRef()
- : ModuleDebugFragmentRef(ModuleDebugFragmentKind::Lines) {}
+DebugLinesSubsectionRef::DebugLinesSubsectionRef()
+ : DebugSubsectionRef(DebugSubsectionKind::Lines) {}
-Error ModuleDebugLineFragmentRef::initialize(BinaryStreamReader Reader) {
+Error DebugLinesSubsectionRef::initialize(BinaryStreamReader Reader) {
if (auto EC = Reader.readObject(Header))
return EC;
@@ -63,23 +63,21 @@ Error ModuleDebugLineFragmentRef::initialize(BinaryStreamReader Reader) {
return Error::success();
}
-bool ModuleDebugLineFragmentRef::hasColumnInfo() const {
+bool DebugLinesSubsectionRef::hasColumnInfo() const {
return !!(Header->Flags & LF_HaveColumns);
}
-ModuleDebugLineFragment::ModuleDebugLineFragment(
- ModuleDebugFileChecksumFragment &Checksums, StringTable &Strings)
- : ModuleDebugFragment(ModuleDebugFragmentKind::Lines),
- Checksums(Checksums) {}
+DebugLinesSubsection::DebugLinesSubsection(DebugChecksumsSubsection &Checksums,
+ DebugStringTableSubsection &Strings)
+ : DebugSubsection(DebugSubsectionKind::Lines), Checksums(Checksums) {}
-void ModuleDebugLineFragment::createBlock(StringRef FileName) {
+void DebugLinesSubsection::createBlock(StringRef FileName) {
uint32_t Offset = Checksums.mapChecksumOffset(FileName);
Blocks.emplace_back(Offset);
}
-void ModuleDebugLineFragment::addLineInfo(uint32_t Offset,
- const LineInfo &Line) {
+void DebugLinesSubsection::addLineInfo(uint32_t Offset, const LineInfo &Line) {
Block &B = Blocks.back();
LineNumberEntry LNE;
LNE.Flags = Line.getRawData();
@@ -87,10 +85,10 @@ void ModuleDebugLineFragment::addLineInfo(uint32_t Offset,
B.Lines.push_back(LNE);
}
-void ModuleDebugLineFragment::addLineAndColumnInfo(uint32_t Offset,
- const LineInfo &Line,
- uint32_t ColStart,
- uint32_t ColEnd) {
+void DebugLinesSubsection::addLineAndColumnInfo(uint32_t Offset,
+ const LineInfo &Line,
+ uint32_t ColStart,
+ uint32_t ColEnd) {
Block &B = Blocks.back();
assert(B.Lines.size() == B.Columns.size());
@@ -101,7 +99,7 @@ void ModuleDebugLineFragment::addLineAndColumnInfo(uint32_t Offset,
B.Columns.push_back(CNE);
}
-Error ModuleDebugLineFragment::commit(BinaryStreamWriter &Writer) {
+Error DebugLinesSubsection::commit(BinaryStreamWriter &Writer) const {
LineFragmentHeader Header;
Header.CodeSize = CodeSize;
Header.Flags = hasColumnInfo() ? LF_HaveColumns : 0;
@@ -135,7 +133,7 @@ Error ModuleDebugLineFragment::commit(BinaryStreamWriter &Writer) {
return Error::success();
}
-uint32_t ModuleDebugLineFragment::calculateSerializedLength() {
+uint32_t DebugLinesSubsection::calculateSerializedSize() const {
uint32_t Size = sizeof(LineFragmentHeader);
for (const auto &B : Blocks) {
Size += sizeof(LineBlockFragmentHeader);
@@ -146,16 +144,16 @@ uint32_t ModuleDebugLineFragment::calculateSerializedLength() {
return Size;
}
-void ModuleDebugLineFragment::setRelocationAddress(uint16_t Segment,
- uint16_t Offset) {
+void DebugLinesSubsection::setRelocationAddress(uint16_t Segment,
+ uint16_t Offset) {
RelocOffset = Offset;
RelocSegment = Segment;
}
-void ModuleDebugLineFragment::setCodeSize(uint32_t Size) { CodeSize = Size; }
+void DebugLinesSubsection::setCodeSize(uint32_t Size) { CodeSize = Size; }
-void ModuleDebugLineFragment::setFlags(LineFlags Flags) { this->Flags = Flags; }
+void DebugLinesSubsection::setFlags(LineFlags Flags) { this->Flags = Flags; }
-bool ModuleDebugLineFragment::hasColumnInfo() const {
+bool DebugLinesSubsection::hasColumnInfo() const {
return Flags & LF_HaveColumns;
}
diff --git a/lib/DebugInfo/CodeView/StringTable.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index 21f11204686b..b8741eb0b675 100644
--- a/lib/DebugInfo/CodeView/StringTable.cpp
+++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -1,4 +1,4 @@
-//===- StringTable.cpp - CodeView String Table Reader/Writer ----*- C++ -*-===//
+//===- DebugStringTableSubsection.cpp - CodeView String Table ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/CodeView/StringTable.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/Support/BinaryStream.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -16,14 +16,16 @@
using namespace llvm;
using namespace llvm::codeview;
-StringTableRef::StringTableRef() {}
+DebugStringTableSubsectionRef::DebugStringTableSubsectionRef()
+ : DebugSubsectionRef(DebugSubsectionKind::StringTable) {}
-Error StringTableRef::initialize(BinaryStreamRef Contents) {
+Error DebugStringTableSubsectionRef::initialize(BinaryStreamRef Contents) {
Stream = Contents;
return Error::success();
}
-Expected<StringRef> StringTableRef::getString(uint32_t Offset) const {
+Expected<StringRef>
+DebugStringTableSubsectionRef::getString(uint32_t Offset) const {
BinaryStreamReader Reader(Stream);
Reader.setOffset(Offset);
StringRef Result;
@@ -32,7 +34,10 @@ Expected<StringRef> StringTableRef::getString(uint32_t Offset) const {
return Result;
}
-uint32_t StringTable::insert(StringRef S) {
+DebugStringTableSubsection::DebugStringTableSubsection()
+ : DebugSubsection(DebugSubsectionKind::StringTable) {}
+
+uint32_t DebugStringTableSubsection::insert(StringRef S) {
auto P = Strings.insert({S, StringSize});
// If a given string didn't exist in the string table, we want to increment
@@ -42,9 +47,11 @@ uint32_t StringTable::insert(StringRef S) {
return P.first->second;
}
-uint32_t StringTable::calculateSerializedSize() const { return StringSize; }
+uint32_t DebugStringTableSubsection::calculateSerializedSize() const {
+ return StringSize;
+}
-Error StringTable::commit(BinaryStreamWriter &Writer) const {
+Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const {
assert(Writer.bytesRemaining() == StringSize);
uint32_t MaxOffset = 1;
@@ -62,9 +69,9 @@ Error StringTable::commit(BinaryStreamWriter &Writer) const {
return Error::success();
}
-uint32_t StringTable::size() const { return Strings.size(); }
+uint32_t DebugStringTableSubsection::size() const { return Strings.size(); }
-uint32_t StringTable::getStringId(StringRef S) const {
+uint32_t DebugStringTableSubsection::getStringId(StringRef S) const {
auto P = Strings.find(S);
assert(P != Strings.end());
return P->second;
diff --git a/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp b/lib/DebugInfo/CodeView/DebugSubsection.cpp
index 2af1917413da..67b428bfa713 100644
--- a/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsection.cpp
@@ -1,4 +1,4 @@
-//===- ModuleDebugFragment.cpp -----------------------------------*- C++-*-===//
+//===- DebugSubsection.cpp -----------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
using namespace llvm::codeview;
-ModuleDebugFragmentRef::~ModuleDebugFragmentRef() {}
+DebugSubsectionRef::~DebugSubsectionRef() {}
-ModuleDebugFragment::~ModuleDebugFragment() {}
+DebugSubsection::~DebugSubsection() {}
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
new file mode 100644
index 000000000000..511f36d0020a
--- /dev/null
+++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -0,0 +1,81 @@
+//===- DebugSubsectionRecord.cpp -----------------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
+
+#include "llvm/Support/BinaryStreamReader.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+DebugSubsectionRecord::DebugSubsectionRecord()
+ : Kind(DebugSubsectionKind::None) {}
+
+DebugSubsectionRecord::DebugSubsectionRecord(DebugSubsectionKind Kind,
+ BinaryStreamRef Data)
+ : Kind(Kind), Data(Data) {}
+
+Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream,
+ DebugSubsectionRecord &Info) {
+ const DebugSubsectionHeader *Header;
+ BinaryStreamReader Reader(Stream);
+ if (auto EC = Reader.readObject(Header))
+ return EC;
+
+ DebugSubsectionKind Kind =
+ static_cast<DebugSubsectionKind>(uint32_t(Header->Kind));
+ switch (Kind) {
+ case DebugSubsectionKind::FileChecksums:
+ case DebugSubsectionKind::Lines:
+ case DebugSubsectionKind::InlineeLines:
+ break;
+ default:
+ llvm_unreachable("Unexpected debug fragment kind!");
+ }
+ if (auto EC = Reader.readStreamRef(Info.Data, Header->Length))
+ return EC;
+ Info.Kind = Kind;
+ return Error::success();
+}
+
+uint32_t DebugSubsectionRecord::getRecordLength() const {
+ uint32_t Result = sizeof(DebugSubsectionHeader) + Data.getLength();
+ assert(Result % 4 == 0);
+ return Result;
+}
+
+DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; }
+
+BinaryStreamRef DebugSubsectionRecord::getRecordData() const { return Data; }
+
+DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder(
+ DebugSubsectionKind Kind, DebugSubsection &Frag)
+ : Kind(Kind), Frag(Frag) {}
+
+uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() {
+ uint32_t Size = sizeof(DebugSubsectionHeader) +
+ alignTo(Frag.calculateSerializedSize(), 4);
+ return Size;
+}
+
+Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) {
+ DebugSubsectionHeader Header;
+ Header.Kind = uint32_t(Kind);
+ Header.Length = calculateSerializedLength() - sizeof(DebugSubsectionHeader);
+
+ if (auto EC = Writer.writeObject(Header))
+ return EC;
+ if (auto EC = Frag.commit(Writer))
+ return EC;
+ if (auto EC = Writer.padToAlignment(4))
+ return EC;
+
+ return Error::success();
+}
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
new file mode 100644
index 000000000000..f2c4dea8685f
--- /dev/null
+++ b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
@@ -0,0 +1,52 @@
+//===- DebugSubsectionVisitor.cpp ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
+
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/DebugInfo/CodeView/DebugUnknownSubsection.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+Error llvm::codeview::visitDebugSubsection(const DebugSubsectionRecord &R,
+ DebugSubsectionVisitor &V) {
+ BinaryStreamReader Reader(R.getRecordData());
+ switch (R.kind()) {
+ case DebugSubsectionKind::Lines: {
+ DebugLinesSubsectionRef Fragment;
+ if (auto EC = Fragment.initialize(Reader))
+ return EC;
+
+ return V.visitLines(Fragment);
+ }
+ case DebugSubsectionKind::FileChecksums: {
+ DebugChecksumsSubsectionRef Fragment;
+ if (auto EC = Fragment.initialize(Reader))
+ return EC;
+
+ return V.visitFileChecksums(Fragment);
+ }
+ case DebugSubsectionKind::InlineeLines: {
+ DebugInlineeLinesSubsectionRef Fragment;
+ if (auto EC = Fragment.initialize(Reader))
+ return EC;
+ return V.visitInlineeLines(Fragment);
+ }
+ default: {
+ DebugUnknownSubsectionRef Fragment(R.kind(), R.getRecordData());
+ return V.visitUnknown(Fragment);
+ }
+ }
+}
diff --git a/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
new file mode 100644
index 000000000000..dc8ba8c929ae
--- /dev/null
+++ b/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
@@ -0,0 +1,34 @@
+//===- DebugSymbolsSubsection.cpp -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+Error DebugSymbolsSubsectionRef::initialize(BinaryStreamReader Reader) {
+ return Reader.readArray(Records, Reader.getLength());
+}
+
+uint32_t DebugSymbolsSubsection::calculateSerializedSize() const {
+ return Length;
+}
+
+Error DebugSymbolsSubsection::commit(BinaryStreamWriter &Writer) const {
+ for (const auto &Record : Records) {
+ if (auto EC = Writer.writeBytes(Record.RecordData))
+ return EC;
+ }
+ return Error::success();
+}
+
+void DebugSymbolsSubsection::addSymbol(CVSymbol Symbol) {
+ Records.push_back(Symbol);
+ Length += Symbol.length();
+} \ No newline at end of file
diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp
index fc6008ba66de..0441110c85ef 100644
--- a/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -245,20 +245,20 @@ static const EnumEntry<uint32_t> FrameProcSymFlagNames[] = {
};
static const EnumEntry<uint32_t> ModuleSubstreamKindNames[] = {
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, None),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, Symbols),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, Lines),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, StringTable),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FileChecksums),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FrameData),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, InlineeLines),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CrossScopeImports),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CrossScopeExports),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, ILLines),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FuncMDTokenMap),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, TypeMDTokenMap),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, MergedAssemblyInput),
- CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CoffSymbolRVA),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, None),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, Symbols),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, Lines),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, StringTable),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, FileChecksums),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, FrameData),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, InlineeLines),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, CrossScopeImports),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, CrossScopeExports),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, ILLines),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, FuncMDTokenMap),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, TypeMDTokenMap),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, MergedAssemblyInput),
+ CV_ENUM_CLASS_ENT(DebugSubsectionKind, CoffSymbolRVA),
};
static const EnumEntry<uint16_t> ExportSymFlagNames[] = {
diff --git a/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp b/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp
deleted file mode 100644
index b2543de78069..000000000000
--- a/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-//===- ModuleDebugFragmentRecord.cpp -----------------------------*- C++-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
-
-#include "llvm/Support/BinaryStreamReader.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-
-ModuleDebugFragmentRecord::ModuleDebugFragmentRecord()
- : Kind(ModuleDebugFragmentKind::None) {}
-
-ModuleDebugFragmentRecord::ModuleDebugFragmentRecord(
- ModuleDebugFragmentKind Kind, BinaryStreamRef Data)
- : Kind(Kind), Data(Data) {}
-
-Error ModuleDebugFragmentRecord::initialize(BinaryStreamRef Stream,
- ModuleDebugFragmentRecord &Info) {
- const ModuleDebugFragmentHeader *Header;
- BinaryStreamReader Reader(Stream);
- if (auto EC = Reader.readObject(Header))
- return EC;
-
- ModuleDebugFragmentKind Kind =
- static_cast<ModuleDebugFragmentKind>(uint32_t(Header->Kind));
- switch (Kind) {
- case ModuleDebugFragmentKind::FileChecksums:
- case ModuleDebugFragmentKind::Lines:
- case ModuleDebugFragmentKind::InlineeLines:
- break;
- default:
- llvm_unreachable("Unexpected debug fragment kind!");
- }
- if (auto EC = Reader.readStreamRef(Info.Data, Header->Length))
- return EC;
- Info.Kind = Kind;
- return Error::success();
-}
-
-uint32_t ModuleDebugFragmentRecord::getRecordLength() const {
- uint32_t Result = sizeof(ModuleDebugFragmentHeader) + Data.getLength();
- assert(Result % 4 == 0);
- return Result;
-}
-
-ModuleDebugFragmentKind ModuleDebugFragmentRecord::kind() const { return Kind; }
-
-BinaryStreamRef ModuleDebugFragmentRecord::getRecordData() const {
- return Data;
-}
-
-ModuleDebugFragmentRecordBuilder::ModuleDebugFragmentRecordBuilder(
- ModuleDebugFragmentKind Kind, ModuleDebugFragment &Frag)
- : Kind(Kind), Frag(Frag) {}
-
-uint32_t ModuleDebugFragmentRecordBuilder::calculateSerializedLength() {
- uint32_t Size = sizeof(ModuleDebugFragmentHeader) +
- alignTo(Frag.calculateSerializedLength(), 4);
- return Size;
-}
-
-Error ModuleDebugFragmentRecordBuilder::commit(BinaryStreamWriter &Writer) {
- ModuleDebugFragmentHeader Header;
- Header.Kind = uint32_t(Kind);
- Header.Length =
- calculateSerializedLength() - sizeof(ModuleDebugFragmentHeader);
-
- if (auto EC = Writer.writeObject(Header))
- return EC;
- if (auto EC = Frag.commit(Writer))
- return EC;
- if (auto EC = Writer.padToAlignment(4))
- return EC;
-
- return Error::success();
-}
diff --git a/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp b/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp
deleted file mode 100644
index dc591f3990e2..000000000000
--- a/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-//===- ModuleDebugFragmentVisitor.cpp ---------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h"
-
-#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h"
-#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/BinaryStreamRef.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-
-Error llvm::codeview::visitModuleDebugFragment(
- const ModuleDebugFragmentRecord &R, ModuleDebugFragmentVisitor &V) {
- BinaryStreamReader Reader(R.getRecordData());
- switch (R.kind()) {
- case ModuleDebugFragmentKind::Lines: {
- ModuleDebugLineFragmentRef Fragment;
- if (auto EC = Fragment.initialize(Reader))
- return EC;
-
- return V.visitLines(Fragment);
- }
- case ModuleDebugFragmentKind::FileChecksums: {
- ModuleDebugFileChecksumFragmentRef Fragment;
- if (auto EC = Fragment.initialize(Reader))
- return EC;
-
- return V.visitFileChecksums(Fragment);
- }
- case ModuleDebugFragmentKind::InlineeLines: {
- ModuleDebugInlineeLineFragmentRef Fragment;
- if (auto EC = Fragment.initialize(Reader))
- return EC;
- return V.visitInlineeLines(Fragment);
- }
- default: {
- ModuleDebugUnknownFragmentRef Fragment(R.kind(), R.getRecordData());
- return V.visitUnknown(Fragment);
- }
- }
-}
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 7d01c8c5f194..2f5a7d256c60 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -11,8 +11,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
-#include "llvm/DebugInfo/CodeView/StringTable.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -369,7 +369,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
DictScope S(W, "DefRangeSubfield");
if (ObjDelegate) {
- StringTableRef Strings = ObjDelegate->getStringTable();
+ DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable();
auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program);
if (!ExpectedProgram) {
consumeError(ExpectedProgram.takeError());
@@ -390,7 +390,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
DictScope S(W, "DefRange");
if (ObjDelegate) {
- StringTableRef Strings = ObjDelegate->getStringTable();
+ DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable();
auto ExpectedProgram = Strings.getString(DefRange.Program);
if (!ExpectedProgram) {
consumeError(ExpectedProgram.takeError());
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index 867864e47dce..b28ec2ff33ac 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -10,7 +10,7 @@
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
@@ -170,8 +170,8 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
}
void DbiModuleDescriptorBuilder::addC13Fragment(
- std::unique_ptr<ModuleDebugLineFragment> Lines) {
- ModuleDebugLineFragment &Frag = *Lines;
+ std::unique_ptr<DebugLinesSubsection> Lines) {
+ DebugLinesSubsection &Frag = *Lines;
// File Checksums have to come first, so push an empty entry on if this
// is the first.
@@ -180,12 +180,12 @@ void DbiModuleDescriptorBuilder::addC13Fragment(
this->LineInfo.push_back(std::move(Lines));
C13Builders.push_back(
- llvm::make_unique<ModuleDebugFragmentRecordBuilder>(Frag.kind(), Frag));
+ llvm::make_unique<DebugSubsectionRecordBuilder>(Frag.kind(), Frag));
}
void DbiModuleDescriptorBuilder::addC13Fragment(
- std::unique_ptr<codeview::ModuleDebugInlineeLineFragment> Inlinees) {
- ModuleDebugInlineeLineFragment &Frag = *Inlinees;
+ std::unique_ptr<codeview::DebugInlineeLinesSubsection> Inlinees) {
+ DebugInlineeLinesSubsection &Frag = *Inlinees;
// File Checksums have to come first, so push an empty entry on if this
// is the first.
@@ -194,17 +194,17 @@ void DbiModuleDescriptorBuilder::addC13Fragment(
this->Inlinees.push_back(std::move(Inlinees));
C13Builders.push_back(
- llvm::make_unique<ModuleDebugFragmentRecordBuilder>(Frag.kind(), Frag));
+ llvm::make_unique<DebugSubsectionRecordBuilder>(Frag.kind(), Frag));
}
void DbiModuleDescriptorBuilder::setC13FileChecksums(
- std::unique_ptr<ModuleDebugFileChecksumFragment> Checksums) {
+ std::unique_ptr<DebugChecksumsSubsection> Checksums) {
assert(!ChecksumInfo && "Can't have more than one checksum info!");
if (C13Builders.empty())
C13Builders.push_back(nullptr);
ChecksumInfo = std::move(Checksums);
- C13Builders[0] = llvm::make_unique<ModuleDebugFragmentRecordBuilder>(
+ C13Builders[0] = llvm::make_unique<DebugSubsectionRecordBuilder>(
ChecksumInfo->kind(), *ChecksumInfo);
}
diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp
index 2b97ecc0fd2c..a0a0ef312276 100644
--- a/lib/MC/MCCodeView.cpp
+++ b/lib/MC/MCCodeView.cpp
@@ -145,7 +145,7 @@ void CodeViewContext::emitStringTable(MCObjectStreamer &OS) {
MCSymbol *StringBegin = Ctx.createTempSymbol("strtab_begin", false),
*StringEnd = Ctx.createTempSymbol("strtab_end", false);
- OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::StringTable), 4);
+ OS.EmitIntValue(unsigned(DebugSubsectionKind::StringTable), 4);
OS.emitAbsoluteSymbolDiff(StringEnd, StringBegin, 4);
OS.EmitLabel(StringBegin);
@@ -172,7 +172,7 @@ void CodeViewContext::emitFileChecksums(MCObjectStreamer &OS) {
MCSymbol *FileBegin = Ctx.createTempSymbol("filechecksums_begin", false),
*FileEnd = Ctx.createTempSymbol("filechecksums_end", false);
- OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::FileChecksums), 4);
+ OS.EmitIntValue(unsigned(DebugSubsectionKind::FileChecksums), 4);
OS.emitAbsoluteSymbolDiff(FileEnd, FileBegin, 4);
OS.EmitLabel(FileBegin);
@@ -197,7 +197,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
MCSymbol *LineBegin = Ctx.createTempSymbol("linetable_begin", false),
*LineEnd = Ctx.createTempSymbol("linetable_end", false);
- OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::Lines), 4);
+ OS.EmitIntValue(unsigned(DebugSubsectionKind::Lines), 4);
OS.emitAbsoluteSymbolDiff(LineEnd, LineBegin, 4);
OS.EmitLabel(LineBegin);
OS.EmitCOFFSecRel32(FuncBegin, /*Offset=*/0);
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index e1e2c22e1df1..f36c25a0ce91 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1559,11 +1559,13 @@ IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
case PackCategoriesIntoKey(fcInfinity, fcNaN):
category = fcNaN;
copySignificand(rhs);
+ LLVM_FALLTHROUGH;
case PackCategoriesIntoKey(fcNaN, fcZero):
case PackCategoriesIntoKey(fcNaN, fcNormal):
case PackCategoriesIntoKey(fcNaN, fcInfinity):
case PackCategoriesIntoKey(fcNaN, fcNaN):
sign = false;
+ LLVM_FALLTHROUGH;
case PackCategoriesIntoKey(fcInfinity, fcZero):
case PackCategoriesIntoKey(fcInfinity, fcNormal):
case PackCategoriesIntoKey(fcZero, fcInfinity):
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 8d68c6ae9682..dec6baf7bf47 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -72,10 +72,15 @@ std::unique_ptr<raw_fd_ostream> llvm::CreateInfoOutputFile() {
return llvm::make_unique<raw_fd_ostream>(2, false); // stderr.
}
-static TimerGroup *getDefaultTimerGroup() {
- static TimerGroup DefaultTimerGroup("misc", "Miscellaneous Ungrouped Timers");
- return &DefaultTimerGroup;
-}
+namespace {
+struct CreateDefaultTimerGroup {
+ static void *call() {
+ return new TimerGroup("misc", "Miscellaneous Ungrouped Timers");
+ }
+};
+} // namespace
+static ManagedStatic<TimerGroup, CreateDefaultTimerGroup> DefaultTimerGroup;
+static TimerGroup *getDefaultTimerGroup() { return &*DefaultTimerGroup; }
//===----------------------------------------------------------------------===//
// Timer Implementation
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 09f9759ce7da..f07208b1fb90 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -405,27 +405,21 @@ IntInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
}
CodeInit *CodeInit::get(StringRef V) {
- static DenseMap<StringRef, CodeInit*> ThePool;
+ static StringMap<CodeInit*, BumpPtrAllocator &> ThePool(Allocator);
- auto I = ThePool.insert(std::make_pair(V, nullptr));
- if (I.second) {
- StringRef VCopy = V.copy(Allocator);
- I.first->first = VCopy;
- I.first->second = new(Allocator) CodeInit(VCopy);
- }
- return I.first->second;
+ auto &Entry = *ThePool.insert(std::make_pair(V, nullptr)).first;
+ if (!Entry.second)
+ Entry.second = new(Allocator) CodeInit(Entry.getKey());
+ return Entry.second;
}
StringInit *StringInit::get(StringRef V) {
- static DenseMap<StringRef, StringInit*> ThePool;
+ static StringMap<StringInit*, BumpPtrAllocator &> ThePool(Allocator);
- auto I = ThePool.insert(std::make_pair(V, nullptr));
- if (I.second) {
- StringRef VCopy = V.copy(Allocator);
- I.first->first = VCopy;
- I.first->second = new(Allocator) StringInit(VCopy);
- }
- return I.first->second;
+ auto &Entry = *ThePool.insert(std::make_pair(V, nullptr)).first;
+ if (!Entry.second)
+ Entry.second = new(Allocator) StringInit(Entry.getKey());
+ return Entry.second;
}
Init *StringInit::convertInitializerTo(RecTy *Ty) const {
@@ -1540,7 +1534,7 @@ Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const {
SmallVector<Init*, 8> NewArgs;
NewArgs.reserve(arg_size());
bool ArgsChanged = false;
- for (const Init *Arg : args()) {
+ for (const Init *Arg : getArgs()) {
Init *NewArg = Arg->resolveReferences(R, RV);
NewArgs.push_back(NewArg);
ArgsChanged |= NewArg != Arg;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 0b92249580c8..e96ee7d29b3e 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -137,6 +137,34 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
+/// Look at each instruction that references stack frames and return the stack
+/// size limit beyond which some of these instructions will require a scratch
+/// register during their expansion later.
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
+ // FIXME: For now, just conservatively guestimate based on unscaled indexing
+ // range. We'll end up allocating an unnecessary spill slot a lot, but
+ // realistically that's not a big deal at this stage of the game.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.isDebugValue() || MI.isPseudo() ||
+ MI.getOpcode() == AArch64::ADDXri ||
+ MI.getOpcode() == AArch64::ADDSXri)
+ continue;
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isFI())
+ continue;
+
+ int Offset = 0;
+ if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
+ AArch64FrameOffsetCannotUpdate)
+ return 0;
+ }
+ }
+ }
+ return 255;
+}
+
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@@ -1169,16 +1197,13 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned NumRegsSpilled = SavedRegs.count();
bool CanEliminateFrame = NumRegsSpilled == 0;
- // FIXME: Set BigStack if any stack slot references may be out of range.
- // For now, just conservatively guestimate based on unscaled indexing
- // range. We'll end up allocating an unnecessary spill slot a lot, but
- // realistically that's not a big deal at this stage of the game.
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
- bool BigStack = (CFSize >= 256);
+ unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
+ bool BigStack = (CFSize > EstimatedStackSizeLimit);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 62f4c953830b..f798010906cc 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -381,7 +381,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
- setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
setOperationAction(ISD::FREM, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
@@ -413,7 +412,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
- setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
setOperationAction(ISD::FREM, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
@@ -726,7 +724,6 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
- setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 660879426810..0582ce95693a 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -730,7 +730,7 @@ public:
/// \returns True if waitcnt instruction is needed before barrier instruction,
/// false otherwise.
bool needWaitcntBeforeBarrier() const {
- return getGeneration() < GFX9;
+ return true;
}
/// \returns true if the flat_scratch register should be initialized with the
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a9d3a31a7240..48827f463997 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -736,6 +736,9 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(createSIShrinkInstructionsPass());
if (EnableSDWAPeephole) {
addPass(&SIPeepholeSDWAID);
+ addPass(&MachineLICMID);
+ addPass(&MachineCSEID);
+ addPass(&SIFoldOperandsID);
addPass(&DeadMachineInstructionElimID);
}
}
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index d63414735b95..f13629a3185f 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -247,9 +247,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
// If the use operand doesn't care about the value, this may be an operand only
// used for register indexing, in which case it is unsafe to fold.
-static bool isUseSafeToFold(const MachineInstr &MI,
+static bool isUseSafeToFold(const SIInstrInfo *TII,
+ const MachineInstr &MI,
const MachineOperand &UseMO) {
- return !UseMO.isUndef();
+ return !UseMO.isUndef() && !TII->isSDWA(MI);
//return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
}
@@ -261,7 +262,7 @@ void SIFoldOperands::foldOperand(
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
- if (!isUseSafeToFold(*UseMI, UseOp))
+ if (!isUseSafeToFold(TII, *UseMI, UseOp))
return;
// FIXME: Fold operands with subregs.
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 4dc090d9b7ed..fae249b04492 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -55,6 +55,7 @@ private:
std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
+ SmallVector<MachineInstr *, 8> ConvertedInstructions;
Optional<int64_t> foldToImm(const MachineOperand &Op) const;
@@ -69,6 +70,7 @@ public:
void matchSDWAOperands(MachineFunction &MF);
bool isConvertibleToSDWA(const MachineInstr &MI) const;
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
+ void legalizeScalarOperands(MachineInstr &MI) const;
StringRef getPassName() const override { return "SI Peephole SDWA"; }
@@ -289,7 +291,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
MachineOperand *SrcMods =
TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
- assert(Src && Src->isReg());
+ assert(Src && (Src->isReg() || Src->isImm()));
if (!isSameReg(*Src, *getReplacedOperand())) {
// If this is not src0 then it should be src1
Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
@@ -580,18 +582,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
}
bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
- // Check if this instruction can be converted to SDWA:
- // 1. Does this opcode support SDWA
- if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1)
- return false;
-
- // 2. Are all operands - VGPRs
- for (const MachineOperand &Operand : MI.explicit_operands()) {
- if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg()))
- return false;
- }
-
- return true;
+ // Check if this instruction has opcode that supports SDWA
+ return AMDGPU::getSDWAOp(MI.getOpcode()) != -1;
}
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
@@ -685,7 +677,9 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
if (PotentialMatches.count(Operand->getParentInst()) == 0)
Converted |= Operand->convertToSDWA(*SDWAInst, TII);
}
- if (!Converted) {
+ if (Converted) {
+ ConvertedInstructions.push_back(SDWAInst);
+ } else {
SDWAInst->eraseFromParent();
return false;
}
@@ -698,6 +692,29 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
return true;
}
+// If an instruction was converted to SDWA it should not have immediates or SGPR
+// operands. Copy its scalar operands into VGPRs.
+void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI) const {
+ const MCInstrDesc &Desc = TII->get(MI.getOpcode());
+ for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
+ MachineOperand &Op = MI.getOperand(I);
+ if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
+ continue;
+ if (Desc.OpInfo[I].RegClass == -1 ||
+ !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
+ continue;
+ unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+ TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
+ if (Op.isImm())
+ Copy.addImm(Op.getImm());
+ else if (Op.isReg())
+ Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
+ Op.getSubReg());
+ Op.ChangeToRegister(VGPR, false);
+ }
+}
+
bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
@@ -728,5 +745,9 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
PotentialMatches.clear();
SDWAOperands.clear();
+
+ while (!ConvertedInstructions.empty())
+ legalizeScalarOperands(*ConvertedInstructions.pop_back_val());
+
return false;
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 62e774d869da..949d821e36b2 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -585,7 +585,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
- setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
@@ -603,7 +602,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
- setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
@@ -620,7 +618,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
- setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
@@ -743,7 +740,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
- setOperationAction(ISD::FPOWI, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FLOG, MVT::f64, Expand);
setOperationAction(ISD::FLOG2, MVT::f64, Expand);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1dffebe97f2d..5ecf9320d5c2 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2003,7 +2003,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Floating point arithmetic/math functions:
ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
- ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
+ ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index d407774574be..d855d3e7f778 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -13,6 +13,7 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsTargetStreamer.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
@@ -216,9 +217,15 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned SrcReg, bool Is32BitSym, SMLoc IDLoc,
MCStreamer &Out, const MCSubtargetInfo *STI);
+ bool emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc, MCSymbol *Sym);
+
bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
MCStreamer &Out, const MCSubtargetInfo *STI);
+ bool expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, bool Is64FPU,
+ SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
bool expandLoadAddress(unsigned DstReg, unsigned BaseReg,
const MCOperand &Offset, bool Is32BitAddress,
SMLoc IDLoc, MCStreamer &Out,
@@ -1011,6 +1018,16 @@ public:
Inst.addOperand(MCOperand::createReg(getAFGR64Reg()));
}
+ void addStrictlyAFGR64AsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getAFGR64Reg()));
+ }
+
+ void addStrictlyFGR64AsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getFGR64Reg()));
+ }
+
void addFGR64AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getFGR64Reg()));
@@ -1027,6 +1044,15 @@ public:
"registers");
}
+ void addStrictlyFGR32AsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getFGR32Reg()));
+ // FIXME: We ought to do this for -integrated-as without -via-file-asm too.
+ if (!AsmParser.useOddSPReg() && RegIdx.Index & 1)
+ AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU "
+ "registers");
+ }
+
void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getFGRH32Reg()));
@@ -1574,6 +1600,11 @@ public:
return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
}
+ bool isStrictlyFGRAsmReg() const {
+ // AFGR64 is $0-$15 but we handle this in getAFGR64()
+ return isRegIdx() && RegIdx.Kind == RegKind_FGR && RegIdx.Index <= 31;
+ }
+
bool isHWRegsAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_HWRegs && RegIdx.Index <= 31;
}
@@ -2368,6 +2399,27 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
case Mips::PseudoTRUNC_W_D:
return expandTrunc(Inst, true, true, IDLoc, Out, STI) ? MER_Fail
: MER_Success;
+
+ case Mips::LoadImmSingleGPR:
+ return expandLoadImmReal(Inst, true, true, false, IDLoc, Out, STI)
+ ? MER_Fail
+ : MER_Success;
+ case Mips::LoadImmSingleFGR:
+ return expandLoadImmReal(Inst, true, false, false, IDLoc, Out, STI)
+ ? MER_Fail
+ : MER_Success;
+ case Mips::LoadImmDoubleGPR:
+ return expandLoadImmReal(Inst, false, true, false, IDLoc, Out, STI)
+ ? MER_Fail
+ : MER_Success;
+ case Mips::LoadImmDoubleFGR:
+ return expandLoadImmReal(Inst, false, false, true, IDLoc, Out, STI)
+ ? MER_Fail
+ : MER_Success;
+ case Mips::LoadImmDoubleFGR_32:
+ return expandLoadImmReal(Inst, false, false, false, IDLoc, Out, STI)
+ ? MER_Fail
+ : MER_Success;
case Mips::Ulh:
return expandUlh(Inst, true, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::Ulhu:
@@ -2952,6 +3004,302 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
return false;
}
+// Each double-precision register DO-D15 overlaps with two of the single
+// precision registers F0-F31. As an example, all of the following hold true:
+// D0 + 1 == F1, F1 + 1 == D1, F1 + 1 == F2, depending on the context.
+static unsigned nextReg(unsigned Reg) {
+ if (MipsMCRegisterClasses[Mips::FGR32RegClassID].contains(Reg))
+ return Reg == (unsigned)Mips::F31 ? (unsigned)Mips::F0 : Reg + 1;
+ switch (Reg) {
+ default: llvm_unreachable("Unknown register in assembly macro expansion!");
+ case Mips::ZERO: return Mips::AT;
+ case Mips::AT: return Mips::V0;
+ case Mips::V0: return Mips::V1;
+ case Mips::V1: return Mips::A0;
+ case Mips::A0: return Mips::A1;
+ case Mips::A1: return Mips::A2;
+ case Mips::A2: return Mips::A3;
+ case Mips::A3: return Mips::T0;
+ case Mips::T0: return Mips::T1;
+ case Mips::T1: return Mips::T2;
+ case Mips::T2: return Mips::T3;
+ case Mips::T3: return Mips::T4;
+ case Mips::T4: return Mips::T5;
+ case Mips::T5: return Mips::T6;
+ case Mips::T6: return Mips::T7;
+ case Mips::T7: return Mips::S0;
+ case Mips::S0: return Mips::S1;
+ case Mips::S1: return Mips::S2;
+ case Mips::S2: return Mips::S3;
+ case Mips::S3: return Mips::S4;
+ case Mips::S4: return Mips::S5;
+ case Mips::S5: return Mips::S6;
+ case Mips::S6: return Mips::S7;
+ case Mips::S7: return Mips::T8;
+ case Mips::T8: return Mips::T9;
+ case Mips::T9: return Mips::K0;
+ case Mips::K0: return Mips::K1;
+ case Mips::K1: return Mips::GP;
+ case Mips::GP: return Mips::SP;
+ case Mips::SP: return Mips::FP;
+ case Mips::FP: return Mips::RA;
+ case Mips::RA: return Mips::ZERO;
+ case Mips::D0: return Mips::F1;
+ case Mips::D1: return Mips::F3;
+ case Mips::D2: return Mips::F5;
+ case Mips::D3: return Mips::F7;
+ case Mips::D4: return Mips::F9;
+ case Mips::D5: return Mips::F11;
+ case Mips::D6: return Mips::F13;
+ case Mips::D7: return Mips::F15;
+ case Mips::D8: return Mips::F17;
+ case Mips::D9: return Mips::F19;
+ case Mips::D10: return Mips::F21;
+ case Mips::D11: return Mips::F23;
+ case Mips::D12: return Mips::F25;
+ case Mips::D13: return Mips::F27;
+ case Mips::D14: return Mips::F29;
+ case Mips::D15: return Mips::F31;
+ }
+}
+
+// FIXME: This method is too general. In principle we should compute the number
+// of instructions required to synthesize the immediate inline compared to
+// synthesizing the address inline and relying on non .text sections.
+// For static O32 and N32 this may yield a small benefit, for static N64 this is
+// likely to yield a much larger benefit as we have to synthesize a 64bit
+// address to load a 64 bit value.
+bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc,
+ MCSymbol *Sym) {
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ if(IsPicEnabled) {
+ const MCExpr *GotSym =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ const MipsMCExpr *GotExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_GOT, GotSym, getContext());
+
+ if(isABI_O32() || isABI_N32()) {
+ TOut.emitRRX(Mips::LW, ATReg, Mips::GP, MCOperand::createExpr(GotExpr),
+ IDLoc, STI);
+ } else { //isABI_N64()
+ TOut.emitRRX(Mips::LD, ATReg, Mips::GP, MCOperand::createExpr(GotExpr),
+ IDLoc, STI);
+ }
+ } else { //!IsPicEnabled
+ const MCExpr *HiSym =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ const MipsMCExpr *HiExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_HI, HiSym, getContext());
+
+ // FIXME: This is technically correct but gives a different result to gas,
+ // but gas is incomplete there (it has a fixme noting it doesn't work with
+ // 64-bit addresses).
+ // FIXME: With -msym32 option, the address expansion for N64 should probably
+ // use the O32 / N32 case. It's safe to use the 64 address expansion as the
+ // symbol's value is considered sign extended.
+ if(isABI_O32() || isABI_N32()) {
+ TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc, STI);
+ } else { //isABI_N64()
+ const MCExpr *HighestSym =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ const MipsMCExpr *HighestExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_HIGHEST, HighestSym, getContext());
+ const MCExpr *HigherSym =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ const MipsMCExpr *HigherExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, HigherSym, getContext());
+
+ TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HighestExpr), IDLoc,
+ STI);
+ TOut.emitRRX(Mips::DADDiu, ATReg, ATReg,
+ MCOperand::createExpr(HigherExpr), IDLoc, STI);
+ TOut.emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HiExpr),
+ IDLoc, STI);
+ TOut.emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, STI);
+ }
+ }
+ return false;
+}
+
+bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR,
+ bool Is64FPU, SMLoc IDLoc,
+ MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ assert(Inst.getNumOperands() == 2 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() &&
+ "Invalid instruction operand.");
+
+ unsigned FirstReg = Inst.getOperand(0).getReg();
+ uint64_t ImmOp64 = Inst.getOperand(1).getImm();
+
+ uint32_t HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32;
+ // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the
+ // exponent field), convert it to double (e.g. 1 to 1.0)
+ if ((HiImmOp64 & 0x7ff00000) == 0) {
+ APFloat RealVal(APFloat::IEEEdouble(), ImmOp64);
+ ImmOp64 = RealVal.bitcastToAPInt().getZExtValue();
+ }
+
+ uint32_t LoImmOp64 = ImmOp64 & 0xffffffff;
+ HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32;
+
+ if (IsSingle) {
+ // Conversion of a double in an uint64_t to a float in a uint32_t,
+ // retaining the bit pattern of a float.
+ uint32_t ImmOp32;
+ double doubleImm = BitsToDouble(ImmOp64);
+ float tmp_float = static_cast<float>(doubleImm);
+ ImmOp32 = FloatToBits(tmp_float);
+
+ if (IsGPR) {
+ if (loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, true, IDLoc,
+ Out, STI))
+ return true;
+ return false;
+ } else {
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+ if (LoImmOp64 == 0) {
+ if (loadImmediate(ImmOp32, ATReg, Mips::NoRegister, true, true, IDLoc,
+ Out, STI))
+ return true;
+ TOut.emitRR(Mips::MTC1, FirstReg, ATReg, IDLoc, STI);
+ return false;
+ }
+
+ MCSection *CS = getStreamer().getCurrentSectionOnly();
+ // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections
+ // where appropriate.
+ MCSection *ReadOnlySection = getContext().getELFSection(
+ ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+ MCSymbol *Sym = getContext().createTempSymbol();
+ const MCExpr *LoSym =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ const MipsMCExpr *LoExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+
+ getStreamer().SwitchSection(ReadOnlySection);
+ getStreamer().EmitLabel(Sym, IDLoc);
+ getStreamer().EmitIntValue(ImmOp32, 4);
+ getStreamer().SwitchSection(CS);
+
+ if(emitPartialAddress(TOut, IDLoc, Sym))
+ return true;
+ TOut.emitRRX(Mips::LWC1, FirstReg, ATReg,
+ MCOperand::createExpr(LoExpr), IDLoc, STI);
+ }
+ return false;
+ }
+
+ // if(!IsSingle)
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ if (IsGPR) {
+ if (LoImmOp64 == 0) {
+ if(isABI_N32() || isABI_N64()) {
+ if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, false, true,
+ IDLoc, Out, STI))
+ return true;
+ return false;
+ } else {
+ if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, true, true,
+ IDLoc, Out, STI))
+ return true;
+
+ if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, true,
+ IDLoc, Out, STI))
+ return true;
+ return false;
+ }
+ }
+
+ MCSection *CS = getStreamer().getCurrentSectionOnly();
+ MCSection *ReadOnlySection = getContext().getELFSection(
+ ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+ MCSymbol *Sym = getContext().createTempSymbol();
+ const MCExpr *LoSym =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ const MipsMCExpr *LoExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+
+ getStreamer().SwitchSection(ReadOnlySection);
+ getStreamer().EmitLabel(Sym, IDLoc);
+ getStreamer().EmitIntValue(HiImmOp64, 4);
+ getStreamer().EmitIntValue(LoImmOp64, 4);
+ getStreamer().SwitchSection(CS);
+
+ if(emitPartialAddress(TOut, IDLoc, Sym))
+ return true;
+ if(isABI_N64())
+ TOut.emitRRX(Mips::DADDiu, ATReg, ATReg,
+ MCOperand::createExpr(LoExpr), IDLoc, STI);
+ else
+ TOut.emitRRX(Mips::ADDiu, ATReg, ATReg,
+ MCOperand::createExpr(LoExpr), IDLoc, STI);
+
+ if(isABI_N32() || isABI_N64())
+ TOut.emitRRI(Mips::LD, FirstReg, ATReg, 0, IDLoc, STI);
+ else {
+ TOut.emitRRI(Mips::LW, FirstReg, ATReg, 0, IDLoc, STI);
+ TOut.emitRRI(Mips::LW, nextReg(FirstReg), ATReg, 4, IDLoc, STI);
+ }
+ return false;
+ } else { // if(!IsGPR && !IsSingle)
+ if ((LoImmOp64 == 0) &&
+ !((HiImmOp64 & 0xffff0000) && (HiImmOp64 & 0x0000ffff))) {
+ // FIXME: In the case where the constant is zero, we can load the
+ // register directly from the zero register.
+ if (loadImmediate(HiImmOp64, ATReg, Mips::NoRegister, true, true, IDLoc,
+ Out, STI))
+ return true;
+ if (isABI_N32() || isABI_N64())
+ TOut.emitRR(Mips::DMTC1, FirstReg, ATReg, IDLoc, STI);
+ else if (hasMips32r2()) {
+ TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI);
+ TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, ATReg, IDLoc, STI);
+ } else {
+ TOut.emitRR(Mips::MTC1, nextReg(FirstReg), ATReg, IDLoc, STI);
+ TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI);
+ }
+ return false;
+ }
+
+ MCSection *CS = getStreamer().getCurrentSectionOnly();
+ // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections
+ // where appropriate.
+ MCSection *ReadOnlySection = getContext().getELFSection(
+ ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+ MCSymbol *Sym = getContext().createTempSymbol();
+ const MCExpr *LoSym =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ const MipsMCExpr *LoExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+
+ getStreamer().SwitchSection(ReadOnlySection);
+ getStreamer().EmitLabel(Sym, IDLoc);
+ getStreamer().EmitIntValue(HiImmOp64, 4);
+ getStreamer().EmitIntValue(LoImmOp64, 4);
+ getStreamer().SwitchSection(CS);
+
+ if(emitPartialAddress(TOut, IDLoc, Sym))
+ return true;
+ TOut.emitRRX(Is64FPU ? Mips::LDC164 : Mips::LDC1, FirstReg, ATReg,
+ MCOperand::createExpr(LoExpr), IDLoc, STI);
+ }
+ return false;
+}
+
bool MipsAsmParser::expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out,
const MCSubtargetInfo *STI) {
@@ -4318,45 +4666,6 @@ bool MipsAsmParser::expandDMULMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return false;
}
-static unsigned nextReg(unsigned Reg) {
- switch (Reg) {
- case Mips::ZERO: return Mips::AT;
- case Mips::AT: return Mips::V0;
- case Mips::V0: return Mips::V1;
- case Mips::V1: return Mips::A0;
- case Mips::A0: return Mips::A1;
- case Mips::A1: return Mips::A2;
- case Mips::A2: return Mips::A3;
- case Mips::A3: return Mips::T0;
- case Mips::T0: return Mips::T1;
- case Mips::T1: return Mips::T2;
- case Mips::T2: return Mips::T3;
- case Mips::T3: return Mips::T4;
- case Mips::T4: return Mips::T5;
- case Mips::T5: return Mips::T6;
- case Mips::T6: return Mips::T7;
- case Mips::T7: return Mips::S0;
- case Mips::S0: return Mips::S1;
- case Mips::S1: return Mips::S2;
- case Mips::S2: return Mips::S3;
- case Mips::S3: return Mips::S4;
- case Mips::S4: return Mips::S5;
- case Mips::S5: return Mips::S6;
- case Mips::S6: return Mips::S7;
- case Mips::S7: return Mips::T8;
- case Mips::T8: return Mips::T9;
- case Mips::T9: return Mips::K0;
- case Mips::K0: return Mips::K1;
- case Mips::K1: return Mips::GP;
- case Mips::GP: return Mips::SP;
- case Mips::SP: return Mips::FP;
- case Mips::FP: return Mips::RA;
- case Mips::RA: return Mips::ZERO;
- default: return 0;
- }
-
-}
-
// Expand 'ld $<reg> offset($reg2)' to 'lw $<reg>, offset($reg2);
// lw $<reg+1>>, offset+4($reg2)'
// or expand 'sd $<reg> offset($reg2)' to 'sw $<reg>, offset($reg2);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 8fe4e75f3e18..760630c41176 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -362,7 +362,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
- setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FLOG, MVT::f32, Expand);
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index df42d56d041b..d81a769d7fd9 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -681,6 +681,29 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd),
"trunc.w.d\t$fd, $fs, $rs">,
FGR_64, HARDFLOAT;
+def LoadImmSingleGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins imm64:$fpimm),
+ "li.s\t$rd, $fpimm">;
+
+def LoadImmSingleFGR : MipsAsmPseudoInst<(outs StrictlyFGR32Opnd:$rd),
+ (ins imm64:$fpimm),
+ "li.s\t$rd, $fpimm">,
+ HARDFLOAT;
+
+def LoadImmDoubleGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins imm64:$fpimm),
+ "li.d\t$rd, $fpimm">;
+
+def LoadImmDoubleFGR_32 : MipsAsmPseudoInst<(outs StrictlyAFGR64Opnd:$rd),
+ (ins imm64:$fpimm),
+ "li.d\t$rd, $fpimm">,
+ FGR_32, HARDFLOAT;
+
+def LoadImmDoubleFGR : MipsAsmPseudoInst<(outs StrictlyFGR64Opnd:$rd),
+ (ins imm64:$fpimm),
+ "li.d\t$rd, $fpimm">,
+ FGR_64, HARDFLOAT;
+
//===----------------------------------------------------------------------===//
// InstAliases.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index ccfdcc89b078..08fb3d7d4352 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -552,16 +552,31 @@ def AFGR64AsmOperand : MipsAsmRegOperand {
let PredicateMethod = "isFGRAsmReg";
}
+def StrictlyAFGR64AsmOperand : MipsAsmRegOperand {
+ let Name = "StrictlyAFGR64AsmReg";
+ let PredicateMethod = "isStrictlyFGRAsmReg";
+}
+
def FGR64AsmOperand : MipsAsmRegOperand {
let Name = "FGR64AsmReg";
let PredicateMethod = "isFGRAsmReg";
}
+def StrictlyFGR64AsmOperand : MipsAsmRegOperand {
+ let Name = "StrictlyFGR64AsmReg";
+ let PredicateMethod = "isStrictlyFGRAsmReg";
+}
+
def FGR32AsmOperand : MipsAsmRegOperand {
let Name = "FGR32AsmReg";
let PredicateMethod = "isFGRAsmReg";
}
+def StrictlyFGR32AsmOperand : MipsAsmRegOperand {
+ let Name = "StrictlyFGR32AsmReg";
+ let PredicateMethod = "isStrictlyFGRAsmReg";
+}
+
def FGRH32AsmOperand : MipsAsmRegOperand {
let Name = "FGRH32AsmReg";
let PredicateMethod = "isFGRAsmReg";
@@ -639,14 +654,26 @@ def AFGR64Opnd : RegisterOperand<AFGR64> {
let ParserMatchClass = AFGR64AsmOperand;
}
+def StrictlyAFGR64Opnd : RegisterOperand<AFGR64> {
+ let ParserMatchClass = StrictlyAFGR64AsmOperand;
+}
+
def FGR64Opnd : RegisterOperand<FGR64> {
let ParserMatchClass = FGR64AsmOperand;
}
+def StrictlyFGR64Opnd : RegisterOperand<FGR64> {
+ let ParserMatchClass = StrictlyFGR64AsmOperand;
+}
+
def FGR32Opnd : RegisterOperand<FGR32> {
let ParserMatchClass = FGR32AsmOperand;
}
+def StrictlyFGR32Opnd : RegisterOperand<FGR32> {
+ let ParserMatchClass = StrictlyFGR32AsmOperand;
+}
+
def FGRCCOpnd : RegisterOperand<FGRCC> {
// The assembler doesn't use register classes so we can re-use
// FGR32AsmOperand.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index b90a5ee28342..216efcc4a1ee 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -539,7 +539,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
- setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
@@ -798,7 +797,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FABS , MVT::v4f64, Legal);
setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
- setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
@@ -844,7 +842,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FABS , MVT::v4f32, Legal);
setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
- setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
index 6bdfd4d07edc..c5f324418da5 100644
--- a/lib/Target/SystemZ/SystemZ.td
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -54,6 +54,8 @@ include "SystemZInstrFormats.td"
include "SystemZInstrInfo.td"
include "SystemZInstrVector.td"
include "SystemZInstrFP.td"
+include "SystemZInstrHFP.td"
+include "SystemZInstrDFP.td"
def SystemZInstrInfo : InstrInfo {}
diff --git a/lib/Target/SystemZ/SystemZFeatures.td b/lib/Target/SystemZ/SystemZFeatures.td
index 7bfa378aa85c..ffb0b8d1c861 100644
--- a/lib/Target/SystemZ/SystemZFeatures.td
+++ b/lib/Target/SystemZ/SystemZFeatures.td
@@ -115,12 +115,18 @@ def FeatureTransactionalExecution : SystemZFeature<
"Assume that the transactional-execution facility is installed"
>;
+def FeatureDFPZonedConversion : SystemZFeature<
+ "dfp-zoned-conversion", "DFPZonedConversion",
+ "Assume that the DFP zoned-conversion facility is installed"
+>;
+
def Arch10NewFeatures : SystemZFeatureList<[
FeatureExecutionHint,
FeatureLoadAndTrap,
FeatureMiscellaneousExtensions,
FeatureProcessorAssist,
- FeatureTransactionalExecution
+ FeatureTransactionalExecution,
+ FeatureDFPZonedConversion
]>;
//===----------------------------------------------------------------------===//
@@ -144,6 +150,11 @@ def FeatureMessageSecurityAssist5 : SystemZFeature<
"Assume that the message-security-assist extension facility 5 is installed"
>;
+def FeatureDFPPackedConversion : SystemZFeature<
+ "dfp-packed-conversion", "DFPPackedConversion",
+ "Assume that the DFP packed-conversion facility is installed"
+>;
+
def FeatureVector : SystemZFeature<
"vector", "Vector",
"Assume that the vectory facility is installed"
@@ -154,6 +165,7 @@ def Arch11NewFeatures : SystemZFeatureList<[
FeatureLoadAndZeroRightmostByte,
FeatureLoadStoreOnCond2,
FeatureMessageSecurityAssist5,
+ FeatureDFPPackedConversion,
FeatureVector
]>;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 235e095f0010..ae141dbcad34 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4189,12 +4189,20 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
+ // If all elements are loads, use VLREP/VLEs (below).
+ bool AllLoads = true;
+ for (auto Elem : Elems)
+ if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
+ AllLoads = false;
+ break;
+ }
+
// The best way of building a v2i64 from two i64s is to use VLVGP.
- if (VT == MVT::v2i64)
+ if (VT == MVT::v2i64 && !AllLoads)
return joinDwords(DAG, DL, Elems[0], Elems[1]);
// Use a 64-bit merge high to combine two doubles.
- if (VT == MVT::v2f64)
+ if (VT == MVT::v2f64 && !AllLoads)
return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
// Build v4f32 values directly from the FPRs:
@@ -4204,7 +4212,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// <ABxx> <CDxx>
// V VMRHG
// <ABCD>
- if (VT == MVT::v4f32) {
+ if (VT == MVT::v4f32 && !AllLoads) {
SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
// Avoid unnecessary undefs by reusing the other operand.
@@ -4246,23 +4254,37 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
Result = DAG.getBuildVector(VT, DL, Constants);
} else {
- // Otherwise try to use VLVGP to start the sequence in order to
+ // Otherwise try to use VLREP or VLVGP to start the sequence in order to
// avoid a false dependency on any previous contents of the vector
- // register. This only makes sense if one of the associated elements
- // is defined.
- unsigned I1 = NumElements / 2 - 1;
- unsigned I2 = NumElements - 1;
- bool Def1 = !Elems[I1].isUndef();
- bool Def2 = !Elems[I2].isUndef();
- if (Def1 || Def2) {
- SDValue Elem1 = Elems[Def1 ? I1 : I2];
- SDValue Elem2 = Elems[Def2 ? I2 : I1];
- Result = DAG.getNode(ISD::BITCAST, DL, VT,
- joinDwords(DAG, DL, Elem1, Elem2));
- Done[I1] = true;
- Done[I2] = true;
- } else
- Result = DAG.getUNDEF(VT);
+ // register.
+
+ // Use a VLREP if at least one element is a load.
+ unsigned LoadElIdx = UINT_MAX;
+ for (unsigned I = 0; I < NumElements; ++I)
+ if (Elems[I].getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(Elems[I])->isUnindexed()) {
+ LoadElIdx = I;
+ break;
+ }
+ if (LoadElIdx != UINT_MAX) {
+ Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]);
+ Done[LoadElIdx] = true;
+ } else {
+ // Try to use VLVGP.
+ unsigned I1 = NumElements / 2 - 1;
+ unsigned I2 = NumElements - 1;
+ bool Def1 = !Elems[I1].isUndef();
+ bool Def2 = !Elems[I2].isUndef();
+ if (Def1 || Def2) {
+ SDValue Elem1 = Elems[Def1 ? I1 : I2];
+ SDValue Elem2 = Elems[Def2 ? I2 : I1];
+ Result = DAG.getNode(ISD::BITCAST, DL, VT,
+ joinDwords(DAG, DL, Elem1, Elem2));
+ Done[I1] = true;
+ Done[I2] = true;
+ } else
+ Result = DAG.getUNDEF(VT);
+ }
}
// Use VLVGx to insert the other elements.
diff --git a/lib/Target/SystemZ/SystemZInstrDFP.td b/lib/Target/SystemZ/SystemZInstrDFP.td
new file mode 100644
index 000000000000..08ab2d7bbc52
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrDFP.td
@@ -0,0 +1,231 @@
+//==- SystemZInstrDFP.td - Floating-point SystemZ instructions -*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The instructions in this file implement SystemZ decimal floating-point
+// arithmetic. These instructions are inot currently used for code generation,
+// are provided for use with the assembler and disassembler only. If LLVM
+// ever supports decimal floating-point types (_Decimal64 etc.), they can
+// also be used for code generation for those types.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and test.
+let Defs = [CC] in {
+ def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64, FP64>;
+ def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Convert floating-point values to narrower representations. The destination
+// of LDXTR is a 128-bit value, but only the first register of the pair is used.
+def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>;
+def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>;
+
+// Extend floating-point values to wider representations.
+def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>;
+def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>;
+
+// Convert a signed integer value to a floating-point one.
+def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>;
+def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>;
+let Predicates = [FeatureFPExtension] in {
+ def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>;
+ def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>;
+ def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>;
+ def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>;
+}
+
+// Convert an unsigned integer value to a floating-point one.
+let Predicates = [FeatureFPExtension] in {
+ def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64, GR64>;
+ def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>;
+ def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64, GR32>;
+ def CXLFTR : TernaryRRFe<"cxlftr", 0xB95B, FP128, GR32>;
+}
+
+// Convert a floating-point value to a signed integer value.
+let Defs = [CC] in {
+ def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>;
+ def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>;
+ let Predicates = [FeatureFPExtension] in {
+ def CGDTRA : TernaryRRFe<"cgdtra", 0xB3E1, GR64, FP64>;
+ def CGXTRA : TernaryRRFe<"cgxtra", 0xB3E9, GR64, FP128>;
+ def CFDTR : TernaryRRFe<"cfdtr", 0xB941, GR32, FP64>;
+ def CFXTR : TernaryRRFe<"cfxtr", 0xB949, GR32, FP128>;
+ }
+}
+
+// Convert a floating-point value to an unsigned integer value.
+let Defs = [CC] in {
+ let Predicates = [FeatureFPExtension] in {
+ def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>;
+ def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>;
+ def CLFDTR : TernaryRRFe<"clfdtr", 0xB943, GR32, FP64>;
+ def CLFXTR : TernaryRRFe<"clfxtr", 0xB94B, GR32, FP128>;
+ }
+}
+
+// Convert a packed value to a floating-point one.
+def CDSTR : UnaryRRE<"cdstr", 0xB3F3, null_frag, FP64, GR64>;
+def CXSTR : UnaryRRE<"cxstr", 0xB3FB, null_frag, FP128, GR128>;
+def CDUTR : UnaryRRE<"cdutr", 0xB3F2, null_frag, FP64, GR64>;
+def CXUTR : UnaryRRE<"cxutr", 0xB3FA, null_frag, FP128, GR128>;
+
+// Convert a floating-point value to a packed value.
+def CSDTR : BinaryRRFd<"csdtr", 0xB3E3, GR64, FP64>;
+def CSXTR : BinaryRRFd<"csxtr", 0xB3EB, GR128, FP128>;
+def CUDTR : UnaryRRE<"cudtr", 0xB3E2, null_frag, GR64, FP64>;
+def CUXTR : UnaryRRE<"cuxtr", 0xB3EA, null_frag, GR128, FP128>;
+
+// Convert from/to memory values in the zoned format.
+let Predicates = [FeatureDFPZonedConversion] in {
+ def CDZT : BinaryRSL<"cdzt", 0xEDAA, FP64>;
+ def CXZT : BinaryRSL<"cxzt", 0xEDAB, FP128>;
+ def CZDT : StoreBinaryRSL<"czdt", 0xEDA8, FP64>;
+ def CZXT : StoreBinaryRSL<"czxt", 0xEDA9, FP128>;
+}
+
+// Convert from/to memory values in the packed format.
+let Predicates = [FeatureDFPPackedConversion] in {
+ def CDPT : BinaryRSL<"cdpt", 0xEDAE, FP64>;
+ def CXPT : BinaryRSL<"cxpt", 0xEDAF, FP128>;
+ def CPDT : StoreBinaryRSL<"cpdt", 0xEDAC, FP64>;
+ def CPXT : StoreBinaryRSL<"cpxt", 0xEDAD, FP128>;
+}
+
+// Perform floating-point operation.
+let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in
+ def PFPO : SideEffectInherentE<"pfpo", 0x010A>;
+
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Round to an integer, with the second operand (M3) specifying the rounding
+// mode. M4 can be set to 4 to suppress detection of inexact conditions.
+def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>;
+def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>;
+
+// Extract biased exponent.
+def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64, FP64>;
+def EEXTR : UnaryRRE<"eextr", 0xB3ED, null_frag, FP128, FP128>;
+
+// Extract significance.
+def ESDTR : UnaryRRE<"esdtr", 0xB3E7, null_frag, FP64, FP64>;
+def ESXTR : UnaryRRE<"esxtr", 0xB3EF, null_frag, FP128, FP128>;
+
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [CC] in {
+ let isCommutable = 1 in {
+ def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64, FP64, FP64>;
+ def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>;
+ }
+ let Predicates = [FeatureFPExtension] in {
+ def ADTRA : TernaryRRFa<"adtra", 0xB3D2, FP64, FP64, FP64>;
+ def AXTRA : TernaryRRFa<"axtra", 0xB3DA, FP128, FP128, FP128>;
+ }
+}
+
+// Subtraction.
+let Defs = [CC] in {
+ def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64, FP64, FP64>;
+ def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>;
+ let Predicates = [FeatureFPExtension] in {
+ def SDTRA : TernaryRRFa<"sdtra", 0xB3D3, FP64, FP64, FP64>;
+ def SXTRA : TernaryRRFa<"sxtra", 0xB3DB, FP128, FP128, FP128>;
+ }
+}
+
+// Multiplication.
+let isCommutable = 1 in {
+ def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>;
+ def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>;
+}
+let Predicates = [FeatureFPExtension] in {
+ def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>;
+ def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>;
+}
+
+// Division.
+def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>;
+def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>;
+let Predicates = [FeatureFPExtension] in {
+ def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>;
+ def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>;
+}
+
+// Quantize.
+def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>;
+def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>;
+
+// Reround.
+def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>;
+def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>;
+
+// Shift significand left/right.
+def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64, FP64, null_frag, 0>;
+def SLXT : BinaryRXF<"slxt", 0xED48, null_frag, FP128, FP128, null_frag, 0>;
+def SRDT : BinaryRXF<"srdt", 0xED41, null_frag, FP64, FP64, null_frag, 0>;
+def SRXT : BinaryRXF<"srxt", 0xED49, null_frag, FP128, FP128, null_frag, 0>;
+
+// Insert biased exponent.
+def IEDTR : BinaryRRFb<"iedtr", 0xB3F6, null_frag, FP64, FP64, FP64>;
+def IEXTR : BinaryRRFb<"iextr", 0xB3FE, null_frag, FP128, FP128, FP128>;
+
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare.
+let Defs = [CC] in {
+ def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64, FP64>;
+ def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>;
+}
+
+// Compare and signal.
+let Defs = [CC] in {
+ def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64, FP64>;
+ def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>;
+}
+
+// Compare biased exponent.
+let Defs = [CC] in {
+ def CEDTR : CompareRRE<"cedtr", 0xB3F4, null_frag, FP64, FP64>;
+ def CEXTR : CompareRRE<"cextr", 0xB3FC, null_frag, FP128, FP128>;
+}
+
+// Test Data Class.
+let Defs = [CC] in {
+ def TDCET : TestRXE<"tdcet", 0xED50, null_frag, FP32>;
+ def TDCDT : TestRXE<"tdcdt", 0xED54, null_frag, FP64>;
+ def TDCXT : TestRXE<"tdcxt", 0xED58, null_frag, FP128>;
+}
+
+// Test Data Group.
+let Defs = [CC] in {
+ def TDGET : TestRXE<"tdget", 0xED51, null_frag, FP32>;
+ def TDGDT : TestRXE<"tdgdt", 0xED55, null_frag, FP64>;
+ def TDGXT : TestRXE<"tdgxt", 0xED59, null_frag, FP128>;
+}
+
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 364b81f98eed..10172bd45203 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -121,7 +121,8 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
// For z13 we prefer LDE over LE to avoid partial register dependencies.
- def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
+ let isCodeGenOnly = 1 in
+ def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
// These instructions are split after register allocation, so we don't
// want a custom inserter.
@@ -437,18 +438,18 @@ def : Pat<(fmul (f128 (fpextend FP64:$src1)),
bdxaddr12only:$addr)>;
// Fused multiply-add.
-def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>;
-def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>;
+def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
+def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>;
-def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load, 4>;
-def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load, 8>;
+def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>;
+def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>;
// Fused multiply-subtract.
-def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>;
-def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>;
+def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>;
+def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>;
-def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load, 4>;
-def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load, 8>;
+def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>;
+def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>;
// Division.
def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>;
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index a37da2807854..5f6115ed86a4 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -527,6 +527,22 @@ class InstRRFc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{3-0} = R2;
}
+class InstRRFd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> M4;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = 0;
+ let Inst{11-8} = M4;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
class InstRRFe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
@@ -725,6 +741,22 @@ class InstRSLa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-0} = op{7-0};
}
+class InstRSLb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<24> BDL2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-16} = BDL2;
+ let Inst{15-12} = R1;
+ let Inst{11-8} = M3;
+ let Inst{7-0} = op{7-0};
+}
+
class InstRSYa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
@@ -2752,6 +2784,15 @@ class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let DisableEncoding = "$R1src";
}
+class BinaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRD<opcode, (outs cls1:$R1), (ins cls2:$R3, cls2:$R2),
+ mnemonic#"\t$R1, $R3, $R2",
+ [(set cls1:$R1, (operator cls2:$R3, cls2:$R2))]> {
+ let OpKey = mnemonic#cls;
+ let OpType = "reg";
+}
+
class BinaryRRFa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
@@ -2808,6 +2849,11 @@ multiclass BinaryMemRRFcOpt<string mnemonic, bits<16> opcode,
def Opt : UnaryMemRRFc<mnemonic, opcode, cls1, cls2>;
}
+class BinaryRRFd<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFd<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R2, $M4", []>;
+
class BinaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
: InstRRFe<opcode, (outs cls1:$R1), (ins imm32zx4:$M3, cls2:$R2),
@@ -2958,6 +3004,13 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
}
}
+class BinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRSLb<opcode, (outs cls:$R1),
+ (ins bdladdr12onlylen8:$BDL2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $BDL2, $M3", []> {
+ let mayLoad = 1;
+}
+
class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
@@ -2987,6 +3040,18 @@ class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M3 = 0;
}
+class BinaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2,
+ SDPatternOperator load, bits<5> bytes>
+ : InstRXF<opcode, (outs cls1:$R1), (ins cls2:$R3, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $R3, $XBD2",
+ [(set cls1:$R1, (operator cls2:$R3, (load bdxaddr12only:$XBD2)))]> {
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
@@ -3294,6 +3359,13 @@ multiclass StoreBinaryRSPair<string mnemonic, bits<8> rsOpcode,
}
}
+class StoreBinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRSLb<opcode, (outs),
+ (ins cls:$R1, bdladdr12onlylen8:$BDL2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $BDL2, $M3", []> {
+ let mayStore = 1;
+}
+
class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
Immediate index>
: InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
@@ -3581,6 +3653,12 @@ class SideEffectTernarySSF<string mnemonic, bits<12> opcode,
(ins bdaddr12only:$BD1, bdaddr12only:$BD2, cls:$R3),
mnemonic#"\t$BD1, $BD2, $R3", []>;
+class TernaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1), (ins cls2:$R2, cls3:$R3, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R2, $R3, $M4", []>;
+
class TernaryRRFb<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
@@ -3597,11 +3675,11 @@ class TernaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1,
(ins imm32zx4:$M3, cls2:$R2, imm32zx4:$M4),
mnemonic#"\t$R1, $M3, $R2, $M4", []>;
-class TernaryRRD<string mnemonic, bits<16> opcode,
- SDPatternOperator operator, RegisterOperand cls>
- : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2),
+class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRD<opcode, (outs cls1:$R1), (ins cls2:$R1src, cls2:$R3, cls2:$R2),
mnemonic#"\t$R1, $R3, $R2",
- [(set cls:$R1, (operator cls:$R1src, cls:$R3, cls:$R2))]> {
+ [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3, cls2:$R2))]> {
let OpKey = mnemonic#cls;
let OpType = "reg";
let Constraints = "$R1 = $R1src";
@@ -3661,12 +3739,13 @@ class SideEffectTernaryMemMemRSY<string mnemonic, bits<16> opcode,
}
class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
- : InstRXF<opcode, (outs cls:$R1),
- (ins cls:$R1src, cls:$R3, bdxaddr12only:$XBD2),
+ RegisterOperand cls1, RegisterOperand cls2,
+ SDPatternOperator load, bits<5> bytes>
+ : InstRXF<opcode, (outs cls1:$R1),
+ (ins cls2:$R1src, cls2:$R3, bdxaddr12only:$XBD2),
mnemonic#"\t$R1, $R3, $XBD2",
- [(set cls:$R1, (operator cls:$R1src, cls:$R3,
- (load bdxaddr12only:$XBD2)))]> {
+ [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3,
+ (load bdxaddr12only:$XBD2)))]> {
let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let Constraints = "$R1 = $R1src";
diff --git a/lib/Target/SystemZ/SystemZInstrHFP.td b/lib/Target/SystemZ/SystemZInstrHFP.td
new file mode 100644
index 000000000000..6d5b4b92f650
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrHFP.td
@@ -0,0 +1,240 @@
+//==- SystemZInstrHFP.td - Floating-point SystemZ instructions -*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The instructions in this file implement SystemZ hexadecimal floating-point
+// arithmetic. Since this format is not mapped to any source-language data
+// type, these instructions are not used for code generation, but are provided
+// for use with the assembler and disassembler only.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and test.
+let Defs = [CC] in {
+ def LTER : UnaryRR <"lter", 0x32, null_frag, FP32, FP32>;
+ def LTDR : UnaryRR <"ltdr", 0x22, null_frag, FP64, FP64>;
+ def LTXR : UnaryRRE<"ltxr", 0xB362, null_frag, FP128, FP128>;
+}
+
+//===----------------------------------------------------------------------===//
+// Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Convert floating-point values to narrower representations.
+def LEDR : UnaryRR <"ledr", 0x35, null_frag, FP32, FP64>;
+def LEXR : UnaryRRE<"lexr", 0xB366, null_frag, FP32, FP128>;
+def LDXR : UnaryRR <"ldxr", 0x25, null_frag, FP64, FP128>;
+let isAsmParserOnly = 1 in {
+ def LRER : UnaryRR <"lrer", 0x35, null_frag, FP32, FP64>;
+ def LRDR : UnaryRR <"lrdr", 0x25, null_frag, FP64, FP128>;
+}
+
+// Extend floating-point values to wider representations.
+def LDER : UnaryRRE<"lder", 0xB324, null_frag, FP64, FP32>;
+def LXER : UnaryRRE<"lxer", 0xB326, null_frag, FP128, FP32>;
+def LXDR : UnaryRRE<"lxdr", 0xB325, null_frag, FP128, FP64>;
+
+def LDE : UnaryRXE<"lde", 0xED24, null_frag, FP64, 4>;
+def LXE : UnaryRXE<"lxe", 0xED26, null_frag, FP128, 4>;
+def LXD : UnaryRXE<"lxd", 0xED25, null_frag, FP128, 8>;
+
+// Convert a signed integer register value to a floating-point one.
+def CEFR : UnaryRRE<"cefr", 0xB3B4, null_frag, FP32, GR32>;
+def CDFR : UnaryRRE<"cdfr", 0xB3B5, null_frag, FP64, GR32>;
+def CXFR : UnaryRRE<"cxfr", 0xB3B6, null_frag, FP128, GR32>;
+
+def CEGR : UnaryRRE<"cegr", 0xB3C4, null_frag, FP32, GR64>;
+def CDGR : UnaryRRE<"cdgr", 0xB3C5, null_frag, FP64, GR64>;
+def CXGR : UnaryRRE<"cxgr", 0xB3C6, null_frag, FP128, GR64>;
+
+// Convert a floating-point register value to a signed integer value,
+// with the second operand (modifier M3) specifying the rounding mode.
+let Defs = [CC] in {
+ def CFER : BinaryRRFe<"cfer", 0xB3B8, GR32, FP32>;
+ def CFDR : BinaryRRFe<"cfdr", 0xB3B9, GR32, FP64>;
+ def CFXR : BinaryRRFe<"cfxr", 0xB3BA, GR32, FP128>;
+
+ def CGER : BinaryRRFe<"cger", 0xB3C8, GR64, FP32>;
+ def CGDR : BinaryRRFe<"cgdr", 0xB3C9, GR64, FP64>;
+ def CGXR : BinaryRRFe<"cgxr", 0xB3CA, GR64, FP128>;
+}
+
+// Convert BFP to HFP.
+let Defs = [CC] in {
+ def THDER : UnaryRRE<"thder", 0xB358, null_frag, FP64, FP32>;
+ def THDR : UnaryRRE<"thdr", 0xB359, null_frag, FP64, FP64>;
+}
+
+// Convert HFP to BFP.
+let Defs = [CC] in {
+ def TBEDR : BinaryRRFe<"tbedr", 0xB350, FP32, FP64>;
+ def TBDR : BinaryRRFe<"tbdr", 0xB351, FP64, FP64>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Negation (Load Complement).
+let Defs = [CC] in {
+ def LCER : UnaryRR <"lcer", 0x33, null_frag, FP32, FP32>;
+ def LCDR : UnaryRR <"lcdr", 0x23, null_frag, FP64, FP64>;
+ def LCXR : UnaryRRE<"lcxr", 0xB363, null_frag, FP128, FP128>;
+}
+
+// Absolute value (Load Positive).
+let Defs = [CC] in {
+ def LPER : UnaryRR <"lper", 0x30, null_frag, FP32, FP32>;
+ def LPDR : UnaryRR <"lpdr", 0x20, null_frag, FP64, FP64>;
+ def LPXR : UnaryRRE<"lpxr", 0xB360, null_frag, FP128, FP128>;
+}
+
+// Negative absolute value (Load Negative).
+let Defs = [CC] in {
+ def LNER : UnaryRR <"lner", 0x31, null_frag, FP32, FP32>;
+ def LNDR : UnaryRR <"lndr", 0x21, null_frag, FP64, FP64>;
+ def LNXR : UnaryRRE<"lnxr", 0xB361, null_frag, FP128, FP128>;
+}
+
+// Halve.
+def HER : UnaryRR <"her", 0x34, null_frag, FP32, FP32>;
+def HDR : UnaryRR <"hdr", 0x24, null_frag, FP64, FP64>;
+
+// Square root.
+def SQER : UnaryRRE<"sqer", 0xB245, null_frag, FP32, FP32>;
+def SQDR : UnaryRRE<"sqdr", 0xB244, null_frag, FP64, FP64>;
+def SQXR : UnaryRRE<"sqxr", 0xB336, null_frag, FP128, FP128>;
+
+def SQE : UnaryRXE<"sqe", 0xED34, null_frag, FP32, 4>;
+def SQD : UnaryRXE<"sqd", 0xED35, null_frag, FP64, 8>;
+
+// Round to an integer (rounding towards zero).
+def FIER : UnaryRRE<"fier", 0xB377, null_frag, FP32, FP32>;
+def FIDR : UnaryRRE<"fidr", 0xB37F, null_frag, FP64, FP64>;
+def FIXR : UnaryRRE<"fixr", 0xB367, null_frag, FP128, FP128>;
+
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [CC] in {
+ let isCommutable = 1 in {
+ def AER : BinaryRR<"aer", 0x3A, null_frag, FP32, FP32>;
+ def ADR : BinaryRR<"adr", 0x2A, null_frag, FP64, FP64>;
+ def AXR : BinaryRR<"axr", 0x36, null_frag, FP128, FP128>;
+ }
+ def AE : BinaryRX<"ae", 0x7A, null_frag, FP32, load, 4>;
+ def AD : BinaryRX<"ad", 0x6A, null_frag, FP64, load, 8>;
+}
+
+// Addition (unnormalized).
+let Defs = [CC] in {
+ let isCommutable = 1 in {
+ def AUR : BinaryRR<"aur", 0x3E, null_frag, FP32, FP32>;
+ def AWR : BinaryRR<"awr", 0x2E, null_frag, FP64, FP64>;
+ }
+ def AU : BinaryRX<"au", 0x7E, null_frag, FP32, load, 4>;
+ def AW : BinaryRX<"aw", 0x6E, null_frag, FP64, load, 8>;
+}
+
+// Subtraction.
+let Defs = [CC] in {
+ def SER : BinaryRR<"ser", 0x3B, null_frag, FP32, FP32>;
+ def SDR : BinaryRR<"sdr", 0x2B, null_frag, FP64, FP64>;
+ def SXR : BinaryRR<"sxr", 0x37, null_frag, FP128, FP128>;
+
+ def SE : BinaryRX<"se", 0x7B, null_frag, FP32, load, 4>;
+ def SD : BinaryRX<"sd", 0x6B, null_frag, FP64, load, 8>;
+}
+
+// Subtraction (unnormalized).
+let Defs = [CC] in {
+ def SUR : BinaryRR<"sur", 0x3F, null_frag, FP32, FP32>;
+ def SWR : BinaryRR<"swr", 0x2F, null_frag, FP64, FP64>;
+
+ def SU : BinaryRX<"su", 0x7F, null_frag, FP32, load, 4>;
+ def SW : BinaryRX<"sw", 0x6F, null_frag, FP64, load, 8>;
+}
+
+// Multiplication.
+let isCommutable = 1 in {
+ def MEER : BinaryRRE<"meer", 0xB337, null_frag, FP32, FP32>;
+ def MDR : BinaryRR <"mdr", 0x2C, null_frag, FP64, FP64>;
+ def MXR : BinaryRR <"mxr", 0x26, null_frag, FP128, FP128>;
+}
+def MEE : BinaryRXE<"mee", 0xED37, null_frag, FP32, load, 4>;
+def MD : BinaryRX <"md", 0x6C, null_frag, FP64, load, 8>;
+
+// Extending multiplication (f32 x f32 -> f64).
+def MDER : BinaryRR<"mder", 0x3C, null_frag, FP64, FP32>;
+def MDE : BinaryRX<"mde", 0x7C, null_frag, FP64, load, 4>;
+let isAsmParserOnly = 1 in {
+ def MER : BinaryRR<"mer", 0x3C, null_frag, FP64, FP32>;
+ def ME : BinaryRX<"me", 0x7C, null_frag, FP64, load, 4>;
+}
+
+// Extending multiplication (f64 x f64 -> f128).
+def MXDR : BinaryRR<"mxdr", 0x27, null_frag, FP128, FP64>;
+def MXD : BinaryRX<"mxd", 0x67, null_frag, FP128, load, 8>;
+
+// Fused multiply-add.
+def MAER : TernaryRRD<"maer", 0xB32E, null_frag, FP32, FP32>;
+def MADR : TernaryRRD<"madr", 0xB33E, null_frag, FP64, FP64>;
+def MAE : TernaryRXF<"mae", 0xED2E, null_frag, FP32, FP32, load, 4>;
+def MAD : TernaryRXF<"mad", 0xED3E, null_frag, FP64, FP64, load, 8>;
+
+// Fused multiply-subtract.
+def MSER : TernaryRRD<"mser", 0xB32F, null_frag, FP32, FP32>;
+def MSDR : TernaryRRD<"msdr", 0xB33F, null_frag, FP64, FP64>;
+def MSE : TernaryRXF<"mse", 0xED2F, null_frag, FP32, FP32, load, 4>;
+def MSD : TernaryRXF<"msd", 0xED3F, null_frag, FP64, FP64, load, 8>;
+
+// Multiplication (unnormalized).
+def MYR : BinaryRRD<"myr", 0xB33B, null_frag, FP128, FP64>;
+def MYHR : BinaryRRD<"myhr", 0xB33D, null_frag, FP64, FP64>;
+def MYLR : BinaryRRD<"mylr", 0xB339, null_frag, FP64, FP64>;
+def MY : BinaryRXF<"my", 0xED3B, null_frag, FP128, FP64, load, 8>;
+def MYH : BinaryRXF<"myh", 0xED3D, null_frag, FP64, FP64, load, 8>;
+def MYL : BinaryRXF<"myl", 0xED39, null_frag, FP64, FP64, load, 8>;
+
+// Fused multiply-add (unnormalized).
+def MAYR : TernaryRRD<"mayr", 0xB33A, null_frag, FP128, FP64>;
+def MAYHR : TernaryRRD<"mayhr", 0xB33C, null_frag, FP64, FP64>;
+def MAYLR : TernaryRRD<"maylr", 0xB338, null_frag, FP64, FP64>;
+def MAY : TernaryRXF<"may", 0xED3A, null_frag, FP128, FP64, load, 8>;
+def MAYH : TernaryRXF<"mayh", 0xED3C, null_frag, FP64, FP64, load, 8>;
+def MAYL : TernaryRXF<"mayl", 0xED38, null_frag, FP64, FP64, load, 8>;
+
+// Division.
+def DER : BinaryRR <"der", 0x3D, null_frag, FP32, FP32>;
+def DDR : BinaryRR <"ddr", 0x2D, null_frag, FP64, FP64>;
+def DXR : BinaryRRE<"dxr", 0xB22D, null_frag, FP128, FP128>;
+def DE : BinaryRX <"de", 0x7D, null_frag, FP32, load, 4>;
+def DD : BinaryRX <"dd", 0x6D, null_frag, FP64, load, 8>;
+
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+let Defs = [CC] in {
+ def CER : CompareRR <"cer", 0x39, null_frag, FP32, FP32>;
+ def CDR : CompareRR <"cdr", 0x29, null_frag, FP64, FP64>;
+ def CXR : CompareRRE<"cxr", 0xB369, null_frag, FP128, FP128>;
+
+ def CE : CompareRX<"ce", 0x79, null_frag, FP32, load, 4>;
+ def CD : CompareRX<"cd", 0x69, null_frag, FP64, load, 8>;
+}
+
diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td
index 612c3b6cf96e..5f5f2f690e58 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -908,6 +908,238 @@ def : InstRW<[FXa, Lat30, GroupAlone], (instregex "SFASR$")>;
def : InstRW<[FXa, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[VecBF], (instregex "LEXR$")>;
+def : InstRW<[VecDF2, VecDF2], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXb], (instregex "LDER$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[VecBF], (instregex "THD(E)?R$")>;
+def : InstRW<[VecBF], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[VecBF], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)$")>;
+def : InstRW<[VecFPd], (instregex "SQ(E|D)R$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[VecBF], (instregex "FIER$")>;
+def : InstRW<[VecBF], (instregex "FIDR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[VecBF], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)?$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MY(H|L)?R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[VecBF], (instregex "M(A|S)DR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+
+// Division
+def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>;
+def : InstRW<[VecFPd], (instregex "D(E|D)R$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecXsPm, LSU, Lat8], (instregex "C(E|D)$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "C(E|D)R$")>;
+def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[VecDF], (instregex "LTDTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecDF, Lat15], (instregex "LEDTR$")>;
+def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[VecDF], (instregex "LDETR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CD(F|G)TR(A)?$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CXL(F|G)TR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, BeginGroup], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>;
+def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CZDT$")>;
+def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CZXT$")>;
+
+// Convert from / to packed
+def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDPT$")>;
+def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXPT$")>;
+def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>;
+def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[VecDF], (instregex "FIDTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEDTR$")>;
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "ESDTR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecDF], (instregex "ADTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[VecDF], (instregex "SDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[VecDF, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[VecDF, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[VecDF], (instregex "QADTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXb, VecDF, Lat11], (instregex "RRDTR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, VecDF, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXb, VecDF, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecDF], (instregex "(K|C)DTR$")>;
+def : InstRW<[VecDF, VecDF, Lat11, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[VecDF], (instregex "CEDTR$")>;
+def : InstRW<[VecDF], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
// --------------------------------- Vector --------------------------------- //
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td
index 670df8ff5541..126eac2e2072 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -839,5 +839,224 @@ def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)R$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[FPU], (instregex "LEXR$")>;
+def : InstRW<[FPU], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXU], (instregex "LDER$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[FPU], (instregex "THD(E)?R$")>;
+def : InstRW<[FPU], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[FPU], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIER$")>;
+def : InstRW<[FPU], (instregex "FIDR$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)$")>;
+def : InstRW<[FPU], (instregex "C(E|D)R$")>;
+def : InstRW<[FPU, FPU, Lat15], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[DFU, Lat20], (instregex "LTDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[DFU, Lat30], (instregex "LEDTR$")>;
+def : InstRW<[DFU, DFU, Lat30], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[DFU, Lat20], (instregex "LDETR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>;
+
+// Perform floating-point operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[DFU, Lat20], (instregex "FIDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>;
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat20, GroupAlone], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[DFU, Lat30], (instregex "ADTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[DFU, Lat30], (instregex "SDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[DFU, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[DFU, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[DFU, Lat30], (instregex "QADTR$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>;
+def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>;
+def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
}
diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 1bdb8779dc72..d38ca64d2e9b 100644
--- a/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -877,5 +877,230 @@ def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)R$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[FPU], (instregex "LEXR$")>;
+def : InstRW<[FPU], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXU], (instregex "LDER$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[FPU], (instregex "THD(E)?R$")>;
+def : InstRW<[FPU], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[FPU], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIER$")>;
+def : InstRW<[FPU], (instregex "FIDR$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)$")>;
+def : InstRW<[FPU], (instregex "C(E|D)R$")>;
+def : InstRW<[FPU, FPU, Lat15], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[DFU, Lat20], (instregex "LTDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[DFU, Lat30], (instregex "LEDTR$")>;
+def : InstRW<[DFU, DFU, Lat30], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[DFU, Lat20], (instregex "LDETR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[LSU, DFU2, Lat7, GroupAlone], (instregex "CDZT$")>;
+def : InstRW<[LSU, LSU, DFU2, DFU2, Lat10, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[FXU, LSU, DFU, Lat11, GroupAlone], (instregex "CZDT$")>;
+def : InstRW<[FXU, LSU, DFU, DFU, Lat15, GroupAlone], (instregex "CZXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[DFU, Lat20], (instregex "FIDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>;
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat20, GroupAlone], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[DFU, Lat30], (instregex "ADTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[DFU, Lat30], (instregex "SDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[DFU, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[DFU, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[DFU, Lat30], (instregex "QADTR$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>;
+def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>;
+def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 022679a7bc18..0ab0c2f25915 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -42,8 +42,10 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasMiscellaneousExtensions(false),
HasExecutionHint(false), HasLoadAndTrap(false),
HasTransactionalExecution(false), HasProcessorAssist(false),
+ HasDFPZonedConversion(false),
HasVector(false), HasLoadStoreOnCond2(false),
HasLoadAndZeroRightmostByte(false), HasMessageSecurityAssist5(false),
+ HasDFPPackedConversion(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 770dd7cd939f..36e51921bf2f 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -47,10 +47,12 @@ protected:
bool HasLoadAndTrap;
bool HasTransactionalExecution;
bool HasProcessorAssist;
+ bool HasDFPZonedConversion;
bool HasVector;
bool HasLoadStoreOnCond2;
bool HasLoadAndZeroRightmostByte;
bool HasMessageSecurityAssist5;
+ bool HasDFPPackedConversion;
private:
Triple TargetTriple;
@@ -133,6 +135,9 @@ public:
// Return true if the target has the processor-assist facility.
bool hasProcessorAssist() const { return HasProcessorAssist; }
+ // Return true if the target has the DFP zoned-conversion facility.
+ bool hasDFPZonedConversion() const { return HasDFPZonedConversion; }
+
// Return true if the target has the load-and-zero-rightmost-byte facility.
bool hasLoadAndZeroRightmostByte() const {
return HasLoadAndZeroRightmostByte;
@@ -142,6 +147,9 @@ public:
// extension facility 5.
bool hasMessageSecurityAssist5() const { return HasMessageSecurityAssist5; }
+ // Return true if the target has the DFP packed-conversion facility.
+ bool hasDFPPackedConversion() const { return HasDFPPackedConversion; }
+
// Return true if the target has the vector facility.
bool hasVector() const { return HasVector; }
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 31a5ca1f4cc2..814377003cbc 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -84,8 +84,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
setCondCodeAction(CC, T, Expand);
// Expand floating-point library function operators.
- for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW,
- ISD::FREM, ISD::FMA})
+ for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
+ ISD::FMA})
setOperationAction(Op, T, Expand);
// Note supported floating-point library function operators that otherwise
// default to expand.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 86744b064132..8d78308afe9d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -80,6 +80,12 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
" of the loop header PC will be 0)."),
cl::Hidden);
+static cl::opt<bool> MulConstantOptimization(
+ "mul-constant-optimization", cl::init(true),
+ cl::desc("Replace 'mul x, Const' with more effective instructions like "
+ "SHIFT, LEA, etc."),
+ cl::Hidden);
+
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
@@ -670,7 +676,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
- setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
@@ -30928,6 +30933,75 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
}
}
+static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
+ EVT VT, SDLoc DL) {
+
+ auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
+ SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(Mult, DL, VT));
+ Result = DAG.getNode(ISD::SHL, DL, VT, Result,
+ DAG.getConstant(Shift, DL, MVT::i8));
+ Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, N->getOperand(0),
+ Result);
+ return Result;
+ };
+
+ auto combineMulMulAddOrSub = [&](bool isAdd) {
+ SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(9, DL, VT));
+ Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
+ Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, N->getOperand(0),
+ Result);
+ return Result;
+ };
+
+ switch (MulAmt) {
+ default:
+ break;
+ case 11:
+ // mul x, 11 => add ((shl (mul x, 5), 1), x)
+ return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
+ case 21:
+ // mul x, 21 => add ((shl (mul x, 5), 2), x)
+ return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
+ case 22:
+ // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
+ return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+ combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
+ case 19:
+ // mul x, 19 => sub ((shl (mul x, 5), 2), x)
+ return combineMulShlAddOrSub(5, 2, /*isAdd*/ false);
+ case 13:
+ // mul x, 13 => add ((shl (mul x, 3), 2), x)
+ return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
+ case 23:
+ // mul x, 13 => sub ((shl (mul x, 3), 3), x)
+ return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
+ case 14:
+ // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x)
+ return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+ combineMulShlAddOrSub(3, 2, /*isAdd*/ true));
+ case 26:
+ // mul x, 26 => sub ((mul (mul x, 9), 3), x)
+ return combineMulMulAddOrSub(/*isAdd*/ false);
+ case 28:
+ // mul x, 28 => add ((mul (mul x, 9), 3), x)
+ return combineMulMulAddOrSub(/*isAdd*/ true);
+ case 29:
+ // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
+ return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+ combineMulMulAddOrSub(/*isAdd*/ true));
+ case 30:
+ // mul x, 30 => sub (sub ((shl x, 5), x), x)
+ return DAG.getNode(
+ ISD::SUB, DL, VT, N->getOperand(0),
+ DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(5, DL, MVT::i8))));
+ }
+ return SDValue();
+}
+
/// Optimize a single multiply with constant into two operations in order to
/// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
@@ -30937,6 +31011,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DAG, Subtarget);
+ if (!MulConstantOptimization)
+ return SDValue();
// An imul is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
@@ -30992,7 +31068,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
- }
+ } else if (!Subtarget.slowLEA())
+ NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
if (!NewMul) {
assert(MulAmt != 0 &&
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 67abc3116988..5e9f40019ce8 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -377,7 +377,6 @@ private:
int StoreCount = 0;
};
-struct HashedExpression;
namespace llvm {
template <> struct DenseMapInfo<const Expression *> {
static const Expression *getEmptyKey() {
@@ -391,41 +390,25 @@ template <> struct DenseMapInfo<const Expression *> {
return reinterpret_cast<const Expression *>(Val);
}
static unsigned getHashValue(const Expression *E) {
- return static_cast<unsigned>(E->getHashValue());
+ return static_cast<unsigned>(E->getComputedHash());
}
- static unsigned getHashValue(const HashedExpression &HE);
- static bool isEqual(const HashedExpression &LHS, const Expression *RHS);
static bool isEqual(const Expression *LHS, const Expression *RHS) {
if (LHS == RHS)
return true;
if (LHS == getTombstoneKey() || RHS == getTombstoneKey() ||
LHS == getEmptyKey() || RHS == getEmptyKey())
return false;
+ // Compare hashes before equality. This is *not* what the hashtable does,
+ // since it is computing it modulo the number of buckets, whereas we are
+ // using the full hash keyspace. Since the hashes are precomputed, this
+ // check is *much* faster than equality.
+ if (LHS->getComputedHash() != RHS->getComputedHash())
+ return false;
return *LHS == *RHS;
}
};
} // end namespace llvm
-// This is just a wrapper around Expression that computes the hash value once at
-// creation time. Hash values for an Expression can't change once they are
-// inserted into the DenseMap (it breaks DenseMap), so they must be immutable at
-// that point anyway.
-struct HashedExpression {
- const Expression *E;
- unsigned HashVal;
- HashedExpression(const Expression *E)
- : E(E), HashVal(DenseMapInfo<const Expression *>::getHashValue(E)) {}
-};
-
-unsigned
-DenseMapInfo<const Expression *>::getHashValue(const HashedExpression &HE) {
- return HE.HashVal;
-}
-bool DenseMapInfo<const Expression *>::isEqual(const HashedExpression &LHS,
- const Expression *RHS) {
- return isEqual(LHS.E, RHS);
-}
-
namespace {
class NewGVN {
Function &F;
@@ -707,7 +690,7 @@ private:
void markPredicateUsersTouched(Instruction *);
void markValueLeaderChangeTouched(CongruenceClass *CC);
void markMemoryLeaderChangeTouched(CongruenceClass *CC);
- void markPhiOfOpsChanged(const HashedExpression &HE);
+ void markPhiOfOpsChanged(const Expression *E);
void addPredicateUsers(const PredicateBase *, Instruction *) const;
void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const;
void addAdditionalUsers(Value *To, Value *User) const;
@@ -956,8 +939,12 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E,
if (CC && CC->getDefiningExpr()) {
// If we simplified to something else, we need to communicate
// that we're users of the value we simplified to.
- if (I != V)
- addAdditionalUsers(V, I);
+ if (I != V) {
+ // Don't add temporary instructions to the user lists.
+ if (!AllTempInstructions.count(I))
+ addAdditionalUsers(V, I);
+ }
+
if (I)
DEBUG(dbgs() << "Simplified " << *I << " to "
<< " expression " << *CC->getDefiningExpr() << "\n");
@@ -2195,8 +2182,8 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
// For a given expression, mark the phi of ops instructions that could have
// changed as a result.
-void NewGVN::markPhiOfOpsChanged(const HashedExpression &HE) {
- touchAndErase(ExpressionToPhiOfOps, HE);
+void NewGVN::markPhiOfOpsChanged(const Expression *E) {
+ touchAndErase(ExpressionToPhiOfOps, E);
}
// Perform congruence finding on a given value numbering expression.
@@ -2210,14 +2197,13 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
assert(!IClass->isDead() && "Found a dead class");
CongruenceClass *EClass = nullptr;
- HashedExpression HE(E);
if (const auto *VE = dyn_cast<VariableExpression>(E)) {
EClass = ValueToClass.lookup(VE->getVariableValue());
} else if (isa<DeadExpression>(E)) {
EClass = TOPClass;
}
if (!EClass) {
- auto lookupResult = ExpressionToClass.insert_as({E, nullptr}, HE);
+ auto lookupResult = ExpressionToClass.insert({E, nullptr});
// If it's not in the value table, create a new congruence class.
if (lookupResult.second) {
@@ -2268,7 +2254,7 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
<< "\n");
if (ClassChanged) {
moveValueToNewCongruenceClass(I, E, IClass, EClass);
- markPhiOfOpsChanged(HE);
+ markPhiOfOpsChanged(E);
}
markUsersTouched(I);
@@ -2502,9 +2488,8 @@ NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
// Clone the instruction, create an expression from it, and see if we
// have a leader.
Instruction *ValueOp = I->clone();
- auto Iter = TempToMemory.end();
if (MemAccess)
- Iter = TempToMemory.insert({ValueOp, MemAccess}).first;
+ TempToMemory.insert({ValueOp, MemAccess});
for (auto &Op : ValueOp->operands()) {
Op = Op->DoPHITranslation(PHIBlock, PredBB);
@@ -2523,7 +2508,7 @@ NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
AllTempInstructions.erase(ValueOp);
ValueOp->deleteValue();
if (MemAccess)
- TempToMemory.erase(Iter);
+ TempToMemory.erase(ValueOp);
if (!E)
return nullptr;
FoundVal = findPhiOfOpsLeader(E, PredBB);
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3b036a6ac430..2b83b8426d14 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7173,7 +7173,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// Note: Even if all instructions are scalarized, return true if any memory
// accesses appear in the loop to get benefits from address folding etc.
bool TypeNotScalarized =
- VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF;
+ VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF;
return VectorizationCostTy(C, TypeNotScalarized);
}
@@ -7312,7 +7312,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Type *RetTy = I->getType();
if (canTruncateToMinimalBitwidth(I, VF))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
- VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);
+ VectorTy = ToVectorTy(RetTy, VF);
auto SE = PSE.getSE();
// TODO: We need to estimate the cost of intrinsic calls.
@@ -7445,10 +7445,9 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
} else if (Legal->isUniform(Op2)) {
Op2VK = TargetTransformInfo::OK_UniformValue;
}
- SmallVector<const Value *, 4> Operands(I->operand_values());
- unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
- return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
- Op2VK, Op1VP, Op2VP, Operands);
+ SmallVector<const Value *, 4> Operands(I->operand_values());
+ return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
+ Op2VK, Op1VP, Op2VP, Operands);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
@@ -7471,15 +7470,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
}
case Instruction::Store:
case Instruction::Load: {
- unsigned Width = VF;
- if (Width > 1) {
- InstWidening Decision = getWideningDecision(I, Width);
- assert(Decision != CM_Unknown &&
- "CM decision should be taken at this point");
- if (Decision == CM_Scalarize)
- Width = 1;
- }
- VectorTy = ToVectorTy(getMemInstValueType(I), Width);
+ VectorTy = ToVectorTy(getMemInstValueType(I), VF);
return getMemoryInstructionCost(I, VF);
}
case Instruction::ZExt:
diff --git a/test/CodeGen/AArch64/reg-scavenge-frame.mir b/test/CodeGen/AArch64/reg-scavenge-frame.mir
new file mode 100644
index 000000000000..3300bb1e5831
--- /dev/null
+++ b/test/CodeGen/AArch64/reg-scavenge-frame.mir
@@ -0,0 +1,52 @@
+# RUN: llc -run-pass=prologepilog -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-linux-gnu"
+ define void @ScavengeForFrameWithoutOffset() { ret void }
+...
+---
+name: ScavengeForFrameWithoutOffset
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: spill-slot, offset: 0, size: 32, alignment: 8 }
+body: |
+ bb.0:
+ liveins: %d16_d17_d18_d19
+ %x0 = COPY %xzr
+ %x1 = COPY %xzr
+ %x2 = COPY %xzr
+ %x3 = COPY %xzr
+ %x4 = COPY %xzr
+ %x5 = COPY %xzr
+ %x6 = COPY %xzr
+ %x7 = COPY %xzr
+ %x8 = COPY %xzr
+ %x9 = COPY %xzr
+ %x10 = COPY %xzr
+ %x11 = COPY %xzr
+ %x12 = COPY %xzr
+ %x13 = COPY %xzr
+ %x14 = COPY %xzr
+ %x15 = COPY %xzr
+ %x16 = COPY %xzr
+ %x17 = COPY %xzr
+ %x18 = COPY %xzr
+ %x19 = COPY %xzr
+ %x20 = COPY %xzr
+ %x21 = COPY %xzr
+ %x22 = COPY %xzr
+ %x23 = COPY %xzr
+ %x24 = COPY %xzr
+ %x25 = COPY %xzr
+ %x26 = COPY %xzr
+ %x27 = COPY %xzr
+ %x28 = COPY %xzr
+ %fp = COPY %xzr
+ %lr = COPY %xzr
+ ST1Fourv1d killed %d16_d17_d18_d19, %stack.0 :: (store 32 into %stack.0, align 8)
+# CHECK: STRXui killed %[[SCAVREG:x[0-9]+|fp|lr]], %sp, [[SPOFFSET:[0-9]+]] :: (store 8 into %stack.1)
+# CHECK-NEXT: %[[SCAVREG]] = ADDXri %sp, {{[0-9]+}}, 0
+# CHECK-NEXT: ST1Fourv1d killed %d16_d17_d18_d19, killed %[[SCAVREG]] :: (store 32 into %stack.0, align 8)
+# CHECK-NEXT: %[[SCAVREG]] = LDRXui %sp, [[SPOFFSET]] :: (load 8 from %stack.1)
+...
diff --git a/test/CodeGen/AMDGPU/add.v2i16.ll b/test/CodeGen/AMDGPU/add.v2i16.ll
index a6b280578531..e5e2d436deb0 100644
--- a/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -23,7 +23,7 @@ define amdgpu_kernel void @v_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: v_mov_b32_e32 [[VVAL1:v[0-9]+]]
-; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[VVAL1]], [[VAL0]]
+; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[VAL0]], [[VVAL1]]
; VI: s_add_i32
; VI: s_add_i32
@@ -50,7 +50,7 @@ define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <
; FIXME: VI should not scalarize arg access.
; GCN-LABEL: {{^}}s_test_add_v2i16_kernarg:
-; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
+; GFX9: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_add_i32
; VI: v_add_i32_sdwa
@@ -62,10 +62,11 @@ define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out
; GCN-LABEL: {{^}}v_test_add_v2i16_constant:
; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0x1c8007b{{$}}
-; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[CONST]], v{{[0-9]+}}
+; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}}
-; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x1c8, v{{[0-9]+}}
+; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0x1c8
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -79,10 +80,11 @@ define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %ou
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_add_v2i16_neg_constant:
; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0xfc21fcb3{{$}}
-; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[CONST]], v{{[0-9]+}}
+; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}}
-; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffc21, v{{[0-9]+}}
+; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0xfffffc21
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -96,11 +98,11 @@ define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)*
; GCN-LABEL: {{^}}v_test_add_v2i16_inline_neg1:
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, -1{{$}}
+; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1
; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
; VI: flat_load_ushort [[LOAD1:v[0-9]+]]
-; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD0]]
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], [[LOAD0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD1]]
-; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -114,7 +116,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)*
; GCN-LABEL: {{^}}v_test_add_v2i16_inline_lo_zero_hi:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 32{{$}}
-; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}{{$}}
+; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]{{$}}
; VI-NOT: v_add_u16
; VI: v_add_u16_e32 v{{[0-9]+}}, 32, v{{[0-9]+}}
@@ -134,12 +136,12 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspac
; The high element gives fp
; GCN-LABEL: {{^}}v_test_add_v2i16_inline_fp_split:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 1.0
-; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}{{$}}
+; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]{{$}}
; VI-NOT: v_add_u16
-; VI: v_add_u16_e32 v{{[0-9]+}}, 0x3f80, v{{[0-9]+}}
+; VI: v_mov_b32_e32 v[[K:[0-9]+]], 0x3f80
+; VI: v_add_u16_sdwa v{{[0-9]+}}, v[[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NOT: v_add_u16
-; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -191,19 +193,17 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)
; GFX9: flat_load_dword [[A:v[0-9]+]]
; GFX9: flat_load_dword [[B:v[0-9]+]]
-; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
; GFX9: buffer_store_dwordx4
+; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
; VI: flat_load_ushort v[[A_HI:[0-9]+]]
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
-; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
-; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI-DAG: v_add_u16_e32
; VI-DAG: v_add_u16_e32
diff --git a/test/CodeGen/AMDGPU/bfe-combine.ll b/test/CodeGen/AMDGPU/bfe-combine.ll
index 791b49f0e143..6035e3bf4a5f 100644
--- a/test/CodeGen/AMDGPU/bfe-combine.ll
+++ b/test/CodeGen/AMDGPU/bfe-combine.ll
@@ -1,12 +1,16 @@
-; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck --check-prefix=GCN --check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 < %s | FileCheck --check-prefix=GCN --check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck --check-prefix=GCN --check-prefix=VI-SDWA %s
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GCN --check-prefix=CI %s
; GCN-LABEL: {{^}}bfe_combine8:
; VI: v_bfe_u32 v[[BFE:[0-9]+]], v{{[0-9]+}}, 8, 8
; VI: v_lshlrev_b32_e32 v[[ADDRBASE:[0-9]+]], 2, v[[BFE]]
+; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 2
+; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 6, v{{[0-9]+}}
; CI: v_and_b32_e32 v[[ADDRLO:[0-9]+]], 0x3fc, v[[SHR]]
; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
+; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
@@ -22,6 +26,10 @@ define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x
; GCN-LABEL: {{^}}bfe_combine16:
; VI: v_bfe_u32 v[[BFE:[0-9]+]], v{{[0-9]+}}, 16, 16
; VI: v_lshlrev_b32_e32 v[[ADDRBASE:[0-9]+]], {{[^,]+}}, v[[BFE]]
+; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 15
+; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE1:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-SDWA: v_lshlrev_b64 v{{\[}}[[ADDRBASE:[0-9]+]]:{{[^\]+}}], 2, v{{\[}}[[ADDRBASE1]]:{{[^\]+}}]
+; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 1, v{{[0-9]+}}
; CI: v_and_b32_e32 v[[AND:[0-9]+]], 0x7fff8000, v[[SHR]]
; CI: v_lshl_b64 v{{\[}}[[ADDRLO:[0-9]+]]:{{[^\]+}}], v{{\[}}[[AND]]:{{[^\]+}}], 2
diff --git a/test/CodeGen/AMDGPU/commute-compares.ll b/test/CodeGen/AMDGPU/commute-compares.ll
index 973c4544d97a..66148a43a271 100644
--- a/test/CodeGen/AMDGPU/commute-compares.ll
+++ b/test/CodeGen/AMDGPU/commute-compares.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
diff --git a/test/CodeGen/AMDGPU/commute_modifiers.ll b/test/CodeGen/AMDGPU/commute_modifiers.ll
index 8820e4fd80e5..f38c1f8aa6ed 100644
--- a/test/CodeGen/AMDGPU/commute_modifiers.ll
+++ b/test/CodeGen/AMDGPU/commute_modifiers.ll
@@ -51,7 +51,7 @@ define amdgpu_kernel void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, fl
; FUNC-LABEL: @commute_add_lit_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
-; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[K]], |[[X]]|
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
; SI: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
diff --git a/test/CodeGen/AMDGPU/copy-illegal-type.ll b/test/CodeGen/AMDGPU/copy-illegal-type.ll
index 026dd7ca6c87..d772d1b67936 100644
--- a/test/CodeGen/AMDGPU/copy-illegal-type.ll
+++ b/test/CodeGen/AMDGPU/copy-illegal-type.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-sdwa-peephole=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
diff --git a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index e16daa6fad9d..0328ce31002d 100644
--- a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -94,7 +94,6 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)
; GCN-DAG: v_cvt_f32_ubyte3_e32
; GCN-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 24
-; GCN-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 16
; SI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16
; SI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 8
diff --git a/test/CodeGen/AMDGPU/fabs.f64.ll b/test/CodeGen/AMDGPU/fabs.f64.ll
index 998e02f7bdf8..718176b80f0f 100644
--- a/test/CodeGen/AMDGPU/fabs.f64.ll
+++ b/test/CodeGen/AMDGPU/fabs.f64.ll
@@ -55,7 +55,7 @@ define amdgpu_kernel void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x doub
; SI-LABEL: {{^}}fabs_fold_f64:
; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: and
-; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: s_endpgm
define amdgpu_kernel void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
%fabs = call double @llvm.fabs.f64(double %in0)
@@ -67,7 +67,7 @@ define amdgpu_kernel void @fabs_fold_f64(double addrspace(1)* %out, double %in0,
; SI-LABEL: {{^}}fabs_fn_fold_f64:
; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: and
-; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: s_endpgm
define amdgpu_kernel void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
%fabs = call double @fabs(double %in0)
diff --git a/test/CodeGen/AMDGPU/fabs.ll b/test/CodeGen/AMDGPU/fabs.ll
index ac8fa3e45ef5..600c6cd8230e 100644
--- a/test/CodeGen/AMDGPU/fabs.ll
+++ b/test/CodeGen/AMDGPU/fabs.ll
@@ -75,7 +75,7 @@ define amdgpu_kernel void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |[[ABS_VALUE]]|
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define amdgpu_kernel void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @fabs(float %in0)
%fmul = fmul float %fabs, %in1
@@ -87,7 +87,7 @@ define amdgpu_kernel void @fabs_fn_fold(float addrspace(1)* %out, float %in0, fl
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |[[ABS_VALUE]]|
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define amdgpu_kernel void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @llvm.fabs.f32(float %in0)
%fmul = fmul float %fabs, %in1
diff --git a/test/CodeGen/AMDGPU/fadd.f16.ll b/test/CodeGen/AMDGPU/fadd.f16.ll
index f76ecf58d905..9b3d2a475a14 100644
--- a/test/CodeGen/AMDGPU/fadd.f16.ll
+++ b/test/CodeGen/AMDGPU/fadd.f16.ll
@@ -96,9 +96,9 @@ entry:
}
; GCN-LABEL: {{^}}fadd_v2f16_imm_a:
-; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
+; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
-; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
+; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_add_f32_e32 v[[R_F32_0:[0-9]+]], 1.0, v[[B_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
@@ -107,9 +107,9 @@ entry:
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
-; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[B_F16_1]]
+; VI-DAG: v_mov_b32_e32 v[[CONST2:[0-9]+]], 0x4000
+; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST2]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]]
-; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; GCN: buffer_store_dword v[[R_V2_F16]]
@@ -125,9 +125,9 @@ entry:
}
; GCN-LABEL: {{^}}fadd_v2f16_imm_b:
-; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
+; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
-; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
+; SI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_add_f32_e32 v[[R_F32_0:[0-9]+]], 2.0, v[[A_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
@@ -136,10 +136,10 @@ entry:
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
-; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[A_F16_1]]
+; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
+; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[CONST1]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[A_V2_F16]]
-; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_0]]
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_1]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
diff --git a/test/CodeGen/AMDGPU/fadd64.ll b/test/CodeGen/AMDGPU/fadd64.ll
index 7eb7747de215..c936d98673ba 100644
--- a/test/CodeGen/AMDGPU/fadd64.ll
+++ b/test/CodeGen/AMDGPU/fadd64.ll
@@ -13,7 +13,7 @@ define amdgpu_kernel void @v_fadd_f64(double addrspace(1)* %out, double addrspac
}
; CHECK-LABEL: {{^}}s_fadd_f64:
-; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @s_fadd_f64(double addrspace(1)* %out, double %r0, double %r1) {
%r2 = fadd double %r0, %r1
store double %r2, double addrspace(1)* %out
diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index c9787bb478ef..9e8ddd39bbaf 100644
--- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -205,9 +205,9 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace
}
; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f16:
-; VI: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, {{v[0-9]+}}
+; VI: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
+; VI-DAG: v_mul_f16_sdwa [[REG0:v[0-9]+]], v[[CONST1]], {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, {{v[0-9]+}}
-; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI-NOT: v_and_b32
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+$}}
@@ -223,7 +223,8 @@ define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_v2f16:
; VI-DAG: v_bfe_u32
; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fff7fff, v{{[0-9]+}}
-; VI: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, v{{[0-9]+}}
+; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
+; VI: v_mul_f16_sdwa [[REG0:v[0-9]+]], v[[CONST1]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, v{{[0-9]+}}
; VI-NOT: 0xffff
; VI: v_or_b32
@@ -240,9 +241,10 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspa
}
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_v2f16:
-; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
-; VI: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, v{{[0-9]+}}
-; VI: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, v{{[0-9]+}}
+; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
+; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
+; VI-DAG: v_mul_f16_sdwa [[REG0:v[0-9]+]], v[[CONST1]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, v{{[0-9]+}}
; VI: v_or_b32
; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
@@ -259,11 +261,10 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad
; FIXME: Fold modifier
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_v2f16:
-; VI: v_xor_b32_e32 [[FNEG:v[0-9]+]], 0x80008000, v{{[0-9]+}}
-; VI-DAG: v_lshrrev_b32_e32 [[FNEG_HI:v[0-9]+]], 16, [[FNEG]]
-; VI-DAG: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, [[FNEG_HI]]
+; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
+; VI-DAG: v_xor_b32_e32 [[FNEG:v[0-9]+]], 0x80008000, v{{[0-9]+}}
+; VI-DAG: v_mul_f16_sdwa [[REG1:v[0-9]+]], v[[CONST1]], [[FNEG]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, [[FNEG]]
-; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI-NOT: 0xffff
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} neg_lo:[0,1] neg_hi:[0,1]{{$}}
diff --git a/test/CodeGen/AMDGPU/fmul.f16.ll b/test/CodeGen/AMDGPU/fmul.f16.ll
index 4e96091ae256..4ef2aa693cf4 100644
--- a/test/CodeGen/AMDGPU/fmul.f16.ll
+++ b/test/CodeGen/AMDGPU/fmul.f16.ll
@@ -96,17 +96,18 @@ entry:
}
; GCN-LABEL: {{^}}fmul_v2f16_imm_a:
-; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
+; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
-; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
+; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_mul_f32_e32 v[[R_F32_0:[0-9]+]], 0x40400000, v[[B_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI-DAG: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]]
+; VI-DAG: v_mov_b32_e32 v[[CONST4:[0-9]+]], 0x4400
+; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
-; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
+; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
@@ -121,17 +122,18 @@ entry:
}
; GCN-LABEL: {{^}}fmul_v2f16_imm_b:
-; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
+; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
-; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
+; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_mul_f32_e32 v[[R_F32_0:[0-9]+]], 4.0, v[[A_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI-DAG: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]]
+; VI-DAG: v_mov_b32_e32 v[[CONST3:[0-9]+]], 0x4200
+; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST3]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
-; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
+; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index 506b2a02f828..c256159726bf 100644
--- a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -71,7 +71,9 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
; FIXME: single bit op
; GCN-LABEL: {{^}}s_fneg_fabs_v2f16:
; CIVI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
-; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]]
+; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; CIVI: flat_store_dword
@@ -85,10 +87,15 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x
; GCN-LABEL: {{^}}fneg_fabs_v4f16:
; CIVI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
-; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]]
+; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; GFX9: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000
; GFX9: s_or_b32 s{{[0-9]+}}, [[MASK]], s{{[0-9]+}}
diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f64.ll b/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
index 85f544032171..bc0e59980186 100644
--- a/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
+++ b/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
@@ -5,7 +5,7 @@
; into 2 modifiers, although theoretically that should work.
; GCN-LABEL: {{^}}fneg_fabs_fadd_f64:
-; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|, {{s\[[0-9]+:[0-9]+\]}}
+; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
define amdgpu_kernel void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
@@ -25,7 +25,7 @@ define amdgpu_kernel void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, doubl
}
; GCN-LABEL: {{^}}fneg_fabs_fmul_f64:
-; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|, {{s\[[0-9]+:[0-9]+\]}}
+; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
define amdgpu_kernel void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
diff --git a/test/CodeGen/AMDGPU/fneg-fabs.ll b/test/CodeGen/AMDGPU/fneg-fabs.ll
index a0cf37b159db..0a7346f410c9 100644
--- a/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -4,7 +4,7 @@
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
; SI-NOT: and
-; SI: v_subrev_f32_e64 {{v[0-9]+}}, |{{v[0-9]+}}|, {{s[0-9]+}}
+; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
@@ -15,7 +15,7 @@ define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
; SI-NOT: and
-; SI: v_mul_f32_e64 {{v[0-9]+}}, -|{{v[0-9]+}}|, {{s[0-9]+}}
+; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
; SI-NOT: and
define amdgpu_kernel void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
diff --git a/test/CodeGen/AMDGPU/fneg.f16.ll b/test/CodeGen/AMDGPU/fneg.f16.ll
index ed36666db807..16e4fc680bea 100644
--- a/test/CodeGen/AMDGPU/fneg.f16.ll
+++ b/test/CodeGen/AMDGPU/fneg.f16.ll
@@ -130,13 +130,15 @@ define amdgpu_kernel void @v_fneg_fold_v2f16(<2 x half> addrspace(1)* %out, <2 x
}
; GCN-LABEL: {{^}}v_extract_fneg_fold_v2f16:
-; GCN: flat_load_dword [[VAL:v[0-9]+]]
+; GCN-DAG: flat_load_dword [[VAL:v[0-9]+]]
; CI-DAG: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}}
; CI-DAG: v_sub_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
-; GFX89: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VAL]]
+; GFX9: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VAL]]
; GFX89-DAG: v_mul_f16_e32 v{{[0-9]+}}, -4.0, [[VAL]]
-; GFX89-DAG: v_sub_f16_e32 v{{[0-9]+}}, 2.0, [[ELT1]]
+; GFX9-DAG: v_sub_f16_e32 v{{[0-9]+}}, 2.0, [[ELT1]]
+; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
+; VI-DAG: v_sub_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
define amdgpu_kernel void @v_extract_fneg_fold_v2f16(<2 x half> addrspace(1)* %in) #0 {
%val = load <2 x half>, <2 x half> addrspace(1)* %in
%fneg = fsub <2 x half> <half -0.0, half -0.0>, %val
diff --git a/test/CodeGen/AMDGPU/fract.f64.ll b/test/CodeGen/AMDGPU/fract.f64.ll
index 7a5bcfffa3f3..9a56cbe983cd 100644
--- a/test/CodeGen/AMDGPU/fract.f64.ll
+++ b/test/CodeGen/AMDGPU/fract.f64.ll
@@ -12,7 +12,7 @@ declare double @llvm.floor.f64(double) #0
; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
-; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
+; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]]
; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
@@ -39,7 +39,7 @@ define amdgpu_kernel void @fract_f64(double addrspace(1)* %out, double addrspace
; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
-; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
+; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]]
; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
@@ -67,7 +67,7 @@ define amdgpu_kernel void @fract_f64_neg(double addrspace(1)* %out, double addrs
; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
-; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
+; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]]
; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
diff --git a/test/CodeGen/AMDGPU/fsub.f16.ll b/test/CodeGen/AMDGPU/fsub.f16.ll
index d3c5df317771..836b480b6a67 100644
--- a/test/CodeGen/AMDGPU/fsub.f16.ll
+++ b/test/CodeGen/AMDGPU/fsub.f16.ll
@@ -99,7 +99,7 @@ entry:
}
; GCN-LABEL: {{^}}fsub_v2f16_imm_a:
-; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
+; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
@@ -111,14 +111,13 @@ entry:
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
-; VI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
-; VI-DAG: v_sub_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[B_F16_1]]
+; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
+; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST2]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]]
-; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x40003c00
-; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], [[K]], v[[B_V2_F16]] neg_lo:[0,1] neg_hi:[0,1]
+; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[B_V2_F16]], [[K]] neg_lo:[1,0] neg_hi:[1,0]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
@@ -134,7 +133,7 @@ entry:
}
; GCN-LABEL: {{^}}fsub_v2f16_imm_b:
-; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
+; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
@@ -146,14 +145,13 @@ entry:
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
-; VI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
-; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], -1.0, v[[A_F16_1]]
+; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
+; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONSTM1]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]]
-; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00c000
-; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], [[K]], v[[A_V2_F16]]{{$}}
+; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], [[K]]{{$}}
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
diff --git a/test/CodeGen/AMDGPU/fsub64.ll b/test/CodeGen/AMDGPU/fsub64.ll
index 1b0879d098ee..dc332414a152 100644
--- a/test/CodeGen/AMDGPU/fsub64.ll
+++ b/test/CodeGen/AMDGPU/fsub64.ll
@@ -39,7 +39,7 @@ define amdgpu_kernel void @fsub_fabs_inv_f64(double addrspace(1)* %out, double a
}
; SI-LABEL: {{^}}s_fsub_f64:
-; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) {
%sub = fsub double %a, %b
store double %sub, double addrspace(1)* %out
diff --git a/test/CodeGen/AMDGPU/immv216.ll b/test/CodeGen/AMDGPU/immv216.ll
index 96132d841997..bc951a82becd 100644
--- a/test/CodeGen/AMDGPU/immv216.ll
+++ b/test/CodeGen/AMDGPU/immv216.ll
@@ -123,7 +123,8 @@ define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST0]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -140,7 +141,8 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %ou
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST05]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -157,7 +159,8 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %ou
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -0.5, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -0.5, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM05]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -174,7 +177,8 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)*
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1.0, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1.0, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -191,7 +195,8 @@ define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %ou
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1.0, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1.0, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -208,7 +213,8 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)*
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2.0, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2.0, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -225,7 +231,8 @@ define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %ou
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2.0, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2.0, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -242,7 +249,8 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)*
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 4.0, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 4.0, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST4]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -259,7 +267,8 @@ define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %ou
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -4.0, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -4.0, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM4]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -273,10 +282,10 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)*
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5
; GFX9: buffer_store_dword [[REG]]
+; VI: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
; VI: buffer_load_dword
; VI-NOT: and
-; VI: v_lshrrev_b32_e32 {{v[0-9]+}}, 16,
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST05]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
; VI: v_or_b32
; VI: buffer_store_dword
@@ -290,7 +299,7 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace
; GCN-LABEL: {{^}}commute_add_literal_v2f16:
; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}}
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[K]], [[VAL]] op_sel_hi:[0,1]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0]{{$}}
; GFX9: buffer_store_dword [[REG]]
; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}}
@@ -315,7 +324,8 @@ define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %o
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -332,7 +342,8 @@ define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out,
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -349,7 +360,8 @@ define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out,
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 16, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 16, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST16]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -366,7 +378,8 @@ define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xffff
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -383,7 +396,8 @@ define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xfffe
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -400,7 +414,8 @@ define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONSTM16:v[0-9]+]], 0xfff0
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM16]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -417,7 +432,8 @@ define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)*
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 63, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 63, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST63]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -434,7 +450,8 @@ define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out
; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 64, [[VAL0]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 64, [[VAL1]]
+; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST64]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index 89adcff1a278..350dd38ef583 100644
--- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -258,8 +258,10 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(<2 x i16> addrspace
; FIXME: fold lshl_or c0, c1, v0 -> or (c0 << c1), v0
; GCN-LABEL: {{^}}v_insertelement_v2i16_1:
+; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e70000
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
-; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]]
+; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]]
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7
; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
@@ -278,9 +280,12 @@ define amdgpu_kernel void @v_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out,
}
; GCN-LABEL: {{^}}v_insertelement_v2i16_1_inlineimm:
+; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xfff10000
; GCN: flat_load_dword [[VEC:v[0-9]+]]
-; GCN: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
-; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]]
+; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
+; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
+; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]]
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], -15, 16, [[ELT0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
@@ -337,8 +342,10 @@ define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(<2 x half> addrspac
}
; GCN-LABEL: {{^}}v_insertelement_v2f16_1:
+; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x45000000
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
-; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]]
+; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]]
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500
; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
@@ -357,9 +364,12 @@ define amdgpu_kernel void @v_insertelement_v2f16_1(<2 x half> addrspace(1)* %out
}
; GCN-LABEL: {{^}}v_insertelement_v2f16_1_inlineimm:
+; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x230000
; GCN: flat_load_dword [[VEC:v[0-9]+]]
-; GCN: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
-; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]]
+; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
+; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
+; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]]
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], 35, 16, [[ELT0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
@@ -411,11 +421,12 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspac
}
; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr:
+; GFX89: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
+; CI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7
; GCN: flat_load_dword [[IDX:v[0-9]+]]
; GCN: flat_load_dword [[VEC:v[0-9]+]]
-; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7
+; GFX89-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7
-; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
@@ -438,11 +449,12 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspac
}
; GCN-LABEL: {{^}}v_insertelement_v2f16_dynamic_vgpr:
+; GFX89: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
+; CI: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234
; GCN: flat_load_dword [[IDX:v[0-9]+]]
; GCN: flat_load_dword [[VEC:v[0-9]+]]
-; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234
+; GFX89-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234
-; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll
index e04d9e662cea..3bb5e21d67ac 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll
@@ -27,7 +27,7 @@ entry:
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x4200{{$}}
-; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]]
+; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_a(
@@ -46,7 +46,7 @@ entry:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x4200{{$}}
-; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]]
+; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_b(
@@ -65,7 +65,7 @@ entry:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x4200{{$}}
-; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]]
+; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_c(
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
index a86468b07a27..2cc63ae74bf1 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
@@ -17,7 +17,7 @@ declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind re
; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
; GCN-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
-; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VB]], [[VA]], [[VC]]
+; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], [[VB]], [[VC]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
index c9993ee88369..737be5d00447 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
@@ -14,7 +14,7 @@ define amdgpu_kernel void @v_fcmp_f32_dynamic_cc(i64 addrspace(1)* %out, float %
}
; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
-; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{v[0-9]+}}|, {{s[0-9]+}}
+; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
%temp = call float @llvm.fabs.f32(float %a)
%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
@@ -23,7 +23,7 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, floa
}
; GCN-LABEL: {{^}}v_fcmp_f32_oeq_both_operands_with_fabs:
-; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{v[0-9]+}}|, |{{s[0-9]+}}|
+; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|
define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
%temp = call float @llvm.fabs.f32(float %a)
%src_input = call float @llvm.fabs.f32(float %src)
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
index b47d2dbc744d..be8462d09064 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
@@ -27,7 +27,7 @@ define amdgpu_kernel void @test_mul_legacy_undef1_f32(float addrspace(1)* %out,
}
; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32:
-; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |s{{[0-9]+}}|
+; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |v{{[0-9]+}}|
define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, float %a, float %b) #0 {
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 1b937ab93247..ef9cda142850 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -3,9 +3,8 @@
; GCN-LABEL: {{^}}test_barrier:
; GFX8: buffer_store_dword
-; GFX8: s_waitcnt
; GFX9: flat_store_dword
-; GFX9-NOT: s_waitcnt
+; GCN: s_waitcnt
; GCN: s_barrier
define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
entry:
diff --git a/test/CodeGen/AMDGPU/llvm.fma.f16.ll b/test/CodeGen/AMDGPU/llvm.fma.f16.ll
index 518fe8baaa7a..3f4fba7d8ead 100644
--- a/test/CodeGen/AMDGPU/llvm.fma.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.fma.f16.ll
@@ -39,7 +39,7 @@ define amdgpu_kernel void @fma_f16(
; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x4200{{$}}
-; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
+; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_f16_imm_a(
@@ -62,7 +62,7 @@ define amdgpu_kernel void @fma_f16_imm_a(
; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x4200{{$}}
-; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]]
+; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_f16_imm_b(
@@ -85,7 +85,7 @@ define amdgpu_kernel void @fma_f16_imm_b(
; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x4200{{$}}
-; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]]
+; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_f16_imm_c(
diff --git a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
index f30fd1d58204..eec187390169 100644
--- a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -50,7 +50,7 @@ define amdgpu_kernel void @fmuladd_f16(
; VI-FLUSH: buffer_store_short v[[C_F16]]
; VI-DENORM: v_mov_b32_e32 [[KA:v[0-9]+]], 0x4200
-; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[KA]], v[[B_F16]], v[[C_F16]]
+; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], v[[B_F16]], [[KA]], v[[C_F16]]
; VI-DENORM: buffer_store_short [[RESULT]]
; GCN: s_endpgm
@@ -78,7 +78,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
; VI-FLUSH: buffer_store_short v[[C_F16]]
; VI-DENORM: v_mov_b32_e32 [[KA:v[0-9]+]], 0x4200
-; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[KA]], v[[A_F16]], v[[C_F16]]
+; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], v[[A_F16]], [[KA]], v[[C_F16]]
; VI-DENORM buffer_store_short [[RESULT]]
diff --git a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
index 4c8dff52509a..a4353d1136e1 100644
--- a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -101,18 +101,19 @@ entry:
}
; GCN-LABEL: {{^}}maxnum_v2f16_imm_a:
-; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
+; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
-; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
+; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_max_f32_e32 v[[R_F32_0:[0-9]+]], 0x40400000, v[[B_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI-DAG: v_max_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]]
+; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
+; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
-; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
+; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; GCN: buffer_store_dword v[[R_V2_F16]]
@@ -128,18 +129,19 @@ entry:
}
; GCN-LABEL: {{^}}maxnum_v2f16_imm_b:
-; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
+; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
-; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
+; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_max_f32_e32 v[[R_F32_0:[0-9]+]], 4.0, v[[A_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI-DAG: v_max_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]]
+; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200
+; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST3]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
-; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
+; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; GCN: buffer_store_dword v[[R_V2_F16]]
diff --git a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index b8221356b664..4875d26fc860 100644
--- a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -100,7 +100,7 @@ entry:
}
; GCN-LABEL: {{^}}minnum_v2f16_imm_a:
-; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
+; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
@@ -110,11 +110,11 @@ entry:
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
-; VI-DAG: v_min_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]]
+; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
+; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
-; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
+; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; GCN: buffer_store_dword v[[R_V2_F16]]
@@ -130,18 +130,19 @@ entry:
}
; GCN-LABEL: {{^}}minnum_v2f16_imm_b:
-; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
+; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
-; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
+; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_min_f32_e32 v[[R_F32_0:[0-9]+]], 4.0, v[[A_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI-DAG: v_min_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]]
+; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200
+; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST3]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
-; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
+; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN-NOT: and
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; GCN: buffer_store_dword v[[R_V2_F16]]
diff --git a/test/CodeGen/AMDGPU/mad24-get-global-id.ll b/test/CodeGen/AMDGPU/mad24-get-global-id.ll
index 1e78c4ebcc9f..176d1d25f196 100644
--- a/test/CodeGen/AMDGPU/mad24-get-global-id.ll
+++ b/test/CodeGen/AMDGPU/mad24-get-global-id.ll
@@ -10,7 +10,7 @@ declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
; GCN-LABEL: {{^}}get_global_id_0:
; GCN: s_and_b32 [[WGSIZEX:s[0-9]+]], {{s[0-9]+}}, 0xffff
; GCN: v_mov_b32_e32 [[VWGSIZEX:v[0-9]+]], [[WGSIZEX]]
-; GCN: v_mad_u32_u24 v{{[0-9]+}}, [[VWGSIZEX]], s8, v0
+; GCN: v_mad_u32_u24 v{{[0-9]+}}, s8, [[VWGSIZEX]], v0
define amdgpu_kernel void @get_global_id_0(i32 addrspace(1)* %out) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%cast.dispatch.ptr = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
diff --git a/test/CodeGen/AMDGPU/madak.ll b/test/CodeGen/AMDGPU/madak.ll
index 5f1fb0e2d732..8e0014911def 100644
--- a/test/CodeGen/AMDGPU/madak.ll
+++ b/test/CodeGen/AMDGPU/madak.ll
@@ -151,7 +151,7 @@ define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, flo
; GCN-LABEL: {{^}}no_madak_src0_modifier_f32:
; GCN: buffer_load_dword [[VA:v[0-9]+]]
; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
+; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -173,7 +173,7 @@ define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalia
; GCN-LABEL: {{^}}no_madak_src1_modifier_f32:
; GCN: buffer_load_dword [[VA:v[0-9]+]]
; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
+; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/test/CodeGen/AMDGPU/madmk.ll b/test/CodeGen/AMDGPU/madmk.ll
index 6e70e95383c9..6bc40e82459b 100644
--- a/test/CodeGen/AMDGPU/madmk.ll
+++ b/test/CodeGen/AMDGPU/madmk.ll
@@ -129,7 +129,7 @@ define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], |[[VA]]|, [[VB]]
+; GCN: v_mad_f32 {{v[0-9]+}}, |[[VA]]|, [[VK]], [[VB]]
define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -171,7 +171,7 @@ define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalia
; GCN-LABEL: {{^}}madmk_add_inline_imm_f32:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0
+; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[VK]], 2.0
define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
diff --git a/test/CodeGen/AMDGPU/mul.ll b/test/CodeGen/AMDGPU/mul.ll
index a72a6efb0711..57c50c9804e5 100644
--- a/test/CodeGen/AMDGPU/mul.ll
+++ b/test/CodeGen/AMDGPU/mul.ll
@@ -211,10 +211,10 @@ endif:
; SI: s_mul_i32
; SI: v_mul_hi_u32
; SI: s_mul_i32
-; SI: s_mul_i32
-; SI: v_mul_hi_u32
-; SI: v_mul_hi_u32
-; SI: s_mul_i32
+; SI-DAG: s_mul_i32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: s_mul_i32
; SI-DAG: s_mul_i32
; SI-DAG: v_mul_hi_u32
; SI: s_mul_i32
diff --git a/test/CodeGen/AMDGPU/scratch-simple.ll b/test/CodeGen/AMDGPU/scratch-simple.ll
index 60b9b56a48d1..6ed730ad60f4 100644
--- a/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -9,13 +9,11 @@
; GCN-LABEL: {{^}}ps_main:
; GCN-DAG: s_mov_b32 [[SWO:s[0-9]+]], s0
-; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x200
-; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0x400{{$}}
; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
-; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]], [[K]]
-; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[CLAMP_IDX]], [[ZERO]]
+; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], 0x200, [[CLAMP_IDX]]
+; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], 0x400, [[CLAMP_IDX]]
; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
diff --git a/test/CodeGen/AMDGPU/sdiv.ll b/test/CodeGen/AMDGPU/sdiv.ll
index f9ac425be794..7ec6ca809b68 100644
--- a/test/CodeGen/AMDGPU/sdiv.ll
+++ b/test/CodeGen/AMDGPU/sdiv.ll
@@ -36,7 +36,7 @@ define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)*
; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; SI-DAG: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
-; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[MAGIC]], [[VAL]]
+; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
; SI: v_add_i32
; SI: v_lshrrev_b32
; SI: v_ashrrev_i32
diff --git a/test/CodeGen/AMDGPU/sdwa-peephole.ll b/test/CodeGen/AMDGPU/sdwa-peephole.ll
index 73defc17d04f..a319edfc5ace 100644
--- a/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -345,7 +345,10 @@ entry:
; GCN-LABEL: {{^}}immediate_mul_v2i16:
; NOSDWA-NOT: v_mul_u32_u24_sdwa
-; SDWA-NOT: v_mul_u32_u24_sdwa
+; SDWA-DAG: v_mov_b32_e32 v[[M321:[0-9]+]], 0x141
+; SDWA-DAG: v_mov_b32_e32 v[[M123:[0-9]+]], 0x7b
+; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v[[M123]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v[[M321]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
diff --git a/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir b/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
new file mode 100644
index 000000000000..cd50e01032c3
--- /dev/null
+++ b/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
@@ -0,0 +1,410 @@
+# RUN: llc -march=amdgcn -mcpu=fiji -start-before si-peephole-sdwa -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: {{^}}sdwa_imm_operand:
+# GCN: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 2
+# GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 2
+# GCN: BB0_1:
+# GCN: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+# GCN: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+
+# GCN-LABEL: {{^}}sdwa_sgpr_operand:
+# GCN: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 2
+# GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 2
+# GCN: BB1_1:
+# GCN: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+# GCN: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+
+--- |
+ ; ModuleID = 'sdwa-scalar-ops.opt.ll'
+ source_filename = "sdwa-scalar-ops.opt.ll"
+ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+ define amdgpu_kernel void @sdwa_imm_operand(i32 addrspace(1)* nocapture %arg) {
+ bb:
+ br label %bb2
+
+ bb1: ; preds = %bb2
+ ret void
+
+ bb2: ; preds = %bb2, %bb
+ %lsr.iv = phi i64 [ %lsr.iv.next, %bb2 ], [ 0, %bb ]
+ %bc = bitcast i32 addrspace(1)* %arg to i8 addrspace(1)*
+ %uglygep4 = getelementptr i8, i8 addrspace(1)* %bc, i64 %lsr.iv
+ %uglygep45 = bitcast i8 addrspace(1)* %uglygep4 to i32 addrspace(1)*
+ %tmp5 = load i32, i32 addrspace(1)* %uglygep45, align 4
+ %tmp6 = lshr i32 %tmp5, 8
+ %tmp7 = and i32 %tmp6, 255
+ %tmp8 = zext i32 %tmp7 to i64
+ %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp8
+ store i32 1, i32 addrspace(1)* %tmp9, align 4
+ %scevgep = getelementptr i32, i32 addrspace(1)* %uglygep45, i64 1
+ %tmp13 = load i32, i32 addrspace(1)* %scevgep, align 4
+ %tmp14 = lshr i32 %tmp13, 8
+ %tmp15 = and i32 %tmp14, 255
+ %tmp16 = zext i32 %tmp15 to i64
+ %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp16
+ store i32 1, i32 addrspace(1)* %tmp17, align 4
+ %lsr.iv.next = add nuw nsw i64 %lsr.iv, 8
+ %tmp1 = trunc i64 %lsr.iv.next to i32
+ %tmp19 = icmp eq i32 %tmp1, 4096
+ br i1 %tmp19, label %bb1, label %bb2
+ }
+
+ define amdgpu_kernel void @sdwa_sgpr_operand(i32 addrspace(1)* nocapture %arg) {
+ bb:
+ br label %bb2
+
+ bb1: ; preds = %bb2
+ ret void
+
+ bb2: ; preds = %bb2, %bb
+ %lsr.iv = phi i64 [ %lsr.iv.next, %bb2 ], [ 0, %bb ]
+ %bc = bitcast i32 addrspace(1)* %arg to i8 addrspace(1)*
+ %uglygep4 = getelementptr i8, i8 addrspace(1)* %bc, i64 %lsr.iv
+ %uglygep45 = bitcast i8 addrspace(1)* %uglygep4 to i32 addrspace(1)*
+ %tmp5 = load i32, i32 addrspace(1)* %uglygep45, align 4
+ %tmp6 = lshr i32 %tmp5, 8
+ %tmp7 = and i32 %tmp6, 255
+ %tmp8 = zext i32 %tmp7 to i64
+ %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp8
+ store i32 1, i32 addrspace(1)* %tmp9, align 4
+ %scevgep = getelementptr i32, i32 addrspace(1)* %uglygep45, i64 1
+ %tmp13 = load i32, i32 addrspace(1)* %scevgep, align 4
+ %tmp14 = lshr i32 %tmp13, 8
+ %tmp15 = and i32 %tmp14, 255
+ %tmp16 = zext i32 %tmp15 to i64
+ %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp16
+ store i32 1, i32 addrspace(1)* %tmp17, align 4
+ %lsr.iv.next = add nuw nsw i64 %lsr.iv, 8
+ %tmp1 = trunc i64 %lsr.iv.next to i32
+ %tmp19 = icmp eq i32 %tmp1, 4096
+ br i1 %tmp19, label %bb1, label %bb2
+ }
+
+...
+---
+name: sdwa_imm_operand
+alignment: 0
+exposesReturnsTwice: false
+noVRegs: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_64 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: sgpr_128 }
+ - { id: 4, class: sgpr_64 }
+ - { id: 5, class: sreg_32_xm0 }
+ - { id: 6, class: sgpr_32 }
+ - { id: 7, class: sreg_64 }
+ - { id: 8, class: sreg_64 }
+ - { id: 9, class: sreg_64_xexec }
+ - { id: 10, class: sreg_32_xm0 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sreg_32_xm0 }
+ - { id: 13, class: sreg_32_xm0 }
+ - { id: 14, class: sreg_32_xm0 }
+ - { id: 15, class: sreg_32_xm0 }
+ - { id: 16, class: sreg_64 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: vreg_64 }
+ - { id: 19, class: sreg_32_xm0 }
+ - { id: 20, class: sreg_32 }
+ - { id: 21, class: sreg_32_xm0 }
+ - { id: 22, class: sreg_32_xm0 }
+ - { id: 23, class: sreg_32_xm0 }
+ - { id: 24, class: sreg_64 }
+ - { id: 25, class: sreg_32_xm0 }
+ - { id: 26, class: sreg_32_xm0 }
+ - { id: 27, class: sreg_32_xm0 }
+ - { id: 28, class: sreg_32_xm0 }
+ - { id: 29, class: sreg_64 }
+ - { id: 30, class: vgpr_32 }
+ - { id: 31, class: vreg_64 }
+ - { id: 32, class: sreg_32_xm0 }
+ - { id: 33, class: sreg_32_xm0 }
+ - { id: 34, class: sreg_64 }
+ - { id: 35, class: sreg_32_xm0 }
+ - { id: 36, class: sreg_32_xm0 }
+ - { id: 37, class: sreg_32_xm0 }
+ - { id: 38, class: sreg_32_xm0 }
+ - { id: 39, class: vreg_64 }
+ - { id: 40, class: vgpr_32 }
+ - { id: 41, class: vreg_64 }
+ - { id: 42, class: sreg_32_xm0 }
+ - { id: 43, class: sreg_32 }
+ - { id: 44, class: sreg_32_xm0 }
+ - { id: 45, class: sreg_64 }
+ - { id: 46, class: sreg_32_xm0 }
+ - { id: 47, class: sreg_32_xm0 }
+ - { id: 48, class: sreg_32_xm0 }
+ - { id: 49, class: sreg_32_xm0 }
+ - { id: 50, class: sreg_64 }
+ - { id: 51, class: vreg_64 }
+ - { id: 52, class: sreg_64 }
+ - { id: 53, class: sreg_32_xm0 }
+ - { id: 54, class: sreg_32_xm0 }
+ - { id: 55, class: sreg_32_xm0 }
+ - { id: 56, class: sreg_32_xm0 }
+ - { id: 57, class: sreg_64 }
+ - { id: 58, class: sreg_32_xm0 }
+ - { id: 59, class: sreg_32_xm0 }
+ - { id: 60, class: vgpr_32 }
+ - { id: 61, class: vgpr_32 }
+ - { id: 62, class: vreg_64 }
+ - { id: 63, class: vgpr_32 }
+ - { id: 64, class: vgpr_32 }
+ - { id: 65, class: vgpr_32 }
+ - { id: 66, class: vgpr_32 }
+ - { id: 67, class: vreg_64 }
+ - { id: 68, class: vgpr_32 }
+ - { id: 69, class: vgpr_32 }
+ - { id: 70, class: vgpr_32 }
+ - { id: 71, class: vgpr_32 }
+ - { id: 72, class: vgpr_32 }
+ - { id: 73, class: vgpr_32 }
+ - { id: 74, class: vgpr_32 }
+ - { id: 75, class: vreg_64 }
+ - { id: 76, class: vgpr_32 }
+ - { id: 77, class: vgpr_32 }
+ - { id: 78, class: vgpr_32 }
+ - { id: 79, class: vgpr_32 }
+ - { id: 80, class: vreg_64 }
+ - { id: 81, class: vgpr_32 }
+ - { id: 82, class: vgpr_32 }
+ - { id: 83, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr4_sgpr5', virtual-reg: '%4' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.bb:
+ successors: %bb.2.bb2(0x80000000)
+ liveins: %sgpr4_sgpr5
+
+ %4 = COPY %sgpr4_sgpr5
+ %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %8 = S_MOV_B64 0
+ %7 = COPY %9
+ %30 = V_MOV_B32_e32 1, implicit %exec
+ S_BRANCH %bb.2.bb2
+
+ bb.1.bb1:
+ S_ENDPGM
+
+ bb.2.bb2:
+ successors: %bb.1.bb1(0x04000000), %bb.2.bb2(0x7c000000)
+
+ %0 = PHI %8, %bb.0.bb, %1, %bb.2.bb2
+ %13 = COPY %7.sub1
+ %14 = S_ADD_U32 %7.sub0, %0.sub0, implicit-def %scc
+ %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc
+ %16 = REG_SEQUENCE %14, 1, %15, 2
+ %18 = COPY %16
+ %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
+ %60 = V_BFE_U32 %17, 8, 8, implicit %exec
+ %61 = V_LSHLREV_B32_e32 2, killed %60, implicit %exec
+ %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec
+ %66 = COPY %13
+ %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
+ %67 = REG_SEQUENCE %70, 1, killed %65, 2
+ FLAT_STORE_DWORD %67, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
+ %37 = S_ADD_U32 %14, 4, implicit-def %scc
+ %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc
+ %71 = COPY killed %37
+ %72 = COPY killed %38
+ %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
+ %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
+ %73 = V_BFE_U32 %40, 8, 8, implicit %exec
+ %74 = V_LSHLREV_B32_e32 2, killed %73, implicit %exec
+ %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec
+ %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
+ %80 = REG_SEQUENCE %83, 1, killed %78, 2
+ FLAT_STORE_DWORD %80, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
+ %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc
+ %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc
+ %57 = REG_SEQUENCE %55, 1, killed %56, 2
+ %1 = COPY %57
+ S_CMPK_EQ_I32 %55, 4096, implicit-def %scc
+ S_CBRANCH_SCC1 %bb.1.bb1, implicit %scc
+ S_BRANCH %bb.2.bb2
+
+...
+---
+name: sdwa_sgpr_operand
+alignment: 0
+exposesReturnsTwice: false
+noVRegs: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_64 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: sgpr_128 }
+ - { id: 4, class: sgpr_64 }
+ - { id: 5, class: sreg_32_xm0 }
+ - { id: 6, class: sgpr_32 }
+ - { id: 7, class: sreg_64 }
+ - { id: 8, class: sreg_64 }
+ - { id: 9, class: sreg_64_xexec }
+ - { id: 10, class: sreg_32_xm0 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sreg_32_xm0 }
+ - { id: 13, class: sreg_32_xm0 }
+ - { id: 14, class: sreg_32_xm0 }
+ - { id: 15, class: sreg_32_xm0 }
+ - { id: 16, class: sreg_64 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: vreg_64 }
+ - { id: 19, class: sreg_32_xm0 }
+ - { id: 20, class: sreg_32 }
+ - { id: 21, class: sreg_32_xm0 }
+ - { id: 22, class: sreg_32_xm0 }
+ - { id: 23, class: sreg_32_xm0 }
+ - { id: 24, class: sreg_64 }
+ - { id: 25, class: sreg_32_xm0 }
+ - { id: 26, class: sreg_32_xm0 }
+ - { id: 27, class: sreg_32_xm0 }
+ - { id: 28, class: sreg_32_xm0 }
+ - { id: 29, class: sreg_64 }
+ - { id: 30, class: vgpr_32 }
+ - { id: 31, class: vreg_64 }
+ - { id: 32, class: sreg_32_xm0 }
+ - { id: 33, class: sreg_32_xm0 }
+ - { id: 34, class: sreg_64 }
+ - { id: 35, class: sreg_32_xm0 }
+ - { id: 36, class: sreg_32_xm0 }
+ - { id: 37, class: sreg_32_xm0 }
+ - { id: 38, class: sreg_32_xm0 }
+ - { id: 39, class: vreg_64 }
+ - { id: 40, class: vgpr_32 }
+ - { id: 41, class: vreg_64 }
+ - { id: 42, class: sreg_32_xm0 }
+ - { id: 43, class: sreg_32 }
+ - { id: 44, class: sreg_32_xm0 }
+ - { id: 45, class: sreg_64 }
+ - { id: 46, class: sreg_32_xm0 }
+ - { id: 47, class: sreg_32_xm0 }
+ - { id: 48, class: sreg_32_xm0 }
+ - { id: 49, class: sreg_32_xm0 }
+ - { id: 50, class: sreg_64 }
+ - { id: 51, class: vreg_64 }
+ - { id: 52, class: sreg_64 }
+ - { id: 53, class: sreg_32_xm0 }
+ - { id: 54, class: sreg_32_xm0 }
+ - { id: 55, class: sreg_32_xm0 }
+ - { id: 56, class: sreg_32_xm0 }
+ - { id: 57, class: sreg_64 }
+ - { id: 58, class: sreg_32_xm0 }
+ - { id: 59, class: sreg_32_xm0 }
+ - { id: 60, class: vgpr_32 }
+ - { id: 61, class: vgpr_32 }
+ - { id: 62, class: vreg_64 }
+ - { id: 63, class: vgpr_32 }
+ - { id: 64, class: vgpr_32 }
+ - { id: 65, class: vgpr_32 }
+ - { id: 66, class: vgpr_32 }
+ - { id: 67, class: vreg_64 }
+ - { id: 68, class: vgpr_32 }
+ - { id: 69, class: vgpr_32 }
+ - { id: 70, class: vgpr_32 }
+ - { id: 71, class: vgpr_32 }
+ - { id: 72, class: vgpr_32 }
+ - { id: 73, class: vgpr_32 }
+ - { id: 74, class: vgpr_32 }
+ - { id: 75, class: vreg_64 }
+ - { id: 76, class: vgpr_32 }
+ - { id: 77, class: vgpr_32 }
+ - { id: 78, class: vgpr_32 }
+ - { id: 79, class: vgpr_32 }
+ - { id: 80, class: vreg_64 }
+ - { id: 81, class: vgpr_32 }
+ - { id: 82, class: vgpr_32 }
+ - { id: 83, class: vgpr_32 }
+ - { id: 84, class: sreg_32_xm0 }
+liveins:
+ - { reg: '%sgpr4_sgpr5', virtual-reg: '%4' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.bb:
+ successors: %bb.2.bb2(0x80000000)
+ liveins: %sgpr4_sgpr5
+
+ %4 = COPY %sgpr4_sgpr5
+ %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %8 = S_MOV_B64 0
+ %7 = COPY %9
+ %30 = V_MOV_B32_e32 1, implicit %exec
+ %84 = S_MOV_B32 2
+ S_BRANCH %bb.2.bb2
+
+ bb.1.bb1:
+ S_ENDPGM
+
+ bb.2.bb2:
+ successors: %bb.1.bb1(0x04000000), %bb.2.bb2(0x7c000000)
+
+ %0 = PHI %8, %bb.0.bb, %1, %bb.2.bb2
+ %13 = COPY %7.sub1
+ %14 = S_ADD_U32 %7.sub0, %0.sub0, implicit-def %scc
+ %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc
+ %16 = REG_SEQUENCE %14, 1, %15, 2
+ %18 = COPY %16
+ %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
+ %60 = V_BFE_U32 %17, 8, 8, implicit %exec
+ %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit %exec
+ %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec
+ %66 = COPY %13
+ %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
+ %67 = REG_SEQUENCE %70, 1, killed %65, 2
+ FLAT_STORE_DWORD %67, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
+ %37 = S_ADD_U32 %14, 4, implicit-def %scc
+ %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc
+ %71 = COPY killed %37
+ %72 = COPY killed %38
+ %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
+ %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
+ %73 = V_BFE_U32 %40, 8, 8, implicit %exec
+ %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit %exec
+ %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec
+ %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
+ %80 = REG_SEQUENCE %83, 1, killed %78, 2
+ FLAT_STORE_DWORD %80, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
+ %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc
+ %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc
+ %57 = REG_SEQUENCE %55, 1, killed %56, 2
+ %1 = COPY %57
+ S_CMPK_EQ_I32 %55, 4096, implicit-def %scc
+ S_CBRANCH_SCC1 %bb.1.bb1, implicit %scc
+ S_BRANCH %bb.2.bb2
+
+...
diff --git a/test/CodeGen/AMDGPU/select.f16.ll b/test/CodeGen/AMDGPU/select.f16.ll
index 2a7a9c9e0638..92ee2eb7f403 100644
--- a/test/CodeGen/AMDGPU/select.f16.ll
+++ b/test/CodeGen/AMDGPU/select.f16.ll
@@ -196,11 +196,11 @@ entry:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
-; SI: v_cmp_lt_f32_e64
-; SI: v_cmp_lt_f32_e32 vcc, 0.5
+; SI-DAG: v_cmp_gt_f32_e64
+; SI-DAG: v_cmp_lt_f32_e32 vcc, 0.5
; VI: v_cmp_lt_f16_e32
-; VI: v_cmp_lt_f16_e64
+; VI: v_cmp_gt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
@@ -228,11 +228,11 @@ entry:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
-; SI: v_cmp_gt_f32_e64
-; SI: v_cmp_gt_f32_e32 vcc, 0.5
+; SI-DAG: v_cmp_lt_f32_e64
+; SI-DAG: v_cmp_gt_f32_e32 vcc, 0.5
; VI: v_cmp_gt_f16_e32
-; VI: v_cmp_gt_f16_e64
+; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
diff --git a/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
index 0a29db4a0580..4f7b61adc91d 100644
--- a/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
+++ b/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
@@ -5,7 +5,7 @@
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}}
; GCN: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
@@ -24,14 +24,15 @@ define amdgpu_kernel void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128
; Extract the high bit of the 2nd quarter
; GCN-LABEL: {{^}}v_uextract_bit_63_i128:
-; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO3:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
-; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO3]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -49,7 +50,7 @@ define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
@@ -68,14 +69,15 @@ define amdgpu_kernel void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128
; Extract the high bit of the 4th quarter
; GCN-LABEL: {{^}}v_uextract_bit_127_i128:
-; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}}
+; GCN: v_mov_b32_e32 v[[ZERO3:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
-; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO3]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -90,15 +92,16 @@ define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128
; Spans more than 2 dword boundaries
; GCN-LABEL: {{^}}v_uextract_bit_34_100_i128:
-; GCN: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshl_b64 v{{\[}}[[SHLLO:[0-9]+]]:[[SHLHI:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, 30
; GCN-DAG: v_lshrrev_b32_e32 v[[ELT1PART:[0-9]+]], 2, v{{[[0-9]+}}
; GCN-DAG: v_bfe_u32 v[[ELT2PART:[0-9]+]], v[[VAL3]], 2, 2{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[SHLLO]], v[[ELT1PART]]
+; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]{{$}}
-; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[OR0]]:[[ZERO]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[OR0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
index 36c33b876919..a6026785b173 100644
--- a/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
+++ b/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
@@ -21,10 +21,11 @@ define amdgpu_kernel void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 add
; Extract the high bit of the high half
; GCN-LABEL: {{^}}v_uextract_bit_63_i64:
+; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
-; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
+; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
@@ -69,10 +70,11 @@ define amdgpu_kernel void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 add
}
; GCN-LABEL: {{^}}v_uextract_bit_32_i64:
-; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
+; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO1]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
@@ -85,10 +87,11 @@ define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 add
}
; GCN-LABEL: {{^}}v_uextract_bit_33_i64:
+; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
-; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
+; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
@@ -167,10 +170,11 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64
}
; GCN-LABEL: {{^}}v_uextract_bit_32_33_i64:
+; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
-; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
+; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
@@ -183,11 +187,12 @@ define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64
}
; GCN-LABEL: {{^}}v_uextract_bit_30_60_i64:
+; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 30
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
-; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
+; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO1]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
@@ -200,10 +205,11 @@ define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64
}
; GCN-LABEL: {{^}}v_uextract_bit_33_63_i64:
+; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
-; GCN-DAG: v_mov_b32_e32 v[[BFE:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
+; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
@@ -216,9 +222,10 @@ define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64
}
; GCN-LABEL: {{^}}v_uextract_bit_31_63_i64:
+; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
-; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
+; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -300,7 +307,8 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)*
; GCN-LABEL: {{^}}and_not_mask_i64:
; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
-; GCN: v_mov_b32_e32 v[[SHRHI:[0-9]+]], 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[SHRHI:[0-9]+]], v[[ZERO]]{{$}}
; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 20, v[[VALLO]]
; GCN-DAG: v_and_b32_e32 v[[SHRLO:[0-9]+]], 4, [[SHR]]
; GCN-NOT: v[[SHRLO]]
@@ -321,7 +329,7 @@ define amdgpu_kernel void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspac
; keeping the 32-bit and has a smaller encoding size than the bfe.
; GCN-LABEL: {{^}}v_uextract_bit_27_29_multi_use_shift_i64:
-; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; GCN-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 27
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
@@ -340,8 +348,8 @@ define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspac
}
; GCN-LABEL: {{^}}v_uextract_bit_34_37_multi_use_shift_i64:
-; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; GCN: v_mov_b32_e32 v[[ZERO_SHR:[0-9]+]], 0{{$}}
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[ZERO_SHR:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO_BFE:[0-9]+]], v[[ZERO_SHR]]
; GCN-DAG: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 2, [[VAL]]
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
@@ -362,6 +370,7 @@ define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspac
; GCN-LABEL: {{^}}v_uextract_bit_33_36_use_upper_half_shift_i64:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 3
+; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:{{[0-9]+\]}}
; GCN: buffer_store_dword v[[ZERO]]
define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
diff --git a/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/test/CodeGen/AMDGPU/sminmax.v2i16.ll
index 16ce86bf8b11..5d71ad2c8ba3 100644
--- a/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ b/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -40,13 +40,14 @@ define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
+; VI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_sub_u16_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; VI: v_sub_u16_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}}
-; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}}
+; VI: v_add_u16_sdwa v{{[0-9]+}}, [[TWO]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NOT: v_and_b32
; VI: v_or_b32_e32
define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
@@ -206,7 +207,7 @@ define amdgpu_kernel void @v_min_max_v2i16_user(<2 x i16> addrspace(1)* %out0, <
}
; GCN-LABEL: {{^}}u_min_max_v2i16:
-; GFX9: v_pk_max_u16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
+; GFX9: v_pk_max_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_pk_min_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @u_min_max_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %val0, <2 x i16> %val1) nounwind {
%cond0 = icmp ugt <2 x i16> %val0, %val1
diff --git a/test/CodeGen/AMDGPU/srem.ll b/test/CodeGen/AMDGPU/srem.ll
index c89f798397ae..e06725892089 100644
--- a/test/CodeGen/AMDGPU/srem.ll
+++ b/test/CodeGen/AMDGPU/srem.ll
@@ -20,7 +20,7 @@ define amdgpu_kernel void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)*
; FUNC-LABEL: {{^}}srem_i32_7:
; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493
-; SI: v_mul_hi_i32 {{v[0-9]+}}, [[MAGIC]],
+; SI: v_mul_hi_i32 {{v[0-9]+}}, {{v[0-9]+}}, [[MAGIC]]
; SI: v_mul_lo_i32
; SI: v_sub_i32
; SI: s_endpgm
diff --git a/test/CodeGen/AMDGPU/sub.v2i16.ll b/test/CodeGen/AMDGPU/sub.v2i16.ll
index 431344670ffb..6aeff3fc3b6c 100644
--- a/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -23,7 +23,7 @@ define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: v_mov_b32_e32 [[VVAL1:v[0-9]+]]
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[VVAL1]], [[VAL0]]
+; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[VAL0]], [[VVAL1]]
; VI: s_sub_i32
; VI: s_sub_i32
@@ -47,7 +47,7 @@ define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <
; FIXME: VI should not scalarize arg access.
; GCN-LABEL: {{^}}s_test_sub_v2i16_kernarg:
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
+; GFX9: v_pk_sub_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_subrev_i32_e32
; VI: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
@@ -59,9 +59,10 @@ define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out
; GCN-LABEL: {{^}}v_test_sub_v2i16_constant:
; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0x1c8007b{{$}}
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[CONST]], v{{[0-9]+}}
+; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
-; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffe38, v{{[0-9]+}}
+; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfffffe38
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xffffff85, v{{[0-9]+}}
define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -76,9 +77,10 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %ou
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_v2i16_neg_constant:
; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0xfc21fcb3{{$}}
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[CONST]], v{{[0-9]+}}
+; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
-; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x3df, v{{[0-9]+}}
+; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3df
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}}
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x34d, v{{[0-9]+}}
define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -93,11 +95,11 @@ define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)*
; GCN-LABEL: {{^}}v_test_sub_v2i16_inline_neg1:
; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, -1{{$}}
+; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
; VI: flat_load_ushort [[LOAD1:v[0-9]+]]
-; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 1, [[LOAD0]]
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[ONE]], [[LOAD0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 1, [[LOAD1]]
-; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -111,7 +113,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)*
; GCN-LABEL: {{^}}v_test_sub_v2i16_inline_lo_zero_hi:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 32{{$}}
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}{{$}}
+; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; VI-NOT: v_subrev_i16
; VI: v_add_u16_e32 v{{[0-9]+}}, 0xffffffe0, v{{[0-9]+}}
@@ -131,12 +133,12 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(<2 x i16> addrspac
; The high element gives fp
; GCN-LABEL: {{^}}v_test_sub_v2i16_inline_fp_split:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 1.0
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}{{$}}
+; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; VI-NOT: v_subrev_i16
-; VI: v_add_u16_e32 v{{[0-9]+}}, 0xffffc080, v{{[0-9]+}}
+; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xffffc080
+; VI: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NOT: v_subrev_i16
-; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -185,10 +187,10 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_v2i16_zext_to_v2i64:
+; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: flat_load_dword [[A:v[0-9]+]]
; GFX9: flat_load_dword [[B:v[0-9]+]]
-; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
@@ -199,8 +201,6 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
-; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
-; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI-DAG: v_subrev_u16_e32
; VI-DAG: v_subrev_u16_e32
diff --git a/test/CodeGen/AMDGPU/udiv.ll b/test/CodeGen/AMDGPU/udiv.ll
index 2874a0cdbc05..d9dab0d40acf 100644
--- a/test/CodeGen/AMDGPU/udiv.ll
+++ b/test/CodeGen/AMDGPU/udiv.ll
@@ -74,7 +74,7 @@ define amdgpu_kernel void @udiv_i32_div_pow2(i32 addrspace(1)* %out, i32 addrspa
; FUNC-LABEL: {{^}}udiv_i32_div_k_even:
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfabbd9c1
-; SI: v_mul_hi_u32 [[MULHI:v[0-9]+]], [[K]], [[VAL]]
+; SI: v_mul_hi_u32 [[MULHI:v[0-9]+]], [[VAL]], [[K]]
; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 25, [[MULHI]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @udiv_i32_div_k_even(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -88,7 +88,7 @@ define amdgpu_kernel void @udiv_i32_div_k_even(i32 addrspace(1)* %out, i32 addrs
; FUNC-LABEL: {{^}}udiv_i32_div_k_odd:
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7d5deca3
-; SI: v_mul_hi_u32 [[MULHI:v[0-9]+]], [[K]], [[VAL]]
+; SI: v_mul_hi_u32 [[MULHI:v[0-9]+]], [[VAL]], [[K]]
; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 24, [[MULHI]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @udiv_i32_div_k_odd(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -176,7 +176,7 @@ define amdgpu_kernel void @test_udiv2(i32 %p) {
; FUNC-LABEL: {{^}}test_udiv_3_mulhu:
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
-; SI: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
+; SI: v_mul_hi_u32 v0, {{s[0-9]+}}, {{v[0-9]+}}
; SI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
define amdgpu_kernel void @test_udiv_3_mulhu(i32 %p) {
%i = udiv i32 %p, 3
diff --git a/test/CodeGen/AMDGPU/urem.ll b/test/CodeGen/AMDGPU/urem.ll
index fd7f8fa2efab..fb4eab43a2d6 100644
--- a/test/CodeGen/AMDGPU/urem.ll
+++ b/test/CodeGen/AMDGPU/urem.ll
@@ -20,7 +20,7 @@ define amdgpu_kernel void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1
; FUNC-LABEL: {{^}}test_urem_i32_7:
; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925
-; SI: v_mul_hi_u32 {{v[0-9]+}}, [[MAGIC]]
+; SI: v_mul_hi_u32 [[MAGIC]], {{v[0-9]+}}
; SI: v_subrev_i32
; SI: v_mul_lo_i32
; SI: v_sub_i32
diff --git a/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index f8e6b7edfe35..e6bdb68a4f77 100644
--- a/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -54,8 +54,8 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(
; VI: buffer_load_dword [[VA0:v[0-9]+]]
; VI: buffer_load_dword [[VA1:v[0-9]+]]
-; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VA0]], [[SA]], [[VB]]
-; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VA1]], [[SA]], [[VB]]
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SA]], [[VA0]], [[VB]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SA]], [[VA1]], [[VB]]
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
define amdgpu_kernel void @test_use_s_v_s(float addrspace(1)* %out, float %a, float %b, float addrspace(1)* %in) #0 {
@@ -74,7 +74,7 @@ define amdgpu_kernel void @test_use_s_v_s(float addrspace(1)* %out, float %a, fl
; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
@@ -88,7 +88,7 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(
; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
@@ -228,7 +228,7 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k_x2(float addr
; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], [[SGPR1]]
-; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VS1]], [[SGPR0]], [[VK0]]
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK0]]
; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK1]]
@@ -251,7 +251,7 @@ define amdgpu_kernel void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a,
; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB0:[0-9]+]], s[[SGPR1_SUB0]]
; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB1:[0-9]+]], s[[SGPR1_SUB1]]
-; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}}
+; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}}
; Same zero component is re-used for half of each immediate.
; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000
diff --git a/test/CodeGen/AMDGPU/v_mac_f16.ll b/test/CodeGen/AMDGPU/v_mac_f16.ll
index c45af522ec49..3da1a0324042 100644
--- a/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -482,8 +482,9 @@ entry:
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]]
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
-; VI: v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
-; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
+; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
+; VI-DAG: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
+; VI-DAG: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
@@ -513,8 +514,9 @@ entry:
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
-; VI: v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
+; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
+; VI: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
@@ -544,8 +546,9 @@ entry:
; SI-DAG: v_mac_f32_e32 v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 v[[NEG_A1]], v{{[0-9]+}}, v{{[0-9]+}}
-; VI: v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
+; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
+; VI: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_sdwa v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_mac_f16_e32 v[[NEG_A1]], v{{[0-9]+}}, v{{[0-9]+}}
diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll
index 9f277b2c9a59..133aaa35981e 100644
--- a/test/CodeGen/AMDGPU/wqm.ll
+++ b/test/CodeGen/AMDGPU/wqm.ll
@@ -349,7 +349,7 @@ main_body:
; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body
; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
-; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]]
+; CHECK: v_cmp_gt_f32_e32 vcc, [[CTR]], [[SEVEN]]
; CHECK: s_cbranch_vccz [[LOOPHDR]]
; CHECK: ; %break
diff --git a/test/CodeGen/WebAssembly/negative-base-reg.ll b/test/CodeGen/WebAssembly/negative-base-reg.ll
index 377966ffa8d9..fc3a287f5858 100644
--- a/test/CodeGen/WebAssembly/negative-base-reg.ll
+++ b/test/CodeGen/WebAssembly/negative-base-reg.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32"
+target triple = "wasm32---elf"
@args = hidden local_unnamed_addr global [32 x i32] zeroinitializer, align 16
diff --git a/test/CodeGen/X86/bitcast-and-setcc-128.ll b/test/CodeGen/X86/bitcast-and-setcc-128.ll
new file mode 100644
index 000000000000..a681c3b0aa42
--- /dev/null
+++ b/test/CodeGen/X86/bitcast-and-setcc-128.ll
@@ -0,0 +1,1155 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+ssse3 < %s | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx < %s | FileCheck %s --check-prefixes=AVX12,AVX1
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefixes=AVX12,AVX2
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx512f,+avx512vl,+avx512bw < %s | FileCheck %s --check-prefixes=AVX512
+
+define i8 @v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
+; SSE2-SSSE3-LABEL: v8i16:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: pextrw $7, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $6, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $5, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $4, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $3, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $2, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $1, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movd %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX12-LABEL: v8i16:
+; AVX12: ## BB#0:
+; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
+; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpextrw $7, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $6, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $5, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $4, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $3, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $2, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $1, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vmovd %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX12-NEXT: retq
+;
+; AVX512-LABEL: v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpgtw %xmm3, %xmm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <8 x i16> %a, %b
+ %x1 = icmp sgt <8 x i16> %c, %d
+ %y = and <8 x i1> %x0, %x1
+ %res = bitcast <8 x i1> %y to i8
+ ret i8 %res
+}
+
+define i4 @v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; SSE2-SSSE3-LABEL: v4i32:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: movd %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX12-LABEL: v4i32:
+; AVX12: ## BB#0:
+; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
+; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpextrd $3, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrd $2, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrd $1, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vmovd %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX12-NEXT: retq
+;
+; AVX512-LABEL: v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpgtd %xmm3, %xmm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <4 x i32> %a, %b
+ %x1 = icmp sgt <4 x i32> %c, %d
+ %y = and <4 x i1> %x0, %x1
+ %res = bitcast <4 x i1> %y to i4
+ ret i4 %res
+}
+
+define i4 @v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
+; SSE2-SSSE3-LABEL: v4f32:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm1
+; SSE2-SSSE3-NEXT: cmpltps %xmm2, %xmm3
+; SSE2-SSSE3-NEXT: andps %xmm1, %xmm3
+; SSE2-SSSE3-NEXT: movd %xmm3, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX12-LABEL: v4f32:
+; AVX12: ## BB#0:
+; AVX12-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+; AVX12-NEXT: vcmpltps %xmm2, %xmm3, %xmm1
+; AVX12-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpextrd $3, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrd $2, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrd $1, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vmovd %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX12-NEXT: retq
+;
+; AVX512-LABEL: v4f32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1
+; AVX512-NEXT: vcmpltps %xmm2, %xmm3, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: retq
+ %x0 = fcmp ogt <4 x float> %a, %b
+ %x1 = fcmp ogt <4 x float> %c, %d
+ %y = and <4 x i1> %x0, %x1
+ %res = bitcast <4 x i1> %y to i4
+ ret i4 %res
+}
+
+define i16 @v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; SSE2-SSSE3-LABEL: v16i8:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %cl
+; SSE2-SSSE3-NEXT: andb $1, %cl
+; SSE2-SSSE3-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: andb $1, %al
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX12-LABEL: v16i8:
+; AVX12: ## BB#0:
+; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1
+; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpextrb $15, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $14, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $13, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $12, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $11, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $10, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $9, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $8, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $7, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $6, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $5, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $4, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $3, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $2, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $1, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrb $0, %xmm0, %eax
+; AVX12-NEXT: andb $1, %al
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; AVX12-NEXT: retq
+;
+; AVX512-LABEL: v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpgtb %xmm3, %xmm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <16 x i8> %a, %b
+ %x1 = icmp sgt <16 x i8> %c, %d
+ %y = and <16 x i1> %x0, %x1
+ %res = bitcast <16 x i1> %y to i16
+ ret i16 %res
+}
+
+define i2 @v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; SSE2-SSSE3-LABEL: v2i8:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: psllq $56, %xmm2
+; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm4
+; SSE2-SSSE3-NEXT: psrad $31, %xmm4
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $24, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; SSE2-SSSE3-NEXT: psllq $56, %xmm3
+; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm4
+; SSE2-SSSE3-NEXT: psrad $31, %xmm4
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $24, %xmm3
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE2-SSSE3-NEXT: psllq $56, %xmm0
+; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm4
+; SSE2-SSSE3-NEXT: psrad $31, %xmm4
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $24, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-SSSE3-NEXT: psllq $56, %xmm1
+; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4
+; SSE2-SSSE3-NEXT: psrad $31, %xmm4
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $24, %xmm1
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm5
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm5
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-SSSE3-NEXT: pand %xmm6, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
+; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: movq %xmm0, %rax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-SSSE3-NEXT: movq %xmm0, %rax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX1-LABEL: v2i8:
+; AVX1: ## BB#0:
+; AVX1-NEXT: vpsllq $56, %xmm3, %xmm3
+; AVX1-NEXT: vpsrad $31, %xmm3, %xmm4
+; AVX1-NEXT: vpsrad $24, %xmm3, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsllq $56, %xmm2, %xmm2
+; AVX1-NEXT: vpsrad $31, %xmm2, %xmm4
+; AVX1-NEXT: vpsrad $24, %xmm2, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsllq $56, %xmm1, %xmm1
+; AVX1-NEXT: vpsrad $31, %xmm1, %xmm4
+; AVX1-NEXT: vpsrad $24, %xmm1, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsllq $56, %xmm0, %xmm0
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm4
+; AVX1-NEXT: vpsrad $24, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3],xmm0[4,5],xmm4[6,7]
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: v2i8:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpsllq $56, %xmm3, %xmm3
+; AVX2-NEXT: vpsrad $31, %xmm3, %xmm4
+; AVX2-NEXT: vpsrad $24, %xmm3, %xmm3
+; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3]
+; AVX2-NEXT: vpsllq $56, %xmm2, %xmm2
+; AVX2-NEXT: vpsrad $31, %xmm2, %xmm4
+; AVX2-NEXT: vpsrad $24, %xmm2, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3]
+; AVX2-NEXT: vpsllq $56, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm4
+; AVX2-NEXT: vpsrad $24, %xmm1, %xmm1
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3]
+; AVX2-NEXT: vpsllq $56, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm4
+; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3]
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: v2i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllq $56, %xmm3, %xmm3
+; AVX512-NEXT: vpsraq $56, %xmm3, %xmm3
+; AVX512-NEXT: vpsllq $56, %xmm2, %xmm2
+; AVX512-NEXT: vpsraq $56, %xmm2, %xmm2
+; AVX512-NEXT: vpsllq $56, %xmm1, %xmm1
+; AVX512-NEXT: vpsraq $56, %xmm1, %xmm1
+; AVX512-NEXT: vpsllq $56, %xmm0, %xmm0
+; AVX512-NEXT: vpsraq $56, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <2 x i8> %a, %b
+ %x1 = icmp sgt <2 x i8> %c, %d
+ %y = and <2 x i1> %x0, %x1
+ %res = bitcast <2 x i1> %y to i2
+ ret i2 %res
+}
+
+define i2 @v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) {
+; SSE2-SSSE3-LABEL: v2i16:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: psllq $48, %xmm2
+; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm4
+; SSE2-SSSE3-NEXT: psrad $31, %xmm4
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $16, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; SSE2-SSSE3-NEXT: psllq $48, %xmm3
+; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm4
+; SSE2-SSSE3-NEXT: psrad $31, %xmm4
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $16, %xmm3
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE2-SSSE3-NEXT: psllq $48, %xmm0
+; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm4
+; SSE2-SSSE3-NEXT: psrad $31, %xmm4
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $16, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-SSSE3-NEXT: psllq $48, %xmm1
+; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4
+; SSE2-SSSE3-NEXT: psrad $31, %xmm4
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $16, %xmm1
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm5
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm5
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-SSSE3-NEXT: pand %xmm6, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
+; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: movq %xmm0, %rax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-SSSE3-NEXT: movq %xmm0, %rax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX1-LABEL: v2i16:
+; AVX1: ## BB#0:
+; AVX1-NEXT: vpsllq $48, %xmm3, %xmm3
+; AVX1-NEXT: vpsrad $31, %xmm3, %xmm4
+; AVX1-NEXT: vpsrad $16, %xmm3, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsllq $48, %xmm2, %xmm2
+; AVX1-NEXT: vpsrad $31, %xmm2, %xmm4
+; AVX1-NEXT: vpsrad $16, %xmm2, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsllq $48, %xmm1, %xmm1
+; AVX1-NEXT: vpsrad $31, %xmm1, %xmm4
+; AVX1-NEXT: vpsrad $16, %xmm1, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm4
+; AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3],xmm0[4,5],xmm4[6,7]
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: v2i16:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpsllq $48, %xmm3, %xmm3
+; AVX2-NEXT: vpsrad $31, %xmm3, %xmm4
+; AVX2-NEXT: vpsrad $16, %xmm3, %xmm3
+; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3]
+; AVX2-NEXT: vpsllq $48, %xmm2, %xmm2
+; AVX2-NEXT: vpsrad $31, %xmm2, %xmm4
+; AVX2-NEXT: vpsrad $16, %xmm2, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3]
+; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm4
+; AVX2-NEXT: vpsrad $16, %xmm1, %xmm1
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3]
+; AVX2-NEXT: vpsllq $48, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm4
+; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3]
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: v2i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllq $48, %xmm3, %xmm3
+; AVX512-NEXT: vpsraq $48, %xmm3, %xmm3
+; AVX512-NEXT: vpsllq $48, %xmm2, %xmm2
+; AVX512-NEXT: vpsraq $48, %xmm2, %xmm2
+; AVX512-NEXT: vpsllq $48, %xmm1, %xmm1
+; AVX512-NEXT: vpsraq $48, %xmm1, %xmm1
+; AVX512-NEXT: vpsllq $48, %xmm0, %xmm0
+; AVX512-NEXT: vpsraq $48, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <2 x i16> %a, %b
+ %x1 = icmp sgt <2 x i16> %c, %d
+ %y = and <2 x i1> %x0, %x1
+ %res = bitcast <2 x i1> %y to i2
+ ret i2 %res
+}
+
+define i2 @v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
+; SSE2-SSSE3-LABEL: v2i32:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: psllq $32, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $31, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
+; SSE2-SSSE3-NEXT: psllq $32, %xmm3
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $31, %xmm3
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-SSSE3-NEXT: psllq $32, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $31, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; SSE2-SSSE3-NEXT: psllq $32, %xmm1
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
+; SSE2-SSSE3-NEXT: psrad $31, %xmm1
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
+; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm3
+; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm5
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm5
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm3
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
+; SSE2-SSSE3-NEXT: pand %xmm6, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
+; SSE2-SSSE3-NEXT: por %xmm0, %xmm3
+; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2
+; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm4
+; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm4
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
+; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
+; SSE2-SSSE3-NEXT: movq %xmm0, %rax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-SSSE3-NEXT: movq %xmm0, %rax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX1-LABEL: v2i32:
+; AVX1: ## BB#0:
+; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3
+; AVX1-NEXT: vpsrad $31, %xmm3, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX1-NEXT: vpsrad $31, %xmm2, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
+; AVX1-NEXT: vpsrad $31, %xmm1, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3],xmm0[4,5],xmm4[6,7]
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: v2i32:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpsllq $32, %xmm3, %xmm3
+; AVX2-NEXT: vpsrad $31, %xmm3, %xmm4
+; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3]
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX2-NEXT: vpsrad $31, %xmm2, %xmm4
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3]
+; AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm4
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3]
+; AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm4
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3]
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: v2i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllq $32, %xmm3, %xmm3
+; AVX512-NEXT: vpsraq $32, %xmm3, %xmm3
+; AVX512-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512-NEXT: vpsraq $32, %xmm2, %xmm2
+; AVX512-NEXT: vpsllq $32, %xmm1, %xmm1
+; AVX512-NEXT: vpsraq $32, %xmm1, %xmm1
+; AVX512-NEXT: vpsllq $32, %xmm0, %xmm0
+; AVX512-NEXT: vpsraq $32, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <2 x i32> %a, %b
+ %x1 = icmp sgt <2 x i32> %c, %d
+ %y = and <2 x i1> %x0, %x1
+ %res = bitcast <2 x i1> %y to i2
+ ret i2 %res
+}
+
+define i2 @v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
+; SSE2-SSSE3-LABEL: v2i64:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm5
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm5
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-SSSE3-NEXT: pand %xmm6, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
+; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: movq %xmm0, %rax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-SSSE3-NEXT: movq %xmm0, %rax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX12-LABEL: v2i64:
+; AVX12: ## BB#0:
+; AVX12-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
+; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpextrq $1, %xmm0, %rax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vmovq %xmm0, %rax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX12-NEXT: retq
+;
+; AVX512-LABEL: v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <2 x i64> %a, %b
+ %x1 = icmp sgt <2 x i64> %c, %d
+ %y = and <2 x i1> %x0, %x1
+ %res = bitcast <2 x i1> %y to i2
+ ret i2 %res
+}
+
+define i2 @v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
+; SSE2-SSSE3-LABEL: v2f64:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm1
+; SSE2-SSSE3-NEXT: cmpltpd %xmm2, %xmm3
+; SSE2-SSSE3-NEXT: andpd %xmm1, %xmm3
+; SSE2-SSSE3-NEXT: movq %xmm3, %rax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
+; SSE2-SSSE3-NEXT: movq %xmm0, %rax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX12-LABEL: v2f64:
+; AVX12: ## BB#0:
+; AVX12-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX12-NEXT: vcmpltpd %xmm2, %xmm3, %xmm1
+; AVX12-NEXT: vandpd %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpextrq $1, %xmm0, %rax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vmovq %xmm0, %rax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX12-NEXT: retq
+;
+; AVX512-LABEL: v2f64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1
+; AVX512-NEXT: vcmpltpd %xmm2, %xmm3, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: retq
+ %x0 = fcmp ogt <2 x double> %a, %b
+ %x1 = fcmp ogt <2 x double> %c, %d
+ %y = and <2 x i1> %x0, %x1
+ %res = bitcast <2 x i1> %y to i2
+ ret i2 %res
+}
+
+define i4 @v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
+; SSE2-SSSE3-LABEL: v4i8:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: pslld $24, %xmm3
+; SSE2-SSSE3-NEXT: psrad $24, %xmm3
+; SSE2-SSSE3-NEXT: pslld $24, %xmm2
+; SSE2-SSSE3-NEXT: psrad $24, %xmm2
+; SSE2-SSSE3-NEXT: pslld $24, %xmm1
+; SSE2-SSSE3-NEXT: psrad $24, %xmm1
+; SSE2-SSSE3-NEXT: pslld $24, %xmm0
+; SSE2-SSSE3-NEXT: psrad $24, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: movd %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX12-LABEL: v4i8:
+; AVX12: ## BB#0:
+; AVX12-NEXT: vpslld $24, %xmm3, %xmm3
+; AVX12-NEXT: vpsrad $24, %xmm3, %xmm3
+; AVX12-NEXT: vpslld $24, %xmm2, %xmm2
+; AVX12-NEXT: vpsrad $24, %xmm2, %xmm2
+; AVX12-NEXT: vpslld $24, %xmm1, %xmm1
+; AVX12-NEXT: vpsrad $24, %xmm1, %xmm1
+; AVX12-NEXT: vpslld $24, %xmm0, %xmm0
+; AVX12-NEXT: vpsrad $24, %xmm0, %xmm0
+; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
+; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpextrd $3, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrd $2, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrd $1, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vmovd %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX12-NEXT: retq
+;
+; AVX512-LABEL: v4i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpslld $24, %xmm3, %xmm3
+; AVX512-NEXT: vpsrad $24, %xmm3, %xmm3
+; AVX512-NEXT: vpslld $24, %xmm2, %xmm2
+; AVX512-NEXT: vpsrad $24, %xmm2, %xmm2
+; AVX512-NEXT: vpslld $24, %xmm1, %xmm1
+; AVX512-NEXT: vpsrad $24, %xmm1, %xmm1
+; AVX512-NEXT: vpslld $24, %xmm0, %xmm0
+; AVX512-NEXT: vpsrad $24, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpgtd %xmm3, %xmm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <4 x i8> %a, %b
+ %x1 = icmp sgt <4 x i8> %c, %d
+ %y = and <4 x i1> %x0, %x1
+ %res = bitcast <4 x i1> %y to i4
+ ret i4 %res
+}
+
+define i4 @v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
+; SSE2-SSSE3-LABEL: v4i16:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: pslld $16, %xmm3
+; SSE2-SSSE3-NEXT: psrad $16, %xmm3
+; SSE2-SSSE3-NEXT: pslld $16, %xmm2
+; SSE2-SSSE3-NEXT: psrad $16, %xmm2
+; SSE2-SSSE3-NEXT: pslld $16, %xmm1
+; SSE2-SSSE3-NEXT: psrad $16, %xmm1
+; SSE2-SSSE3-NEXT: pslld $16, %xmm0
+; SSE2-SSSE3-NEXT: psrad $16, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: movd %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
+; SSE2-SSSE3-NEXT: movd %xmm0, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX12-LABEL: v4i16:
+; AVX12: ## BB#0:
+; AVX12-NEXT: vpslld $16, %xmm3, %xmm3
+; AVX12-NEXT: vpsrad $16, %xmm3, %xmm3
+; AVX12-NEXT: vpslld $16, %xmm2, %xmm2
+; AVX12-NEXT: vpsrad $16, %xmm2, %xmm2
+; AVX12-NEXT: vpslld $16, %xmm1, %xmm1
+; AVX12-NEXT: vpsrad $16, %xmm1, %xmm1
+; AVX12-NEXT: vpslld $16, %xmm0, %xmm0
+; AVX12-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
+; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpextrd $3, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrd $2, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrd $1, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vmovd %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX12-NEXT: retq
+;
+; AVX512-LABEL: v4i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpslld $16, %xmm3, %xmm3
+; AVX512-NEXT: vpsrad $16, %xmm3, %xmm3
+; AVX512-NEXT: vpslld $16, %xmm2, %xmm2
+; AVX512-NEXT: vpsrad $16, %xmm2, %xmm2
+; AVX512-NEXT: vpslld $16, %xmm1, %xmm1
+; AVX512-NEXT: vpsrad $16, %xmm1, %xmm1
+; AVX512-NEXT: vpslld $16, %xmm0, %xmm0
+; AVX512-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpgtd %xmm3, %xmm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <4 x i16> %a, %b
+ %x1 = icmp sgt <4 x i16> %c, %d
+ %y = and <4 x i1> %x0, %x1
+ %res = bitcast <4 x i1> %y to i4
+ ret i4 %res
+}
+
+define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
+; SSE2-SSSE3-LABEL: v8i8:
+; SSE2-SSSE3: ## BB#0:
+; SSE2-SSSE3-NEXT: psllw $8, %xmm3
+; SSE2-SSSE3-NEXT: psraw $8, %xmm3
+; SSE2-SSSE3-NEXT: psllw $8, %xmm2
+; SSE2-SSSE3-NEXT: psraw $8, %xmm2
+; SSE2-SSSE3-NEXT: psllw $8, %xmm1
+; SSE2-SSSE3-NEXT: psraw $8, %xmm1
+; SSE2-SSSE3-NEXT: psllw $8, %xmm0
+; SSE2-SSSE3-NEXT: psraw $8, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: pextrw $7, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $6, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $5, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $4, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $3, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $2, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: pextrw $1, %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movd %xmm2, %eax
+; SSE2-SSSE3-NEXT: andl $1, %eax
+; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: retq
+;
+; AVX12-LABEL: v8i8:
+; AVX12: ## BB#0:
+; AVX12-NEXT: vpsllw $8, %xmm3, %xmm3
+; AVX12-NEXT: vpsraw $8, %xmm3, %xmm3
+; AVX12-NEXT: vpsllw $8, %xmm2, %xmm2
+; AVX12-NEXT: vpsraw $8, %xmm2, %xmm2
+; AVX12-NEXT: vpsllw $8, %xmm1, %xmm1
+; AVX12-NEXT: vpsraw $8, %xmm1, %xmm1
+; AVX12-NEXT: vpsllw $8, %xmm0, %xmm0
+; AVX12-NEXT: vpsraw $8, %xmm0, %xmm0
+; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
+; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpextrw $7, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $6, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $5, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $4, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $3, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $2, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vpextrw $1, %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: vmovd %xmm0, %eax
+; AVX12-NEXT: andl $1, %eax
+; AVX12-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX12-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX12-NEXT: retq
+;
+; AVX512-LABEL: v8i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $8, %xmm3, %xmm3
+; AVX512-NEXT: vpsraw $8, %xmm3, %xmm3
+; AVX512-NEXT: vpsllw $8, %xmm2, %xmm2
+; AVX512-NEXT: vpsraw $8, %xmm2, %xmm2
+; AVX512-NEXT: vpsllw $8, %xmm1, %xmm1
+; AVX512-NEXT: vpsraw $8, %xmm1, %xmm1
+; AVX512-NEXT: vpsllw $8, %xmm0, %xmm0
+; AVX512-NEXT: vpsraw $8, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpgtw %xmm3, %xmm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <8 x i8> %a, %b
+ %x1 = icmp sgt <8 x i8> %c, %d
+ %y = and <8 x i1> %x0, %x1
+ %res = bitcast <8 x i1> %y to i8
+ ret i8 %res
+}
diff --git a/test/CodeGen/X86/bitcast-and-setcc-256.ll b/test/CodeGen/X86/bitcast-and-setcc-256.ll
new file mode 100644
index 000000000000..06b1a76f6bae
--- /dev/null
+++ b/test/CodeGen/X86/bitcast-and-setcc-256.ll
@@ -0,0 +1,403 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX2
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx512f,+avx512vl,+avx512bw < %s | FileCheck %s --check-prefix=AVX512
+
+define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
+; AVX2-LABEL: v4i64:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpextrd $3, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrd $2, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrd $1, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
+; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <4 x i64> %a, %b
+ %x1 = icmp sgt <4 x i64> %c, %d
+ %y = and <4 x i1> %x0, %x1
+ %res = bitcast <4 x i1> %y to i4
+ ret i4 %res
+}
+
+define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
+; AVX2-LABEL: v4f64:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1
+; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpextrd $3, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrd $2, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrd $1, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: v4f64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
+; AVX512-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %x0 = fcmp ogt <4 x double> %a, %b
+ %x1 = fcmp ogt <4 x double> %c, %d
+ %y = and <4 x i1> %x0, %x1
+ %res = bitcast <4 x i1> %y to i4
+ ret i4 %res
+}
+
+define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
+; AVX2-LABEL: v16i16:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpextrb $15, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $14, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $13, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $12, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $11, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $10, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $9, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $8, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $7, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $6, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $5, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $4, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $3, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $2, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $1, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k1
+; AVX512-NEXT: vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <16 x i16> %a, %b
+ %x1 = icmp sgt <16 x i16> %c, %d
+ %y = and <16 x i1> %x0, %x1
+ %res = bitcast <16 x i1> %y to i16
+ ret i16 %res
+}
+
+define i8 @v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
+; AVX2-LABEL: v8i32:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpextrw $7, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $6, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $5, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $4, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $3, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $2, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $1, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
+; AVX512-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <8 x i32> %a, %b
+ %x1 = icmp sgt <8 x i32> %c, %d
+ %y = and <8 x i1> %x0, %x1
+ %res = bitcast <8 x i1> %y to i8
+ ret i8 %res
+}
+
+define i8 @v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
+; AVX2-LABEL: v8f32:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vcmpltps %ymm2, %ymm3, %ymm1
+; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpextrw $7, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $6, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $5, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $4, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $3, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $2, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vpextrw $1, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: v8f32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1
+; AVX512-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %x0 = fcmp ogt <8 x float> %a, %b
+ %x1 = fcmp ogt <8 x float> %c, %d
+ %y = and <8 x i1> %x0, %x1
+ %res = bitcast <8 x i1> %y to i8
+ ret i8 %res
+}
+
+define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
+; AVX2-LABEL: v32i8:
+; AVX2: ## BB#0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: Lcfi0:
+; AVX2-NEXT: .cfi_def_cfa_offset 16
+; AVX2-NEXT: Lcfi1:
+; AVX2-NEXT: .cfi_offset %rbp, -16
+; AVX2-NEXT: movq %rsp, %rbp
+; AVX2-NEXT: Lcfi2:
+; AVX2-NEXT: .cfi_def_cfa_register %rbp
+; AVX2-NEXT: andq $-32, %rsp
+; AVX2-NEXT: subq $32, %rsp
+; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrb $15, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $14, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $13, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $12, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $11, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $10, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $9, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $8, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $7, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $6, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $5, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $4, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $3, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $2, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $1, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $0, %xmm1, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $15, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $14, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $13, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $12, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $11, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $10, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $9, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $8, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $7, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $6, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $5, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $4, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $3, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $2, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $1, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: movb %al, (%rsp)
+; AVX2-NEXT: movl (%rsp), %eax
+; AVX2-NEXT: movq %rbp, %rsp
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k1
+; AVX512-NEXT: vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
+; AVX512-NEXT: kmovd %k0, %eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %x0 = icmp sgt <32 x i8> %a, %b
+ %x1 = icmp sgt <32 x i8> %c, %d
+ %y = and <32 x i1> %x0, %x1
+ %res = bitcast <32 x i1> %y to i32
+ ret i32 %res
+}
diff --git a/test/CodeGen/X86/mul-constant-i16.ll b/test/CodeGen/X86/mul-constant-i16.ll
index e3e2737cf3e6..6d2465ddd3a8 100644
--- a/test/CodeGen/X86/mul-constant-i16.ll
+++ b/test/CodeGen/X86/mul-constant-i16.ll
@@ -188,13 +188,16 @@ define i16 @test_mul_by_11(i16 %x) {
; X86-LABEL: test_mul_by_11:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $11, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_11:
; X64: # BB#0:
-; X64-NEXT: imull $11, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rdi,%rax,2), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 11
@@ -225,13 +228,16 @@ define i16 @test_mul_by_13(i16 %x) {
; X86-LABEL: test_mul_by_13:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $13, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_13:
; X64: # BB#0:
-; X64-NEXT: imull $13, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 13
@@ -241,14 +247,19 @@ define i16 @test_mul_by_13(i16 %x) {
define i16 @test_mul_by_14(i16 %x) {
; X86-LABEL: test_mul_by_14:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $14, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_14:
; X64: # BB#0:
-; X64-NEXT: imull $14, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 14
@@ -338,14 +349,19 @@ define i16 @test_mul_by_19(i16 %x) {
; X86-LABEL: test_mul_by_19:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $19, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: shll $2, %ecx
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_19:
; X64: # BB#0:
-; X64-NEXT: imull $19, %edi, %eax
-; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: shll $2, %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
%mul = mul nsw i16 %x, 19
ret i16 %mul
@@ -375,13 +391,16 @@ define i16 @test_mul_by_21(i16 %x) {
; X86-LABEL: test_mul_by_21:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $21, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_21:
; X64: # BB#0:
-; X64-NEXT: imull $21, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 21
@@ -391,14 +410,19 @@ define i16 @test_mul_by_21(i16 %x) {
define i16 @test_mul_by_22(i16 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $22, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_22:
; X64: # BB#0:
-; X64-NEXT: imull $22, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 22
@@ -409,14 +433,19 @@ define i16 @test_mul_by_23(i16 %x) {
; X86-LABEL: test_mul_by_23:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $23, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: shll $3, %ecx
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_23:
; X64: # BB#0:
-; X64-NEXT: imull $23, %edi, %eax
-; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: shll $3, %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
%mul = mul nsw i16 %x, 23
ret i16 %mul
@@ -466,14 +495,19 @@ define i16 @test_mul_by_26(i16 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $26, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_26:
; X64: # BB#0:
-; X64-NEXT: imull $26, %edi, %eax
-; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
%mul = mul nsw i16 %x, 26
ret i16 %mul
@@ -502,14 +536,19 @@ define i16 @test_mul_by_27(i16 %x) {
define i16 @test_mul_by_28(i16 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $28, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_28:
; X64: # BB#0:
-; X64-NEXT: imull $28, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 28
@@ -519,14 +558,21 @@ define i16 @test_mul_by_28(i16 %x) {
define i16 @test_mul_by_29(i16 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $29, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_29:
; X64: # BB#0:
-; X64-NEXT: imull $29, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: addl %edi, %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 29
@@ -537,14 +583,22 @@ define i16 @test_mul_by_30(i16 %x) {
; X86-LABEL: test_mul_by_30:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $30, %eax, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: subl %edx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_30:
; X64: # BB#0:
-; X64-NEXT: imull $30, %edi, %eax
-; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $5, %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: subl %ecx, %edi
+; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
%mul = mul nsw i16 %x, 30
ret i16 %mul
@@ -587,3 +641,30 @@ define i16 @test_mul_by_32(i16 %x) {
%mul = mul nsw i16 %x, 32
ret i16 %mul
}
+
+; (x*9+42)*(x*5+2)
+define i16 @test_mul_spec(i16 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86: # BB#0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal 42(%eax,%eax,8), %ecx
+; X86-NEXT: leal 2(%eax,%eax,4), %eax
+; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mul_spec:
+; X64: # BB#0:
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal 42(%rdi,%rdi,8), %ecx
+; X64-NEXT: leal 2(%rdi,%rdi,4), %eax
+; X64-NEXT: imull %ecx, %eax
+; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT: retq
+ %mul = mul nsw i16 %x, 9
+ %add = add nsw i16 %mul, 42
+ %mul2 = mul nsw i16 %x, 5
+ %add2 = add nsw i16 %mul2, 2
+ %mul3 = mul nsw i16 %add, %add2
+ ret i16 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll
index 76e46e1f1b09..b1e9a929b7f2 100644
--- a/test/CodeGen/X86/mul-constant-i32.ll
+++ b/test/CodeGen/X86/mul-constant-i32.ll
@@ -1,6 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
define i32 @test_mul_by_1(i32 %x) {
; X86-LABEL: test_mul_by_1:
@@ -8,10 +14,40 @@ define i32 @test_mul_by_1(i32 %x) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_1:
-; X64: # BB#0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_1:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_1:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_1:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_1:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_1:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_1:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_1:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 1
ret i32 %mul
}
@@ -23,11 +59,47 @@ define i32 @test_mul_by_2(i32 %x) {
; X86-NEXT: addl %eax, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_2:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_2:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_2:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_2:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: addl %eax, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_2:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_2:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_2:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_2:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 2
ret i32 %mul
}
@@ -38,11 +110,46 @@ define i32 @test_mul_by_3(i32 %x) {
; X86-NEXT: imull $3, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_3:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_3:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_3:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_3:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_3:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_3:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_3:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_3:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 3
ret i32 %mul
}
@@ -54,11 +161,47 @@ define i32 @test_mul_by_4(i32 %x) {
; X86-NEXT: shll $2, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_4:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_4:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_4:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_4:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $2, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_4:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_4:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_4:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_4:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 4
ret i32 %mul
}
@@ -69,11 +212,46 @@ define i32 @test_mul_by_5(i32 %x) {
; X86-NEXT: imull $5, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_5:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_5:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_5:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_5:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_5:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_5:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_5:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_5:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 5
ret i32 %mul
}
@@ -86,12 +264,46 @@ define i32 @test_mul_by_6(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_6:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: addl %edi, %edi
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_6:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_6:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_6:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_6:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_6:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_6:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_6:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 6
ret i32 %mul
}
@@ -104,12 +316,46 @@ define i32 @test_mul_by_7(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_7:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (,%rdi,8), %eax
-; X64-NEXT: subl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_7:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_7:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_7:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_7:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_7:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_7:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_7:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 7
ret i32 %mul
}
@@ -121,11 +367,47 @@ define i32 @test_mul_by_8(i32 %x) {
; X86-NEXT: shll $3, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_8:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (,%rdi,8), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_8:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_8:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_8:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $3, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_8:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_8:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_8:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_8:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 8
ret i32 %mul
}
@@ -136,11 +418,46 @@ define i32 @test_mul_by_9(i32 %x) {
; X86-NEXT: imull $9, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_9:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,8), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_9:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_9:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_9:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_9:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_9:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_9:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_9:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 9
ret i32 %mul
}
@@ -153,12 +470,46 @@ define i32 @test_mul_by_10(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_10:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: addl %edi, %edi
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_10:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_10:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_10:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_10:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_10:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_10:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_10:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 10
ret i32 %mul
}
@@ -166,13 +517,49 @@ define i32 @test_mul_by_10(i32 %x) {
define i32 @test_mul_by_11(i32 %x) {
; X86-LABEL: test_mul_by_11:
; X86: # BB#0:
-; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_11:
-; X64: # BB#0:
-; X64-NEXT: imull $11, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_11:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_11:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_11:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_11:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_11:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_11:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_11:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 11
ret i32 %mul
}
@@ -185,12 +572,46 @@ define i32 @test_mul_by_12(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_12:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: shll $2, %edi
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_12:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_12:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_12:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_12:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_12:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_12:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_12:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 12
ret i32 %mul
}
@@ -198,13 +619,49 @@ define i32 @test_mul_by_12(i32 %x) {
define i32 @test_mul_by_13(i32 %x) {
; X86-LABEL: test_mul_by_13:
; X86: # BB#0:
-; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_13:
-; X64: # BB#0:
-; X64-NEXT: imull $13, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_13:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_13:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_13:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_13:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_13:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_13:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_13:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 13
ret i32 %mul
}
@@ -212,13 +669,52 @@ define i32 @test_mul_by_13(i32 %x) {
define i32 @test_mul_by_14(i32 %x) {
; X86-LABEL: test_mul_by_14:
; X86: # BB#0:
-; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_14:
-; X64: # BB#0:
-; X64-NEXT: imull $14, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_14:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_14:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_14:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_14:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_14:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_14:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_14:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 14
ret i32 %mul
}
@@ -231,12 +727,46 @@ define i32 @test_mul_by_15(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_15:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: leal (%rax,%rax,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_15:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_15:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_15:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_15:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_15:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_15:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_15:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 15
ret i32 %mul
}
@@ -248,11 +778,47 @@ define i32 @test_mul_by_16(i32 %x) {
; X86-NEXT: shll $4, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_16:
-; X64: # BB#0:
-; X64-NEXT: shll $4, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_16:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_16:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shll $4, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_16:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $4, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_16:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_16:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_16:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shll $4, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_16:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 16
ret i32 %mul
}
@@ -266,13 +832,49 @@ define i32 @test_mul_by_17(i32 %x) {
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_17:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $4, %eax
-; X64-NEXT: leal (%rax,%rdi), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_17:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_17:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_17:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_17:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_17:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_17:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rdi), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_17:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 17
ret i32 %mul
}
@@ -285,12 +887,46 @@ define i32 @test_mul_by_18(i32 %x) {
; X86-NEXT: leal (%eax,%eax,8), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_18:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: addl %edi, %edi
-; X64-NEXT: leal (%rdi,%rdi,8), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_18:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_18:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_18:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_18:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_18:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_18:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_18:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 18
ret i32 %mul
}
@@ -298,13 +934,54 @@ define i32 @test_mul_by_18(i32 %x) {
define i32 @test_mul_by_19(i32 %x) {
; X86-LABEL: test_mul_by_19:
; X86: # BB#0:
-; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: shll $2, %ecx
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_19:
-; X64: # BB#0:
-; X64-NEXT: imull $19, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_19:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %eax, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_19:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: shll $2, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %eax, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_19:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_19:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_19:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_19:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_19:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 19
ret i32 %mul
}
@@ -317,12 +994,46 @@ define i32 @test_mul_by_20(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_20:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: shll $2, %edi
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_20:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_20:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_20:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_20:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_20:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_20:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_20:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 20
ret i32 %mul
}
@@ -330,13 +1041,49 @@ define i32 @test_mul_by_20(i32 %x) {
define i32 @test_mul_by_21(i32 %x) {
; X86-LABEL: test_mul_by_21:
; X86: # BB#0:
-; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_21:
-; X64: # BB#0:
-; X64-NEXT: imull $21, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_21:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_21:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_21:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_21:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_21:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_21:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_21:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 21
ret i32 %mul
}
@@ -344,13 +1091,52 @@ define i32 @test_mul_by_21(i32 %x) {
define i32 @test_mul_by_22(i32 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # BB#0:
-; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_22:
-; X64: # BB#0:
-; X64-NEXT: imull $22, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_22:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_22:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_22:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_22:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_22:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_22:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_22:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 22
ret i32 %mul
}
@@ -358,13 +1144,54 @@ define i32 @test_mul_by_22(i32 %x) {
define i32 @test_mul_by_23(i32 %x) {
; X86-LABEL: test_mul_by_23:
; X86: # BB#0:
-; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: shll $3, %ecx
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_23:
-; X64: # BB#0:
-; X64-NEXT: imull $23, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_23:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %eax, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_23:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: shll $3, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %eax, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_23:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_23:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_23:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_23:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_23:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 23
ret i32 %mul
}
@@ -377,12 +1204,46 @@ define i32 @test_mul_by_24(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_24:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: shll $3, %edi
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_24:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_24:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: shll $3, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_24:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_24:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_24:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_24:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: shll $3, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_24:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 24
ret i32 %mul
}
@@ -395,12 +1256,46 @@ define i32 @test_mul_by_25(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_25:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: leal (%rax,%rax,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_25:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_25:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_25:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_25:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_25:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_25:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rax,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_25:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 25
ret i32 %mul
}
@@ -408,13 +1303,54 @@ define i32 @test_mul_by_25(i32 %x) {
define i32 @test_mul_by_26(i32 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # BB#0:
-; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_26:
-; X64: # BB#0:
-; X64-NEXT: imull $26, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_26:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %eax, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_26:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %eax, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_26:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_26:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_26:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_26:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_26:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 26
ret i32 %mul
}
@@ -427,12 +1363,46 @@ define i32 @test_mul_by_27(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_27:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,8), %eax
-; X64-NEXT: leal (%rax,%rax,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_27:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_27:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_27:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_27:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_27:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_27:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_27:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 27
ret i32 %mul
}
@@ -440,13 +1410,52 @@ define i32 @test_mul_by_27(i32 %x) {
define i32 @test_mul_by_28(i32 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # BB#0:
-; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_28:
-; X64: # BB#0:
-; X64-NEXT: imull $28, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_28:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_28:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_28:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_28:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_28:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_28:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_28:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 28
ret i32 %mul
}
@@ -454,13 +1463,55 @@ define i32 @test_mul_by_28(i32 %x) {
define i32 @test_mul_by_29(i32 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
-; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_29:
-; X64: # BB#0:
-; X64-NEXT: imull $29, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_29:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_29:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_29:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_29:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_29:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_29:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_29:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 29
ret i32 %mul
}
@@ -468,13 +1519,58 @@ define i32 @test_mul_by_29(i32 %x) {
define i32 @test_mul_by_30(i32 %x) {
; X86-LABEL: test_mul_by_30:
; X86: # BB#0:
-; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: subl %edx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_30:
-; X64: # BB#0:
-; X64-NEXT: imull $30, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_30:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: movl %edi, %ecx # sched: [1:0.25]
+; X64-HSW-NEXT: subl %eax, %ecx # sched: [1:0.25]
+; X64-HSW-NEXT: subl %ecx, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_30:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: movl %edi, %ecx # sched: [1:0.17]
+; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %eax, %ecx # sched: [1:0.50]
+; X64-JAG-NEXT: subl %ecx, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_30:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_30:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_30:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_30:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_30:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 30
ret i32 %mul
}
@@ -488,12 +1584,46 @@ define i32 @test_mul_by_31(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_31:
-; X64: # BB#0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $5, %eax
-; X64-NEXT: subl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_31:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_31:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_31:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_31:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_31:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_31:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00]
+; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_31:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 31
ret i32 %mul
}
@@ -505,11 +1635,124 @@ define i32 @test_mul_by_32(i32 %x) {
; X86-NEXT: shll $5, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_32:
-; X64: # BB#0:
-; X64-NEXT: shll $5, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_32:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_32:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shll $5, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_32:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $5, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_32:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_32:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_32:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shll $5, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_32:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 32
ret i32 %mul
}
+
+; (x*9+42)*(x*5+2)
+define i32 @test_mul_spec(i32 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal 42(%eax,%eax,8), %ecx
+; X86-NEXT: leal 2(%eax,%eax,4), %eax
+; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_spec:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_spec:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; X64-JAG-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_spec:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: leal 42(%eax,%eax,8), %ecx
+; X86-NOOPT-NEXT: leal 2(%eax,%eax,4), %eax
+; X86-NOOPT-NEXT: imull %ecx, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_spec:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25]
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_spec:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; JAG-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_spec:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; X64-SLM-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_spec:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; SLM-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i32 %x, 9
+ %add = add nsw i32 %mul, 42
+ %mul2 = mul nsw i32 %x, 5
+ %add2 = add nsw i32 %mul2, 2
+ %mul3 = mul nsw i32 %add, %add2
+ ret i32 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll
index 8579179a8231..22eb0bdc6c3f 100644
--- a/test/CodeGen/X86/mul-constant-i64.ll
+++ b/test/CodeGen/X86/mul-constant-i64.ll
@@ -1,18 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
-define i64 @test_mul_by_1(i64 %x) {
+define i64 @test_mul_by_1(i64 %x) nounwind {
; X86-LABEL: test_mul_by_1:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_1:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_1:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_1:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_1:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_1:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_1:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_1:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_1:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 1
ret i64 %mul
}
@@ -26,10 +63,43 @@ define i64 @test_mul_by_2(i64 %x) {
; X86-NEXT: addl %eax, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_2:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_2:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_2:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_2:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $1, %eax, %edx
+; X86-NOOPT-NEXT: addl %eax, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_2:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_2:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_2:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_2:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 2
ret i64 %mul
}
@@ -43,10 +113,43 @@ define i64 @test_mul_by_3(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_3:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_3:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_3:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_3:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $3, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_3:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_3:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_3:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_3:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 3
ret i64 %mul
}
@@ -60,10 +163,43 @@ define i64 @test_mul_by_4(i64 %x) {
; X86-NEXT: shll $2, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_4:
-; X64: # BB#0:
-; X64-NEXT: leaq (,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_4:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_4:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_4:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $2, %eax, %edx
+; X86-NOOPT-NEXT: shll $2, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_4:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_4:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_4:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_4:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 4
ret i64 %mul
}
@@ -77,10 +213,43 @@ define i64 @test_mul_by_5(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_5:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_5:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_5:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_5:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $5, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_5:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_5:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_5:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_5:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 5
ret i64 %mul
}
@@ -95,11 +264,46 @@ define i64 @test_mul_by_6(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_6:
-; X64: # BB#0:
-; X64-NEXT: addq %rdi, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_6:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_6:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_6:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $6, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_6:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_6:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_6:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_6:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 6
ret i64 %mul
}
@@ -115,11 +319,46 @@ define i64 @test_mul_by_7(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_7:
-; X64: # BB#0:
-; X64-NEXT: leaq (,%rdi,8), %rax
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_7:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_7:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_7:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $7, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_7:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_7:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_7:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_7:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 7
ret i64 %mul
}
@@ -133,10 +372,43 @@ define i64 @test_mul_by_8(i64 %x) {
; X86-NEXT: shll $3, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_8:
-; X64: # BB#0:
-; X64-NEXT: leaq (,%rdi,8), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_8:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_8:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_8:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $3, %eax, %edx
+; X86-NOOPT-NEXT: shll $3, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_8:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_8:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_8:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_8:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 8
ret i64 %mul
}
@@ -150,10 +422,43 @@ define i64 @test_mul_by_9(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_9:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_9:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_9:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_9:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $9, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_9:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_9:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_9:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_9:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 9
ret i64 %mul
}
@@ -168,11 +473,46 @@ define i64 @test_mul_by_10(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_10:
-; X64: # BB#0:
-; X64-NEXT: addq %rdi, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_10:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_10:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_10:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $10, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_10:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_10:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_10:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_10:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 10
ret i64 %mul
}
@@ -180,16 +520,53 @@ define i64 @test_mul_by_10(i64 %x) {
define i64 @test_mul_by_11(i64 %x) {
; X86-LABEL: test_mul_by_11:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %ecx
; X86-NEXT: movl $11, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_11:
-; X64: # BB#0:
-; X64-NEXT: imulq $11, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_11:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_11:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_11:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $11, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_11:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_11:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_11:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_11:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 11
ret i64 %mul
}
@@ -204,11 +581,46 @@ define i64 @test_mul_by_12(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,4), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_12:
-; X64: # BB#0:
-; X64-NEXT: shlq $2, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_12:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_12:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_12:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $12, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_12:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_12:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_12:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_12:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 12
ret i64 %mul
}
@@ -216,16 +628,53 @@ define i64 @test_mul_by_12(i64 %x) {
define i64 @test_mul_by_13(i64 %x) {
; X86-LABEL: test_mul_by_13:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
; X86-NEXT: movl $13, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_13:
-; X64: # BB#0:
-; X64-NEXT: imulq $13, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_13:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_13:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_13:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $13, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_13:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_13:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_13:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_13:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 13
ret i64 %mul
}
@@ -233,16 +682,56 @@ define i64 @test_mul_by_13(i64 %x) {
define i64 @test_mul_by_14(i64 %x) {
; X86-LABEL: test_mul_by_14:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $14, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_14:
-; X64: # BB#0:
-; X64-NEXT: imulq $14, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_14:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_14:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_14:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $14, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_14:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_14:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_14:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_14:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 14
ret i64 %mul
}
@@ -258,11 +747,46 @@ define i64 @test_mul_by_15(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_15:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: leaq (%rax,%rax,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_15:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_15:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_15:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $15, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_15:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_15:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_15:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_15:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 15
ret i64 %mul
}
@@ -276,11 +800,49 @@ define i64 @test_mul_by_16(i64 %x) {
; X86-NEXT: shll $4, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_16:
-; X64: # BB#0:
-; X64-NEXT: shlq $4, %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_16:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_16:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_16:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $4, %eax, %edx
+; X86-NOOPT-NEXT: shll $4, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_16:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_16:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_16:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $4, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_16:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shlq $4, %rdi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 16
ret i64 %mul
}
@@ -297,12 +859,49 @@ define i64 @test_mul_by_17(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_17:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlq $4, %rax
-; X64-NEXT: leaq (%rax,%rdi), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_17:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_17:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_17:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $17, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_17:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_17:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_17:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00]
+; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_17:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 17
ret i64 %mul
}
@@ -317,11 +916,46 @@ define i64 @test_mul_by_18(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_18:
-; X64: # BB#0:
-; X64-NEXT: addq %rdi, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_18:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_18:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_18:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $18, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_18:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_18:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_18:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_18:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 18
ret i64 %mul
}
@@ -329,16 +963,58 @@ define i64 @test_mul_by_18(i64 %x) {
define i64 @test_mul_by_19(i64 %x) {
; X86-LABEL: test_mul_by_19:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $19, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_19:
-; X64: # BB#0:
-; X64-NEXT: imulq $19, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_19:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rax, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_19:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: shlq $2, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rax, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_19:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $19, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_19:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_19:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_19:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_19:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 19
ret i64 %mul
}
@@ -353,11 +1029,46 @@ define i64 @test_mul_by_20(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,4), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_20:
-; X64: # BB#0:
-; X64-NEXT: shlq $2, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_20:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_20:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_20:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $20, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_20:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_20:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_20:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_20:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 20
ret i64 %mul
}
@@ -365,16 +1076,53 @@ define i64 @test_mul_by_20(i64 %x) {
define i64 @test_mul_by_21(i64 %x) {
; X86-LABEL: test_mul_by_21:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
; X86-NEXT: movl $21, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_21:
-; X64: # BB#0:
-; X64-NEXT: imulq $21, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_21:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_21:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_21:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $21, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_21:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_21:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_21:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_21:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 21
ret i64 %mul
}
@@ -382,16 +1130,56 @@ define i64 @test_mul_by_21(i64 %x) {
define i64 @test_mul_by_22(i64 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $22, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_22:
-; X64: # BB#0:
-; X64-NEXT: imulq $22, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_22:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_22:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_22:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $22, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_22:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_22:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_22:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_22:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 22
ret i64 %mul
}
@@ -399,16 +1187,58 @@ define i64 @test_mul_by_22(i64 %x) {
define i64 @test_mul_by_23(i64 %x) {
; X86-LABEL: test_mul_by_23:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $23, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_23:
-; X64: # BB#0:
-; X64-NEXT: imulq $23, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_23:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rax, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_23:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: shlq $3, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rax, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_23:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $23, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_23:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_23:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_23:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_23:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 23
ret i64 %mul
}
@@ -423,11 +1253,46 @@ define i64 @test_mul_by_24(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,8), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_24:
-; X64: # BB#0:
-; X64-NEXT: shlq $3, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_24:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_24:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $3, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_24:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $24, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_24:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_24:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_24:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $3, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_24:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 24
ret i64 %mul
}
@@ -443,11 +1308,46 @@ define i64 @test_mul_by_25(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_25:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: leaq (%rax,%rax,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_25:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_25:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_25:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $25, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_25:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_25:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_25:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_25:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 25
ret i64 %mul
}
@@ -455,16 +1355,58 @@ define i64 @test_mul_by_25(i64 %x) {
define i64 @test_mul_by_26(i64 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $26, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_26:
-; X64: # BB#0:
-; X64-NEXT: imulq $26, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_26:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rax, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_26:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rax, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_26:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $26, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_26:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_26:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_26:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_26:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 26
ret i64 %mul
}
@@ -480,11 +1422,46 @@ define i64 @test_mul_by_27(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_27:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: leaq (%rax,%rax,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_27:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_27:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_27:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $27, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_27:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_27:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_27:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_27:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 27
ret i64 %mul
}
@@ -492,16 +1469,56 @@ define i64 @test_mul_by_27(i64 %x) {
define i64 @test_mul_by_28(i64 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $28, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_28:
-; X64: # BB#0:
-; X64-NEXT: imulq $28, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_28:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_28:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_28:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $28, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_28:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_28:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_28:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_28:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 28
ret i64 %mul
}
@@ -509,16 +1526,59 @@ define i64 @test_mul_by_28(i64 %x) {
define i64 @test_mul_by_29(i64 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $29, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_29:
-; X64: # BB#0:
-; X64-NEXT: imulq $29, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_29:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_29:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_29:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $29, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_29:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_29:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_29:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_29:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 29
ret i64 %mul
}
@@ -526,16 +1586,60 @@ define i64 @test_mul_by_29(i64 %x) {
define i64 @test_mul_by_30(i64 %x) {
; X86-LABEL: test_mul_by_30:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $5, %ecx
; X86-NEXT: movl $30, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_30:
-; X64: # BB#0:
-; X64-NEXT: imulq $30, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_30:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: movq %rdi, %rcx # sched: [1:0.25]
+; X64-HSW-NEXT: subq %rax, %rcx # sched: [1:0.25]
+; X64-HSW-NEXT: subq %rcx, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_30:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: movq %rdi, %rcx # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rax, %rcx # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rcx, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_30:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $30, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_30:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_30:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_30:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_30:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 30
ret i64 %mul
}
@@ -552,12 +1656,49 @@ define i64 @test_mul_by_31(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_31:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlq $5, %rax
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_31:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_31:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_31:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $31, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_31:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_31:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_31:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00]
+; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_31:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 31
ret i64 %mul
}
@@ -571,11 +1712,168 @@ define i64 @test_mul_by_32(i64 %x) {
; X86-NEXT: shll $5, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_32:
-; X64: # BB#0:
-; X64-NEXT: shlq $5, %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_32:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_32:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_32:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $5, %eax, %edx
+; X86-NOOPT-NEXT: shll $5, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_32:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_32:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_32:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $5, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_32:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shlq $5, %rdi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 32
ret i64 %mul
}
+
+; (x*9+42)*(x*5+2)
+define i64 @test_mul_spec(i64 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86: # BB#0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl $9, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: leal (%edi,%edi,8), %ebx
+; X86-NEXT: addl $42, %esi
+; X86-NEXT: adcl %edx, %ebx
+; X86-NEXT: movl $5, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: leal (%edi,%edi,4), %edi
+; X86-NEXT: addl $2, %ecx
+; X86-NEXT: adcl %edx, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: imull %esi, %edi
+; X86-NEXT: addl %edi, %edx
+; X86-NEXT: imull %ebx, %ecx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_spec:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; X64-HSW-NEXT: addq $42, %rcx # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_spec:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_spec:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: pushl %ebx
+; X86-NOOPT-NEXT: pushl %edi
+; X86-NOOPT-NEXT: pushl %esi
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NOOPT-NEXT: movl $9, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: movl %eax, %esi
+; X86-NOOPT-NEXT: leal (%edi,%edi,8), %ebx
+; X86-NOOPT-NEXT: addl $42, %esi
+; X86-NOOPT-NEXT: adcl %edx, %ebx
+; X86-NOOPT-NEXT: movl $5, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: movl %eax, %ecx
+; X86-NOOPT-NEXT: leal (%edi,%edi,4), %edi
+; X86-NOOPT-NEXT: addl $2, %ecx
+; X86-NOOPT-NEXT: adcl %edx, %edi
+; X86-NOOPT-NEXT: movl %esi, %eax
+; X86-NOOPT-NEXT: mull %ecx
+; X86-NOOPT-NEXT: imull %esi, %edi
+; X86-NOOPT-NEXT: addl %edi, %edx
+; X86-NOOPT-NEXT: imull %ebx, %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: popl %esi
+; X86-NOOPT-NEXT: popl %edi
+; X86-NOOPT-NEXT: popl %ebx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_spec:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addq $42, %rcx # sched: [1:0.25]
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_spec:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_spec:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
+; X64-SLM-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_spec:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
+; SLM-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i64 %x, 9
+ %add = add nsw i64 %mul, 42
+ %mul2 = mul nsw i64 %x, 5
+ %add2 = add nsw i64 %mul2, 2
+ %mul3 = mul nsw i64 %add, %add2
+ ret i64 %mul3
+}
diff --git a/test/CodeGen/X86/setcc-lowering.ll b/test/CodeGen/X86/setcc-lowering.ll
index 391f1cc9fb43..1b8f8e7ae559 100644
--- a/test/CodeGen/X86/setcc-lowering.ll
+++ b/test/CodeGen/X86/setcc-lowering.ll
@@ -41,14 +41,67 @@ entry:
ret <8 x i16> %3
}
-define void @pr26232(i64 %a) {
+define void @pr26232(i64 %a, <16 x i1> %b) {
; AVX-LABEL: pr26232:
; AVX: # BB#0: # %for_loop599.preheader
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: .p2align 4, 0x90
; AVX-NEXT: .LBB1_1: # %for_loop599
; AVX-NEXT: # =>This Inner Loop Header: Depth=1
+; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: cmpq $65536, %rdi # imm = 0x10000
-; AVX-NEXT: setl -{{[0-9]+}}(%rsp)
+; AVX-NEXT: setl %al
+; AVX-NEXT: vmovd %eax, %xmm2
+; AVX-NEXT: vpshufb %xmm1, %xmm2, %xmm2
+; AVX-NEXT: vpand %xmm0, %xmm2, %xmm2
+; AVX-NEXT: vpextrb $15, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $14, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $13, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $12, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $11, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $10, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $9, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $8, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $7, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $6, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $5, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $4, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $3, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $2, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $1, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vpextrb $0, %xmm2, %eax
+; AVX-NEXT: andb $1, %al
+; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX-NEXT: cmpw $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: jne .LBB1_1
; AVX-NEXT: # BB#2: # %for_exit600
@@ -61,6 +114,9 @@ define void @pr26232(i64 %a) {
; KNL-32-NEXT: .cfi_def_cfa_offset 8
; KNL-32-NEXT: .Lcfi1:
; KNL-32-NEXT: .cfi_offset %esi, -8
+; KNL-32-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-32-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-32-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; KNL-32-NEXT: movw $-1, %dx
@@ -72,6 +128,9 @@ define void @pr26232(i64 %a) {
; KNL-32-NEXT: sbbl $0, %esi
; KNL-32-NEXT: movl $0, %esi
; KNL-32-NEXT: cmovlw %dx, %si
+; KNL-32-NEXT: kmovw %esi, %k1
+; KNL-32-NEXT: kandw %k0, %k1, %k1
+; KNL-32-NEXT: kmovw %k1, %esi
; KNL-32-NEXT: testw %si, %si
; KNL-32-NEXT: jne .LBB1_1
; KNL-32-NEXT: # BB#2: # %for_exit600
@@ -87,7 +146,7 @@ for_loop599: ; preds = %for_loop599, %for_t
%less_i_load605_ = icmp slt i64 %a, 65536
%less_i_load605__broadcast_init = insertelement <16 x i1> undef, i1 %less_i_load605_, i32 0
%less_i_load605__broadcast = shufflevector <16 x i1> %less_i_load605__broadcast_init, <16 x i1> undef, <16 x i32> zeroinitializer
- %"oldMask&test607" = and <16 x i1> %less_i_load605__broadcast, undef
+ %"oldMask&test607" = and <16 x i1> %less_i_load605__broadcast, %b
%intmask.i894 = bitcast <16 x i1> %"oldMask&test607" to i16
%res.i895 = icmp eq i16 %intmask.i894, 0
br i1 %res.i895, label %for_exit600, label %for_loop599
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index 8cc1d8c765ac..53e471d6f175 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -1749,6 +1749,62 @@ entry:
ret <4 x i64> %Y
}
+define <2 x i64> @load_sext_4i8_to_4i64_extract(<4 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_4i8_to_4i64_extract:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsbq 3(%rdi), %rax
+; SSE2-NEXT: movq %rax, %xmm1
+; SSE2-NEXT: movsbq 2(%rdi), %rax
+; SSE2-NEXT: movq %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_4i8_to_4i64_extract:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movsbq 3(%rdi), %rax
+; SSSE3-NEXT: movq %rax, %xmm1
+; SSSE3-NEXT: movsbq 2(%rdi), %rax
+; SSSE3-NEXT: movq %rax, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_4i8_to_4i64_extract:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_4i8_to_4i64_extract:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_4i8_to_4i64_extract:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_sext_4i8_to_4i64_extract:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_4i8_to_4i64_extract:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm0
+; X32-SSE41-NEXT: retl
+ %ld = load <4 x i8>, <4 x i8>* %ptr
+ %sext = sext <4 x i8> %ld to <4 x i64>
+ %extract = shufflevector <4 x i64> %sext, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x i64> %extract
+}
+
define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSE2-LABEL: load_sext_8i1_to_8i16:
; SSE2: # BB#0: # %entry
diff --git a/test/CodeGen/X86/xchg-nofold.ll b/test/CodeGen/X86/xchg-nofold.ll
new file mode 100644
index 000000000000..fddc7906e08f
--- /dev/null
+++ b/test/CodeGen/X86/xchg-nofold.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s
+
+%"struct.std::atomic" = type { %"struct.std::atomic_bool" }
+%"struct.std::atomic_bool" = type { %"struct.std::__atomic_base" }
+%"struct.std::__atomic_base" = type { i8 }
+
+; CHECK-LABEL: _Z3fooRSt6atomicIbEb
+define zeroext i1 @_Z3fooRSt6atomicIbEb(%"struct.std::atomic"* nocapture dereferenceable(1) %a, i1 returned zeroext %b) nounwind {
+entry:
+ %frombool.i.i = zext i1 %b to i8
+ %_M_i.i.i = getelementptr inbounds %"struct.std::atomic", %"struct.std::atomic"* %a, i64 0, i32 0, i32 0, i32 0
+ %0 = ptrtoint i8* %_M_i.i.i to i64
+ %1 = lshr i64 %0, 3
+ %2 = add i64 %1, 2147450880
+ %3 = inttoptr i64 %2 to i8*
+ %4 = load i8, i8* %3
+ %5 = icmp ne i8 %4, 0
+ br i1 %5, label %6, label %11
+
+; <label>:6: ; preds = %entry
+ %7 = and i64 %0, 7
+ %8 = trunc i64 %7 to i8
+ %9 = icmp sge i8 %8, %4
+ br i1 %9, label %10, label %11
+
+; <label>:10: ; preds = %6
+ call void @__asan_report_store1(i64 %0)
+ call void asm sideeffect "", ""()
+ unreachable
+
+; <label>:11: ; preds = %6, %entry
+ store atomic i8 %frombool.i.i, i8* %_M_i.i.i seq_cst, align 1
+; CHECK: xchgb %{{.*}}, (%{{.*}})
+ ret i1 %b
+}
+
+declare void @__asan_report_store1(i64)
diff --git a/test/MC/AArch64/ldr-pseudo.s b/test/MC/AArch64/ldr-pseudo.s
index e132f7cf651f..1d99d1401801 100644
--- a/test/MC/AArch64/ldr-pseudo.s
+++ b/test/MC/AArch64/ldr-pseudo.s
@@ -205,6 +205,13 @@ f18:
ldr x1, =0x320064
// CHECK: ldr x1, .Ltmp[[TMP26:[0-9]+]]
+// We previously used a DenseMap with constant values as keys, check that
+// sentinel values can be used.
+ ldr x0, =0x7ffffffffffffffe
+// CHECK: ldr x0, .Ltmp[[TMP27:[0-9]+]]
+ ldr x1, =0x7fffffffffffffff
+// CHECK: ldr x1, .Ltmp[[TMP28:[0-9]+]]
+
//
// Constant Pools
//
@@ -311,3 +318,8 @@ f18:
// CHECK: .p2align 2
// CHECK: .Ltmp[[TMP25]]
// CHECK: .word 3276900
+
+// CHECK: .Ltmp[[TMP27]]
+// CHECK: .xword 9223372036854775806
+// CHECK: .Ltmp[[TMP28]]
+// CHECK: .xword 9223372036854775807
diff --git a/test/MC/Disassembler/SystemZ/insns-z13.txt b/test/MC/Disassembler/SystemZ/insns-z13.txt
index 4f5ec43f7348..c48bdee8d613 100644
--- a/test/MC/Disassembler/SystemZ/insns-z13.txt
+++ b/test/MC/Disassembler/SystemZ/insns-z13.txt
@@ -2,6 +2,114 @@
# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=z13 \
# RUN: | FileCheck %s
+# CHECK: cdpt %f0, 0(1), 0
+0xed 0x00 0x00 0x00 0x00 0xae
+
+# CHECK: cdpt %f15, 0(1), 0
+0xed 0x00 0x00 0x00 0xf0 0xae
+
+# CHECK: cdpt %f0, 0(1), 15
+0xed 0x00 0x00 0x00 0x0f 0xae
+
+# CHECK: cdpt %f0, 0(1,%r1), 0
+0xed 0x00 0x10 0x00 0x00 0xae
+
+# CHECK: cdpt %f0, 0(1,%r15), 0
+0xed 0x00 0xf0 0x00 0x00 0xae
+
+# CHECK: cdpt %f0, 4095(1,%r1), 0
+0xed 0x00 0x1f 0xff 0x00 0xae
+
+# CHECK: cdpt %f0, 4095(1,%r15), 0
+0xed 0x00 0xff 0xff 0x00 0xae
+
+# CHECK: cdpt %f0, 0(256,%r1), 0
+0xed 0xff 0x10 0x00 0x00 0xae
+
+# CHECK: cdpt %f0, 0(256,%r15), 0
+0xed 0xff 0xf0 0x00 0x00 0xae
+
+# CHECK: cpdt %f0, 0(1), 0
+0xed 0x00 0x00 0x00 0x00 0xac
+
+# CHECK: cpdt %f15, 0(1), 0
+0xed 0x00 0x00 0x00 0xf0 0xac
+
+# CHECK: cpdt %f0, 0(1), 15
+0xed 0x00 0x00 0x00 0x0f 0xac
+
+# CHECK: cpdt %f0, 0(1,%r1), 0
+0xed 0x00 0x10 0x00 0x00 0xac
+
+# CHECK: cpdt %f0, 0(1,%r15), 0
+0xed 0x00 0xf0 0x00 0x00 0xac
+
+# CHECK: cpdt %f0, 4095(1,%r1), 0
+0xed 0x00 0x1f 0xff 0x00 0xac
+
+# CHECK: cpdt %f0, 4095(1,%r15), 0
+0xed 0x00 0xff 0xff 0x00 0xac
+
+# CHECK: cpdt %f0, 0(256,%r1), 0
+0xed 0xff 0x10 0x00 0x00 0xac
+
+# CHECK: cpdt %f0, 0(256,%r15), 0
+0xed 0xff 0xf0 0x00 0x00 0xac
+
+# CHECK: cpxt %f0, 0(1), 0
+0xed 0x00 0x00 0x00 0x00 0xad
+
+# CHECK: cpxt %f13, 0(1), 0
+0xed 0x00 0x00 0x00 0xd0 0xad
+
+# CHECK: cpxt %f0, 0(1), 15
+0xed 0x00 0x00 0x00 0x0f 0xad
+
+# CHECK: cpxt %f0, 0(1,%r1), 0
+0xed 0x00 0x10 0x00 0x00 0xad
+
+# CHECK: cpxt %f0, 0(1,%r15), 0
+0xed 0x00 0xf0 0x00 0x00 0xad
+
+# CHECK: cpxt %f0, 4095(1,%r1), 0
+0xed 0x00 0x1f 0xff 0x00 0xad
+
+# CHECK: cpxt %f0, 4095(1,%r15), 0
+0xed 0x00 0xff 0xff 0x00 0xad
+
+# CHECK: cpxt %f0, 0(256,%r1), 0
+0xed 0xff 0x10 0x00 0x00 0xad
+
+# CHECK: cpxt %f0, 0(256,%r15), 0
+0xed 0xff 0xf0 0x00 0x00 0xad
+
+# CHECK: cxpt %f0, 0(1), 0
+0xed 0x00 0x00 0x00 0x00 0xaf
+
+# CHECK: cxpt %f13, 0(1), 0
+0xed 0x00 0x00 0x00 0xd0 0xaf
+
+# CHECK: cxpt %f0, 0(1), 15
+0xed 0x00 0x00 0x00 0x0f 0xaf
+
+# CHECK: cxpt %f0, 0(1,%r1), 0
+0xed 0x00 0x10 0x00 0x00 0xaf
+
+# CHECK: cxpt %f0, 0(1,%r15), 0
+0xed 0x00 0xf0 0x00 0x00 0xaf
+
+# CHECK: cxpt %f0, 4095(1,%r1), 0
+0xed 0x00 0x1f 0xff 0x00 0xaf
+
+# CHECK: cxpt %f0, 4095(1,%r15), 0
+0xed 0x00 0xff 0xff 0x00 0xaf
+
+# CHECK: cxpt %f0, 0(256,%r1), 0
+0xed 0xff 0x10 0x00 0x00 0xaf
+
+# CHECK: cxpt %f0, 0(256,%r15), 0
+0xed 0xff 0xf0 0x00 0x00 0xaf
+
# CHECK: lcbb %r0, 0, 0
0xe7 0x00 0x00 0x00 0x00 0x27
diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt
index dac94099f276..75f7f9669b5c 100644
--- a/test/MC/Disassembler/SystemZ/insns.txt
+++ b/test/MC/Disassembler/SystemZ/insns.txt
@@ -22,6 +22,27 @@
# CHECK: a %r15, 0
0x5a 0xf0 0x00 0x00
+# CHECK: ad %f0, 0
+0x6a 0x00 0x00 0x00
+
+# CHECK: ad %f0, 4095
+0x6a 0x00 0x0f 0xff
+
+# CHECK: ad %f0, 0(%r1)
+0x6a 0x00 0x10 0x00
+
+# CHECK: ad %f0, 0(%r15)
+0x6a 0x00 0xf0 0x00
+
+# CHECK: ad %f0, 4095(%r1,%r15)
+0x6a 0x01 0xff 0xff
+
+# CHECK: ad %f0, 4095(%r15,%r1)
+0x6a 0x0f 0x1f 0xff
+
+# CHECK: ad %f15, 0
+0x6a 0xf0 0x00 0x00
+
# CHECK: adb %f0, 0
0xed 0x00 0x00 0x00 0x00 0x1a
@@ -55,6 +76,72 @@
# CHECK: adbr %f15, %f0
0xb3 0x1a 0x00 0xf0
+# CHECK: adr %f0, %f0
+0x2a 0x00
+
+# CHECK: adr %f0, %f15
+0x2a 0x0f
+
+# CHECK: adr %f7, %f8
+0x2a 0x78
+
+# CHECK: adr %f15, %f0
+0x2a 0xf0
+
+# CHECK: adtr %f0, %f0, %f0
+0xb3 0xd2 0x00 0x00
+
+# CHECK: adtr %f0, %f0, %f15
+0xb3 0xd2 0xf0 0x00
+
+# CHECK: adtr %f0, %f15, %f0
+0xb3 0xd2 0x00 0x0f
+
+# CHECK: adtr %f15, %f0, %f0
+0xb3 0xd2 0x00 0xf0
+
+# CHECK: adtr %f7, %f8, %f9
+0xb3 0xd2 0x90 0x78
+
+# CHECK: adtra %f0, %f0, %f0, 1
+0xb3 0xd2 0x01 0x00
+
+# CHECK: adtra %f0, %f0, %f0, 15
+0xb3 0xd2 0x0f 0x00
+
+# CHECK: adtra %f0, %f0, %f15, 1
+0xb3 0xd2 0xf1 0x00
+
+# CHECK: adtra %f0, %f15, %f0, 1
+0xb3 0xd2 0x01 0x0f
+
+# CHECK: adtra %f15, %f0, %f0, 1
+0xb3 0xd2 0x01 0xf0
+
+# CHECK: adtra %f7, %f8, %f9, 10
+0xb3 0xd2 0x9a 0x78
+
+# CHECK: ae %f0, 0
+0x7a 0x00 0x00 0x00
+
+# CHECK: ae %f0, 4095
+0x7a 0x00 0x0f 0xff
+
+# CHECK: ae %f0, 0(%r1)
+0x7a 0x00 0x10 0x00
+
+# CHECK: ae %f0, 0(%r15)
+0x7a 0x00 0xf0 0x00
+
+# CHECK: ae %f0, 4095(%r1,%r15)
+0x7a 0x01 0xff 0xff
+
+# CHECK: ae %f0, 4095(%r15,%r1)
+0x7a 0x0f 0x1f 0xff
+
+# CHECK: ae %f15, 0
+0x7a 0xf0 0x00 0x00
+
# CHECK: aeb %f0, 0
0xed 0x00 0x00 0x00 0x00 0x0a
@@ -88,6 +175,18 @@
# CHECK: aebr %f15, %f0
0xb3 0x0a 0x00 0xf0
+# CHECK: aer %f0, %f0
+0x3a 0x00
+
+# CHECK: aer %f0, %f15
+0x3a 0x0f
+
+# CHECK: aer %f7, %f8
+0x3a 0x78
+
+# CHECK: aer %f15, %f0
+0x3a 0xf0
+
# CHECK: afi %r0, -2147483648
0xc2 0x09 0x80 0x00 0x00 0x00
@@ -856,6 +955,72 @@
# CHECK: asi 524287(%r15), 42
0xeb 0x2a 0xff 0xff 0x7f 0x6a
+# CHECK: au %f0, 0
+0x7e 0x00 0x00 0x00
+
+# CHECK: au %f0, 4095
+0x7e 0x00 0x0f 0xff
+
+# CHECK: au %f0, 0(%r1)
+0x7e 0x00 0x10 0x00
+
+# CHECK: au %f0, 0(%r15)
+0x7e 0x00 0xf0 0x00
+
+# CHECK: au %f0, 4095(%r1,%r15)
+0x7e 0x01 0xff 0xff
+
+# CHECK: au %f0, 4095(%r15,%r1)
+0x7e 0x0f 0x1f 0xff
+
+# CHECK: au %f15, 0
+0x7e 0xf0 0x00 0x00
+
+# CHECK: aur %f0, %f0
+0x3e 0x00
+
+# CHECK: aur %f0, %f15
+0x3e 0x0f
+
+# CHECK: aur %f7, %f8
+0x3e 0x78
+
+# CHECK: aur %f15, %f0
+0x3e 0xf0
+
+# CHECK: aw %f0, 0
+0x6e 0x00 0x00 0x00
+
+# CHECK: aw %f0, 4095
+0x6e 0x00 0x0f 0xff
+
+# CHECK: aw %f0, 0(%r1)
+0x6e 0x00 0x10 0x00
+
+# CHECK: aw %f0, 0(%r15)
+0x6e 0x00 0xf0 0x00
+
+# CHECK: aw %f0, 4095(%r1,%r15)
+0x6e 0x01 0xff 0xff
+
+# CHECK: aw %f0, 4095(%r15,%r1)
+0x6e 0x0f 0x1f 0xff
+
+# CHECK: aw %f15, 0
+0x6e 0xf0 0x00 0x00
+
+# CHECK: awr %f0, %f0
+0x2e 0x00
+
+# CHECK: awr %f0, %f15
+0x2e 0x0f
+
+# CHECK: awr %f7, %f8
+0x2e 0x78
+
+# CHECK: awr %f15, %f0
+0x2e 0xf0
+
# CHECK: axbr %f0, %f0
0xb3 0x4a 0x00 0x00
@@ -868,6 +1033,51 @@
# CHECK: axbr %f13, %f0
0xb3 0x4a 0x00 0xd0
+# CHECK: axr %f0, %f0
+0x36 0x00
+
+# CHECK: axr %f0, %f13
+0x36 0x0d
+
+# CHECK: axr %f8, %f8
+0x36 0x88
+
+# CHECK: axr %f13, %f0
+0x36 0xd0
+
+# CHECK: axtr %f0, %f0, %f0
+0xb3 0xda 0x00 0x00
+
+# CHECK: axtr %f0, %f0, %f13
+0xb3 0xda 0xd0 0x00
+
+# CHECK: axtr %f0, %f13, %f0
+0xb3 0xda 0x00 0x0d
+
+# CHECK: axtr %f13, %f0, %f0
+0xb3 0xda 0x00 0xd0
+
+# CHECK: axtr %f8, %f8, %f8
+0xb3 0xda 0x80 0x88
+
+# CHECK: axtra %f0, %f0, %f0, 1
+0xb3 0xda 0x01 0x00
+
+# CHECK: axtra %f0, %f0, %f0, 15
+0xb3 0xda 0x0f 0x00
+
+# CHECK: axtra %f0, %f0, %f13, 1
+0xb3 0xda 0xd1 0x00
+
+# CHECK: axtra %f0, %f13, %f0, 1
+0xb3 0xda 0x01 0x0d
+
+# CHECK: axtra %f13, %f0, %f0, 1
+0xb3 0xda 0x01 0xd0
+
+# CHECK: axtra %f8, %f8, %f8, 8
+0xb3 0xda 0x88 0x88
+
# CHECK: ay %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x5a
@@ -1348,6 +1558,27 @@
# CHECK: c %r15, 0
0x59 0xf0 0x00 0x00
+# CHECK: cd %f0, 0
+0x69 0x00 0x00 0x00
+
+# CHECK: cd %f0, 4095
+0x69 0x00 0x0f 0xff
+
+# CHECK: cd %f0, 0(%r1)
+0x69 0x00 0x10 0x00
+
+# CHECK: cd %f0, 0(%r15)
+0x69 0x00 0xf0 0x00
+
+# CHECK: cd %f0, 4095(%r1,%r15)
+0x69 0x01 0xff 0xff
+
+# CHECK: cd %f0, 4095(%r15,%r1)
+0x69 0x0f 0x1f 0xff
+
+# CHECK: cd %f15, 0
+0x69 0xf0 0x00 0x00
+
# CHECK: cdb %f0, 0
0xed 0x00 0x00 0x00 0x00 0x19
@@ -1414,6 +1645,39 @@
# CHECK: cdfbra %f15, 0, %r0, 1
0xb3 0x95 0x01 0xf0
+# CHECK: cdfr %f0, %r0
+0xb3 0xb5 0x00 0x00
+
+# CHECK: cdfr %f0, %r15
+0xb3 0xb5 0x00 0x0f
+
+# CHECK: cdfr %f15, %r0
+0xb3 0xb5 0x00 0xf0
+
+# CHECK: cdfr %f7, %r8
+0xb3 0xb5 0x00 0x78
+
+# CHECK: cdfr %f15, %r15
+0xb3 0xb5 0x00 0xff
+
+# CHECK: cdftr %f0, 0, %r0, 0
+0xb9 0x51 0x00 0x00
+
+# CHECK: cdftr %f0, 0, %r0, 15
+0xb9 0x51 0x0f 0x00
+
+# CHECK: cdftr %f0, 0, %r15, 0
+0xb9 0x51 0x00 0x0f
+
+# CHECK: cdftr %f0, 15, %r0, 0
+0xb9 0x51 0xf0 0x00
+
+# CHECK: cdftr %f4, 5, %r6, 7
+0xb9 0x51 0x57 0x46
+
+# CHECK: cdftr %f15, 0, %r0, 0
+0xb9 0x51 0x00 0xf0
+
# CHECK: cdgbr %f0, %r0
0xb3 0xa5 0x00 0x00
@@ -1447,6 +1711,54 @@
# CHECK: cdgbra %f15, 0, %r0, 1
0xb3 0xa5 0x01 0xf0
+# CHECK: cdgr %f0, %r0
+0xb3 0xc5 0x00 0x00
+
+# CHECK: cdgr %f0, %r15
+0xb3 0xc5 0x00 0x0f
+
+# CHECK: cdgr %f15, %r0
+0xb3 0xc5 0x00 0xf0
+
+# CHECK: cdgr %f7, %r8
+0xb3 0xc5 0x00 0x78
+
+# CHECK: cdgr %f15, %r15
+0xb3 0xc5 0x00 0xff
+
+# CHECK: cdgtr %f0, %r0
+0xb3 0xf1 0x00 0x00
+
+# CHECK: cdgtr %f0, %r15
+0xb3 0xf1 0x00 0x0f
+
+# CHECK: cdgtr %f15, %r0
+0xb3 0xf1 0x00 0xf0
+
+# CHECK: cdgtr %f7, %r8
+0xb3 0xf1 0x00 0x78
+
+# CHECK: cdgtr %f15, %r15
+0xb3 0xf1 0x00 0xff
+
+# CHECK: cdgtra %f0, 0, %r0, 1
+0xb3 0xf1 0x01 0x00
+
+# CHECK: cdgtra %f0, 0, %r0, 15
+0xb3 0xf1 0x0f 0x00
+
+# CHECK: cdgtra %f0, 0, %r15, 1
+0xb3 0xf1 0x01 0x0f
+
+# CHECK: cdgtra %f0, 15, %r0, 1
+0xb3 0xf1 0xf1 0x00
+
+# CHECK: cdgtra %f4, 5, %r6, 7
+0xb3 0xf1 0x57 0x46
+
+# CHECK: cdgtra %f15, 0, %r0, 1
+0xb3 0xf1 0x01 0xf0
+
# CHECK: cdlfbr %f0, 0, %r0, 1
0xb3 0x91 0x01 0x00
@@ -1465,6 +1777,24 @@
# CHECK: cdlfbr %f15, 0, %r0, 1
0xb3 0x91 0x01 0xf0
+# CHECK: cdlftr %f0, 0, %r0, 0
+0xb9 0x53 0x00 0x00
+
+# CHECK: cdlftr %f0, 0, %r0, 15
+0xb9 0x53 0x0f 0x00
+
+# CHECK: cdlftr %f0, 0, %r15, 0
+0xb9 0x53 0x00 0x0f
+
+# CHECK: cdlftr %f0, 15, %r0, 0
+0xb9 0x53 0xf0 0x00
+
+# CHECK: cdlftr %f4, 5, %r6, 7
+0xb9 0x53 0x57 0x46
+
+# CHECK: cdlftr %f15, 0, %r0, 0
+0xb9 0x53 0x00 0xf0
+
# CHECK: cdlgbr %f0, 0, %r0, 1
0xb3 0xa1 0x01 0x00
@@ -1483,6 +1813,36 @@
# CHECK: cdlgbr %f15, 0, %r0, 1
0xb3 0xa1 0x01 0xf0
+# CHECK: cdlgtr %f0, 0, %r0, 0
+0xb9 0x52 0x00 0x00
+
+# CHECK: cdlgtr %f0, 0, %r0, 15
+0xb9 0x52 0x0f 0x00
+
+# CHECK: cdlgtr %f0, 0, %r15, 0
+0xb9 0x52 0x00 0x0f
+
+# CHECK: cdlgtr %f0, 15, %r0, 0
+0xb9 0x52 0xf0 0x00
+
+# CHECK: cdlgtr %f4, 5, %r6, 7
+0xb9 0x52 0x57 0x46
+
+# CHECK: cdlgtr %f15, 0, %r0, 0
+0xb9 0x52 0x00 0xf0
+
+# CHECK: cdr %f0, %f0
+0x29 0x00
+
+# CHECK: cdr %f0, %f15
+0x29 0x0f
+
+# CHECK: cdr %f7, %f8
+0x29 0x78
+
+# CHECK: cdr %f15, %f0
+0x29 0xf0
+
# CHECK: cds %r0, %r0, 0
0xbb 0x00 0x00 0x00
@@ -1540,6 +1900,21 @@
# CHECK: cdsg %r14, %r0, 0
0xeb 0xe0 0x00 0x00 0x00 0x3e
+# CHECK: cdstr %f0, %r0
+0xb3 0xf3 0x00 0x00
+
+# CHECK: cdstr %f0, %r15
+0xb3 0xf3 0x00 0x0f
+
+# CHECK: cdstr %f15, %r0
+0xb3 0xf3 0x00 0xf0
+
+# CHECK: cdstr %f7, %r8
+0xb3 0xf3 0x00 0x78
+
+# CHECK: cdstr %f15, %r15
+0xb3 0xf3 0x00 0xff
+
# CHECK: cdsy %r0, %r0, -524288
0xeb 0x00 0x00 0x00 0x80 0x31
@@ -1573,6 +1948,81 @@
# CHECK: cdsy %r14, %r0, 0
0xeb 0xe0 0x00 0x00 0x00 0x31
+# CHECK: cdtr %f0, %f0
+0xb3 0xe4 0x00 0x00
+
+# CHECK: cdtr %f0, %f15
+0xb3 0xe4 0x00 0x0f
+
+# CHECK: cdtr %f7, %f8
+0xb3 0xe4 0x00 0x78
+
+# CHECK: cdtr %f15, %f0
+0xb3 0xe4 0x00 0xf0
+
+# CHECK: cdutr %f0, %r0
+0xb3 0xf2 0x00 0x00
+
+# CHECK: cdutr %f0, %r15
+0xb3 0xf2 0x00 0x0f
+
+# CHECK: cdutr %f15, %r0
+0xb3 0xf2 0x00 0xf0
+
+# CHECK: cdutr %f7, %r8
+0xb3 0xf2 0x00 0x78
+
+# CHECK: cdutr %f15, %r15
+0xb3 0xf2 0x00 0xff
+
+# CHECK: cdzt %f0, 0(1), 0
+0xed 0x00 0x00 0x00 0x00 0xaa
+
+# CHECK: cdzt %f15, 0(1), 0
+0xed 0x00 0x00 0x00 0xf0 0xaa
+
+# CHECK: cdzt %f0, 0(1), 15
+0xed 0x00 0x00 0x00 0x0f 0xaa
+
+# CHECK: cdzt %f0, 0(1,%r1), 0
+0xed 0x00 0x10 0x00 0x00 0xaa
+
+# CHECK: cdzt %f0, 0(1,%r15), 0
+0xed 0x00 0xf0 0x00 0x00 0xaa
+
+# CHECK: cdzt %f0, 4095(1,%r1), 0
+0xed 0x00 0x1f 0xff 0x00 0xaa
+
+# CHECK: cdzt %f0, 4095(1,%r15), 0
+0xed 0x00 0xff 0xff 0x00 0xaa
+
+# CHECK: cdzt %f0, 0(256,%r1), 0
+0xed 0xff 0x10 0x00 0x00 0xaa
+
+# CHECK: cdzt %f0, 0(256,%r15), 0
+0xed 0xff 0xf0 0x00 0x00 0xaa
+
+# CHECK: ce %f0, 0
+0x79 0x00 0x00 0x00
+
+# CHECK: ce %f0, 4095
+0x79 0x00 0x0f 0xff
+
+# CHECK: ce %f0, 0(%r1)
+0x79 0x00 0x10 0x00
+
+# CHECK: ce %f0, 0(%r15)
+0x79 0x00 0xf0 0x00
+
+# CHECK: ce %f0, 4095(%r1,%r15)
+0x79 0x01 0xff 0xff
+
+# CHECK: ce %f0, 4095(%r15,%r1)
+0x79 0x0f 0x1f 0xff
+
+# CHECK: ce %f15, 0
+0x79 0xf0 0x00 0x00
+
# CHECK: ceb %f0, 0
0xed 0x00 0x00 0x00 0x00 0x09
@@ -1606,6 +2056,18 @@
# CHECK: cebr %f15, %f0
0xb3 0x09 0x00 0xf0
+# CHECK: cedtr %f0, %f0
+0xb3 0xf4 0x00 0x00
+
+# CHECK: cedtr %f0, %f15
+0xb3 0xf4 0x00 0x0f
+
+# CHECK: cedtr %f7, %f8
+0xb3 0xf4 0x00 0x78
+
+# CHECK: cedtr %f15, %f0
+0xb3 0xf4 0x00 0xf0
+
# CHECK: cefbr %f0, %r0
0xb3 0x94 0x00 0x00
@@ -1639,6 +2101,21 @@
# CHECK: cefbra %f15, 0, %r0, 1
0xb3 0x94 0x01 0xf0
+# CHECK: cefr %f0, %r0
+0xb3 0xb4 0x00 0x00
+
+# CHECK: cefr %f0, %r15
+0xb3 0xb4 0x00 0x0f
+
+# CHECK: cefr %f15, %r0
+0xb3 0xb4 0x00 0xf0
+
+# CHECK: cefr %f7, %r8
+0xb3 0xb4 0x00 0x78
+
+# CHECK: cefr %f15, %r15
+0xb3 0xb4 0x00 0xff
+
# CHECK: cegbr %f0, %r0
0xb3 0xa4 0x00 0x00
@@ -1672,6 +2149,21 @@
# CHECK: cegbra %f15, 0, %r0, 1
0xb3 0xa4 0x01 0xf0
+# CHECK: cegr %f0, %r0
+0xb3 0xc4 0x00 0x00
+
+# CHECK: cegr %f0, %r15
+0xb3 0xc4 0x00 0x0f
+
+# CHECK: cegr %f15, %r0
+0xb3 0xc4 0x00 0xf0
+
+# CHECK: cegr %f7, %r8
+0xb3 0xc4 0x00 0x78
+
+# CHECK: cegr %f15, %r15
+0xb3 0xc4 0x00 0xff
+
# CHECK: celfbr %f0, 0, %r0, 1
0xb3 0x90 0x01 0x00
@@ -1708,6 +2200,30 @@
# CHECK: celgbr %f15, 0, %r0, 1
0xb3 0xa0 0x01 0xf0
+# CHECK: cer %f0, %f0
+0x39 0x00
+
+# CHECK: cer %f0, %f15
+0x39 0x0f
+
+# CHECK: cer %f7, %f8
+0x39 0x78
+
+# CHECK: cer %f15, %f0
+0x39 0xf0
+
+# CHECK: cextr %f0, %f0
+0xb3 0xfc 0x00 0x00
+
+# CHECK: cextr %f0, %f13
+0xb3 0xfc 0x00 0x0d
+
+# CHECK: cextr %f8, %f8
+0xb3 0xfc 0x00 0x88
+
+# CHECK: cextr %f13, %f0
+0xb3 0xfc 0x00 0xd0
+
# CHECK: cfc 0
0xb2 0x1a 0x00 0x00
@@ -1759,6 +2275,39 @@
# CHECK: cfdbra %r15, 0, %f0, 1
0xb3 0x99 0x01 0xf0
+# CHECK: cfdr %r0, 0, %f0
+0xb3 0xb9 0x00 0x00
+
+# CHECK: cfdr %r0, 0, %f15
+0xb3 0xb9 0x00 0x0f
+
+# CHECK: cfdr %r0, 15, %f0
+0xb3 0xb9 0xf0 0x00
+
+# CHECK: cfdr %r4, 5, %f6
+0xb3 0xb9 0x50 0x46
+
+# CHECK: cfdr %r15, 0, %f0
+0xb3 0xb9 0x00 0xf0
+
+# CHECK: cfdtr %r0, 0, %f0, 0
+0xb9 0x41 0x00 0x00
+
+# CHECK: cfdtr %r0, 0, %f0, 15
+0xb9 0x41 0x0f 0x00
+
+# CHECK: cfdtr %r0, 0, %f15, 0
+0xb9 0x41 0x00 0x0f
+
+# CHECK: cfdtr %r0, 15, %f0, 0
+0xb9 0x41 0xf0 0x00
+
+# CHECK: cfdtr %r4, 5, %f6, 7
+0xb9 0x41 0x57 0x46
+
+# CHECK: cfdtr %r15, 0, %f0, 0
+0xb9 0x41 0x00 0xf0
+
# CHECK: cfebr %r0, 0, %f0
0xb3 0x98 0x00 0x00
@@ -1792,6 +2341,21 @@
# CHECK: cfebra %r15, 0, %f0, 1
0xb3 0x98 0x01 0xf0
+# CHECK: cfer %r0, 0, %f0
+0xb3 0xb8 0x00 0x00
+
+# CHECK: cfer %r0, 0, %f15
+0xb3 0xb8 0x00 0x0f
+
+# CHECK: cfer %r0, 15, %f0
+0xb3 0xb8 0xf0 0x00
+
+# CHECK: cfer %r4, 5, %f6
+0xb3 0xb8 0x50 0x46
+
+# CHECK: cfer %r15, 0, %f0
+0xb3 0xb8 0x00 0xf0
+
# CHECK: cfi %r0, -2147483648
0xc2 0x0d 0x80 0x00 0x00 0x00
@@ -1843,6 +2407,39 @@
# CHECK: cfxbra %r15, 0, %f0, 1
0xb3 0x9a 0x01 0xf0
+# CHECK: cfxr %r0, 0, %f0
+0xb3 0xba 0x00 0x00
+
+# CHECK: cfxr %r0, 0, %f13
+0xb3 0xba 0x00 0x0d
+
+# CHECK: cfxr %r0, 15, %f0
+0xb3 0xba 0xf0 0x00
+
+# CHECK: cfxr %r4, 5, %f8
+0xb3 0xba 0x50 0x48
+
+# CHECK: cfxr %r15, 0, %f0
+0xb3 0xba 0x00 0xf0
+
+# CHECK: cfxtr %r0, 0, %f0, 0
+0xb9 0x49 0x00 0x00
+
+# CHECK: cfxtr %r0, 0, %f0, 15
+0xb9 0x49 0x0f 0x00
+
+# CHECK: cfxtr %r0, 0, %f13, 0
+0xb9 0x49 0x00 0x0d
+
+# CHECK: cfxtr %r0, 15, %f0, 0
+0xb9 0x49 0xf0 0x00
+
+# CHECK: cfxtr %r7, 5, %f8, 9
+0xb9 0x49 0x59 0x78
+
+# CHECK: cfxtr %r15, 0, %f0, 0
+0xb9 0x49 0x00 0xf0
+
# CHECK: cg %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x20
@@ -1906,6 +2503,54 @@
# CHECK: cgdbra %r15, 0, %f0, 1
0xb3 0xa9 0x01 0xf0
+# CHECK: cgdr %r0, 0, %f0
+0xb3 0xc9 0x00 0x00
+
+# CHECK: cgdr %r0, 0, %f15
+0xb3 0xc9 0x00 0x0f
+
+# CHECK: cgdr %r0, 15, %f0
+0xb3 0xc9 0xf0 0x00
+
+# CHECK: cgdr %r4, 5, %f6
+0xb3 0xc9 0x50 0x46
+
+# CHECK: cgdr %r15, 0, %f0
+0xb3 0xc9 0x00 0xf0
+
+# CHECK: cgdtr %r0, 0, %f0
+0xb3 0xe1 0x00 0x00
+
+# CHECK: cgdtr %r0, 0, %f15
+0xb3 0xe1 0x00 0x0f
+
+# CHECK: cgdtr %r0, 15, %f0
+0xb3 0xe1 0xf0 0x00
+
+# CHECK: cgdtr %r4, 5, %f6
+0xb3 0xe1 0x50 0x46
+
+# CHECK: cgdtr %r15, 0, %f0
+0xb3 0xe1 0x00 0xf0
+
+# CHECK: cgdtra %r0, 0, %f0, 1
+0xb3 0xe1 0x01 0x00
+
+# CHECK: cgdtra %r0, 0, %f0, 15
+0xb3 0xe1 0x0f 0x00
+
+# CHECK: cgdtra %r0, 0, %f15, 1
+0xb3 0xe1 0x01 0x0f
+
+# CHECK: cgdtra %r0, 15, %f0, 1
+0xb3 0xe1 0xf1 0x00
+
+# CHECK: cgdtra %r4, 5, %f6, 7
+0xb3 0xe1 0x57 0x46
+
+# CHECK: cgdtra %r15, 0, %f0, 1
+0xb3 0xe1 0x01 0xf0
+
# CHECK: cgebr %r0, 0, %f0
0xb3 0xa8 0x00 0x00
@@ -1939,6 +2584,21 @@
# CHECK: cgebra %r15, 0, %f0, 1
0xb3 0xa8 0x01 0xf0
+# CHECK: cger %r0, 0, %f0
+0xb3 0xc8 0x00 0x00
+
+# CHECK: cger %r0, 0, %f15
+0xb3 0xc8 0x00 0x0f
+
+# CHECK: cger %r0, 15, %f0
+0xb3 0xc8 0xf0 0x00
+
+# CHECK: cger %r4, 5, %f6
+0xb3 0xc8 0x50 0x46
+
+# CHECK: cger %r15, 0, %f0
+0xb3 0xc8 0x00 0xf0
+
# CHECK: cgf %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x30
@@ -2299,6 +2959,54 @@
# CHECK: cgxbra %r15, 0, %f0, 1
0xb3 0xaa 0x01 0xf0
+# CHECK: cgxr %r0, 0, %f0
+0xb3 0xca 0x00 0x00
+
+# CHECK: cgxr %r0, 0, %f13
+0xb3 0xca 0x00 0x0d
+
+# CHECK: cgxr %r0, 15, %f0
+0xb3 0xca 0xf0 0x00
+
+# CHECK: cgxr %r4, 5, %f8
+0xb3 0xca 0x50 0x48
+
+# CHECK: cgxr %r15, 0, %f0
+0xb3 0xca 0x00 0xf0
+
+# CHECK: cgxtr %r0, 0, %f0
+0xb3 0xe9 0x00 0x00
+
+# CHECK: cgxtr %r0, 0, %f13
+0xb3 0xe9 0x00 0x0d
+
+# CHECK: cgxtr %r0, 15, %f0
+0xb3 0xe9 0xf0 0x00
+
+# CHECK: cgxtr %r4, 5, %f8
+0xb3 0xe9 0x50 0x48
+
+# CHECK: cgxtr %r15, 0, %f0
+0xb3 0xe9 0x00 0xf0
+
+# CHECK: cgxtra %r0, 0, %f0, 1
+0xb3 0xe9 0x01 0x00
+
+# CHECK: cgxtra %r0, 0, %f0, 15
+0xb3 0xe9 0x0f 0x00
+
+# CHECK: cgxtra %r0, 0, %f13, 1
+0xb3 0xe9 0x01 0x0d
+
+# CHECK: cgxtra %r0, 15, %f0, 1
+0xb3 0xe9 0xf1 0x00
+
+# CHECK: cgxtra %r7, 5, %f8, 9
+0xb3 0xe9 0x59 0x78
+
+# CHECK: cgxtra %r15, 0, %f0, 1
+0xb3 0xe9 0x01 0xf0
+
# CHECK: ch %r0, 0
0x49 0x00 0x00 0x00
@@ -2722,6 +3430,24 @@
# CHECK: clfdbr %r15, 0, %f0, 1
0xb3 0x9d 0x01 0xf0
+# CHECK: clfdtr %r0, 0, %f0, 0
+0xb9 0x43 0x00 0x00
+
+# CHECK: clfdtr %r0, 0, %f0, 15
+0xb9 0x43 0x0f 0x00
+
+# CHECK: clfdtr %r0, 0, %f15, 0
+0xb9 0x43 0x00 0x0f
+
+# CHECK: clfdtr %r0, 15, %f0, 0
+0xb9 0x43 0xf0 0x00
+
+# CHECK: clfdtr %r4, 5, %f6, 7
+0xb9 0x43 0x57 0x46
+
+# CHECK: clfdtr %r15, 0, %f0, 0
+0xb9 0x43 0x00 0xf0
+
# CHECK: clfebr %r0, 0, %f0, 1
0xb3 0x9c 0x01 0x00
@@ -2758,6 +3484,24 @@
# CHECK: clfxbr %r15, 0, %f0, 1
0xb3 0x9e 0x01 0xf0
+# CHECK: clfxtr %r0, 0, %f0, 0
+0xb9 0x4b 0x00 0x00
+
+# CHECK: clfxtr %r0, 0, %f0, 15
+0xb9 0x4b 0x0f 0x00
+
+# CHECK: clfxtr %r0, 0, %f13, 0
+0xb9 0x4b 0x00 0x0d
+
+# CHECK: clfxtr %r0, 15, %f0, 0
+0xb9 0x4b 0xf0 0x00
+
+# CHECK: clfxtr %r7, 5, %f8, 9
+0xb9 0x4b 0x59 0x78
+
+# CHECK: clfxtr %r15, 0, %f0, 0
+0xb9 0x4b 0x00 0xf0
+
# CHECK: clgdbr %r0, 0, %f0, 1
0xb3 0xad 0x01 0x00
@@ -2776,6 +3520,24 @@
# CHECK: clgdbr %r15, 0, %f0, 1
0xb3 0xad 0x01 0xf0
+# CHECK: clgdtr %r0, 0, %f0, 0
+0xb9 0x42 0x00 0x00
+
+# CHECK: clgdtr %r0, 0, %f0, 15
+0xb9 0x42 0x0f 0x00
+
+# CHECK: clgdtr %r0, 0, %f15, 0
+0xb9 0x42 0x00 0x0f
+
+# CHECK: clgdtr %r0, 15, %f0, 0
+0xb9 0x42 0xf0 0x00
+
+# CHECK: clgdtr %r4, 5, %f6, 7
+0xb9 0x42 0x57 0x46
+
+# CHECK: clgdtr %r15, 0, %f0, 0
+0xb9 0x42 0x00 0xf0
+
# CHECK: clgebr %r0, 0, %f0, 1
0xb3 0xac 0x01 0x00
@@ -2884,6 +3646,24 @@
# CHECK: clgxbr %r15, 0, %f0, 1
0xb3 0xae 0x01 0xf0
+# CHECK: clgxtr %r0, 0, %f0, 0
+0xb9 0x4a 0x00 0x00
+
+# CHECK: clgxtr %r0, 0, %f0, 15
+0xb9 0x4a 0x0f 0x00
+
+# CHECK: clgxtr %r0, 0, %f13, 0
+0xb9 0x4a 0x00 0x0d
+
+# CHECK: clgxtr %r0, 15, %f0, 0
+0xb9 0x4a 0xf0 0x00
+
+# CHECK: clgxtr %r7, 5, %f8, 9
+0xb9 0x4a 0x59 0x78
+
+# CHECK: clgxtr %r15, 0, %f0, 0
+0xb9 0x4a 0x00 0xf0
+
# CHECK: clfhsi 0, 0
0xe5 0x5d 0x00 0x00 0x00 0x00
@@ -3844,6 +4624,21 @@
# CHECK: cs %r15, %r0, 0
0xba 0xf0 0x00 0x00
+# CHECK: csdtr %r0, %f0, 0
+0xb3 0xe3 0x00 0x00
+
+# CHECK: csdtr %r0, %f15, 0
+0xb3 0xe3 0x00 0x0f
+
+# CHECK: csdtr %r0, %f0, 15
+0xb3 0xe3 0x0f 0x00
+
+# CHECK: csdtr %r4, %f5, 6
+0xb3 0xe3 0x06 0x45
+
+# CHECK: csdtr %r15, %f0, 0
+0xb3 0xe3 0x00 0xf0
+
# CHECK: csg %r0, %r0, -524288
0xeb 0x00 0x00 0x00 0x80 0x30
@@ -3898,6 +4693,21 @@
# CHECK: csst 4095(%r1), 0(%r15), %r2
0xc8 0x22 0x1f 0xff 0xf0 0x00
+# CHECK: csxtr %r0, %f0, 0
+0xb3 0xeb 0x00 0x00
+
+# CHECK: csxtr %r0, %f13, 0
+0xb3 0xeb 0x00 0x0d
+
+# CHECK: csxtr %r0, %f0, 15
+0xb3 0xeb 0x0f 0x00
+
+# CHECK: csxtr %r4, %f5, 6
+0xb3 0xeb 0x06 0x45
+
+# CHECK: csxtr %r14, %f0, 0
+0xb3 0xeb 0x00 0xe0
+
# CHECK: csy %r0, %r0, -524288
0xeb 0x00 0x00 0x00 0x80 0x14
@@ -4027,6 +4837,21 @@
# CHECK: cu42 %r6, %r8
0xb9 0xb3 0x00 0x68
+# CHECK: cudtr %r0, %f0
+0xb3 0xe2 0x00 0x00
+
+# CHECK: cudtr %r0, %f15
+0xb3 0xe2 0x00 0x0f
+
+# CHECK: cudtr %r15, %f0
+0xb3 0xe2 0x00 0xf0
+
+# CHECK: cudtr %r7, %f8
+0xb3 0xe2 0x00 0x78
+
+# CHECK: cudtr %r15, %f15
+0xb3 0xe2 0x00 0xff
+
# CHECK: cuse %r0, %r0
0xb2 0x57 0x00 0x00
@@ -4039,6 +4864,21 @@
# CHECK: cuse %r6, %r8
0xb2 0x57 0x00 0x68
+# CHECK: cuxtr %r0, %f0
+0xb3 0xea 0x00 0x00
+
+# CHECK: cuxtr %r0, %f13
+0xb3 0xea 0x00 0x0d
+
+# CHECK: cuxtr %r14, %f0
+0xb3 0xea 0x00 0xe0
+
+# CHECK: cuxtr %r6, %f8
+0xb3 0xea 0x00 0x68
+
+# CHECK: cuxtr %r14, %f13
+0xb3 0xea 0x00 0xed
+
# CHECK: cvb %r0, 0
0x4f 0x00 0x00 0x00
@@ -4246,6 +5086,39 @@
# CHECK: cxfbra %f13, 0, %r0, 1
0xb3 0x96 0x01 0xd0
+# CHECK: cxfr %f0, %r0
+0xb3 0xb6 0x00 0x00
+
+# CHECK: cxfr %f0, %r15
+0xb3 0xb6 0x00 0x0f
+
+# CHECK: cxfr %f13, %r0
+0xb3 0xb6 0x00 0xd0
+
+# CHECK: cxfr %f8, %r7
+0xb3 0xb6 0x00 0x87
+
+# CHECK: cxfr %f13, %r15
+0xb3 0xb6 0x00 0xdf
+
+# CHECK: cxftr %f0, 0, %r0, 0
+0xb9 0x59 0x00 0x00
+
+# CHECK: cxftr %f0, 0, %r0, 15
+0xb9 0x59 0x0f 0x00
+
+# CHECK: cxftr %f0, 0, %r15, 0
+0xb9 0x59 0x00 0x0f
+
+# CHECK: cxftr %f0, 15, %r0, 0
+0xb9 0x59 0xf0 0x00
+
+# CHECK: cxftr %f4, 5, %r9, 10
+0xb9 0x59 0x5a 0x49
+
+# CHECK: cxftr %f13, 0, %r0, 0
+0xb9 0x59 0x00 0xd0
+
# CHECK: cxgbr %f0, %r0
0xb3 0xa6 0x00 0x00
@@ -4279,6 +5152,54 @@
# CHECK: cxgbra %f13, 0, %r0, 1
0xb3 0xa6 0x01 0xd0
+# CHECK: cxgr %f0, %r0
+0xb3 0xc6 0x00 0x00
+
+# CHECK: cxgr %f0, %r15
+0xb3 0xc6 0x00 0x0f
+
+# CHECK: cxgr %f13, %r0
+0xb3 0xc6 0x00 0xd0
+
+# CHECK: cxgr %f8, %r7
+0xb3 0xc6 0x00 0x87
+
+# CHECK: cxgr %f13, %r15
+0xb3 0xc6 0x00 0xdf
+
+# CHECK: cxgtr %f0, %r0
+0xb3 0xf9 0x00 0x00
+
+# CHECK: cxgtr %f0, %r15
+0xb3 0xf9 0x00 0x0f
+
+# CHECK: cxgtr %f13, %r0
+0xb3 0xf9 0x00 0xd0
+
+# CHECK: cxgtr %f8, %r7
+0xb3 0xf9 0x00 0x87
+
+# CHECK: cxgtr %f13, %r15
+0xb3 0xf9 0x00 0xdf
+
+# CHECK: cxgtra %f0, 0, %r0, 1
+0xb3 0xf9 0x01 0x00
+
+# CHECK: cxgtra %f0, 0, %r0, 15
+0xb3 0xf9 0x0f 0x00
+
+# CHECK: cxgtra %f0, 0, %r15, 1
+0xb3 0xf9 0x01 0x0f
+
+# CHECK: cxgtra %f0, 15, %r0, 1
+0xb3 0xf9 0xf1 0x00
+
+# CHECK: cxgtra %f4, 5, %r9, 10
+0xb3 0xf9 0x5a 0x49
+
+# CHECK: cxgtra %f13, 0, %r0, 1
+0xb3 0xf9 0x01 0xd0
+
# CHECK: cxlfbr %f0, 0, %r0, 1
0xb3 0x92 0x01 0x00
@@ -4297,6 +5218,24 @@
# CHECK: cxlfbr %f13, 0, %r0, 1
0xb3 0x92 0x01 0xd0
+# CHECK: cxlftr %f0, 0, %r0, 0
+0xb9 0x5b 0x00 0x00
+
+# CHECK: cxlftr %f0, 0, %r0, 15
+0xb9 0x5b 0x0f 0x00
+
+# CHECK: cxlftr %f0, 0, %r15, 0
+0xb9 0x5b 0x00 0x0f
+
+# CHECK: cxlftr %f0, 15, %r0, 0
+0xb9 0x5b 0xf0 0x00
+
+# CHECK: cxlftr %f4, 5, %r9, 10
+0xb9 0x5b 0x5a 0x49
+
+# CHECK: cxlftr %f13, 0, %r0, 0
+0xb9 0x5b 0x00 0xd0
+
# CHECK: cxlgbr %f0, 0, %r0, 1
0xb3 0xa2 0x01 0x00
@@ -4315,6 +5254,105 @@
# CHECK: cxlgbr %f13, 0, %r0, 1
0xb3 0xa2 0x01 0xd0
+# CHECK: cxlgtr %f0, 0, %r0, 0
+0xb9 0x5a 0x00 0x00
+
+# CHECK: cxlgtr %f0, 0, %r0, 15
+0xb9 0x5a 0x0f 0x00
+
+# CHECK: cxlgtr %f0, 0, %r15, 0
+0xb9 0x5a 0x00 0x0f
+
+# CHECK: cxlgtr %f0, 15, %r0, 0
+0xb9 0x5a 0xf0 0x00
+
+# CHECK: cxlgtr %f4, 5, %r9, 10
+0xb9 0x5a 0x5a 0x49
+
+# CHECK: cxlgtr %f13, 0, %r0, 0
+0xb9 0x5a 0x00 0xd0
+
+# CHECK: cxr %f0, %f0
+0xb3 0x69 0x00 0x00
+
+# CHECK: cxr %f0, %f13
+0xb3 0x69 0x00 0x0d
+
+# CHECK: cxr %f8, %f8
+0xb3 0x69 0x00 0x88
+
+# CHECK: cxr %f13, %f0
+0xb3 0x69 0x00 0xd0
+
+# CHECK: cxstr %f0, %r0
+0xb3 0xfb 0x00 0x00
+
+# CHECK: cxstr %f0, %r14
+0xb3 0xfb 0x00 0x0e
+
+# CHECK: cxstr %f13, %r0
+0xb3 0xfb 0x00 0xd0
+
+# CHECK: cxstr %f8, %r6
+0xb3 0xfb 0x00 0x86
+
+# CHECK: cxstr %f13, %r14
+0xb3 0xfb 0x00 0xde
+
+# CHECK: cxtr %f0, %f0
+0xb3 0xec 0x00 0x00
+
+# CHECK: cxtr %f0, %f13
+0xb3 0xec 0x00 0x0d
+
+# CHECK: cxtr %f8, %f8
+0xb3 0xec 0x00 0x88
+
+# CHECK: cxtr %f13, %f0
+0xb3 0xec 0x00 0xd0
+
+# CHECK: cxutr %f0, %r0
+0xb3 0xfa 0x00 0x00
+
+# CHECK: cxutr %f0, %r14
+0xb3 0xfa 0x00 0x0e
+
+# CHECK: cxutr %f13, %r0<