aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-05-21 06:57:07 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-05-21 06:57:07 +0000
commitf03b5bed27d0d2eafd68562ce14f8b5e3f1f0801 (patch)
tree311f96478e9fceea407d1f187f9c5cef712f796e
parentb6bcb9a905dec7821221e8ceaf1504c1f329815e (diff)
downloadsrc-vendor/llvm/llvm-release_361-r237755.tar.gz
src-vendor/llvm/llvm-release_361-r237755.zip
Vendor import of llvm RELEASE_361/final tag r237755 (effectively, 3.6.1 release):vendor/llvm/llvm-release_361-r237755vendor/llvm/llvm-3.6.x
-rw-r--r--CMakeLists.txt2
-rw-r--r--autoconf/configure.ac6
-rwxr-xr-xcmake/config-ix.cmake2
-rwxr-xr-xconfigure190
-rw-r--r--docs/ReleaseNotes.rst25
-rw-r--r--include/llvm/Config/config.h.cmake6
-rw-r--r--include/llvm/Config/config.h.in6
-rw-r--r--include/llvm/Target/TargetCallingConv.h11
-rw-r--r--include/llvm/Target/TargetLowering.h5
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp9
-rw-r--r--lib/CodeGen/MachineCopyPropagation.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp22
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp6
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp39
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp7
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp30
-rw-r--r--lib/IR/ConstantFold.cpp33
-rw-r--r--lib/IR/GCOV.cpp4
-rw-r--r--lib/MC/MCParser/AsmParser.cpp24
-rw-r--r--lib/Support/Unix/Memory.inc15
-rw-r--r--lib/Support/Windows/explicit_symbols.inc6
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp59
-rw-r--r--lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp6
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp182
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h10
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td39
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp11
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h15
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp11
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp53
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp43
-rw-r--r--lib/Target/Mips/Mips.td12
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp3
-rw-r--r--lib/Target/Mips/Mips32r6InstrInfo.td4
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td10
-rw-r--r--lib/Target/Mips/MipsCCState.cpp4
-rw-r--r--lib/Target/Mips/MipsCallingConv.td6
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp105
-rw-r--r--lib/Target/Mips/MipsISelLowering.h2
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td28
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td2
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp34
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp27
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp3
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp18
-rw-r--r--lib/Target/R600/AMDGPU.td5
-rw-r--r--lib/Target/R600/AMDGPUAlwaysInlinePass.cpp5
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.cpp7
-rw-r--r--lib/Target/R600/AMDGPUISelDAGToDAG.cpp44
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp20
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.h6
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.td7
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td25
-rw-r--r--lib/Target/R600/AMDGPUIntrinsics.td1
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp2
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h9
-rw-r--r--lib/Target/R600/CaymanInstructions.td2
-rw-r--r--lib/Target/R600/EvergreenInstructions.td2
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp2
-rw-r--r--lib/Target/R600/Processors.td8
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp6
-rw-r--r--lib/Target/R600/R600Instructions.td2
-rw-r--r--lib/Target/R600/SIAnnotateControlFlow.cpp29
-rw-r--r--lib/Target/R600/SIDefines.h3
-rw-r--r--lib/Target/R600/SIFoldOperands.cpp16
-rw-r--r--lib/Target/R600/SIISelLowering.cpp101
-rw-r--r--lib/Target/R600/SIInsertWaits.cpp34
-rw-r--r--lib/Target/R600/SIInstrFormats.td36
-rw-r--r--lib/Target/R600/SIInstrInfo.cpp84
-rw-r--r--lib/Target/R600/SIInstrInfo.h26
-rw-r--r--lib/Target/R600/SIInstrInfo.td758
-rw-r--r--lib/Target/R600/SIInstructions.td594
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp79
-rw-r--r--lib/Target/R600/SIRegisterInfo.cpp74
-rw-r--r--lib/Target/R600/SIRegisterInfo.h4
-rw-r--r--lib/Target/R600/SIRegisterInfo.td4
-rw-r--r--lib/Target/R600/VIInstrFormats.td26
-rw-r--r--lib/Target/R600/VIInstructions.td41
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp84
-rw-r--r--lib/Target/X86/X86FrameLowering.h8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp124
-rw-r--r--lib/Target/X86/X86InstrControl.td3
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h2
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp19
-rw-r--r--lib/Transforms/Scalar/GVN.cpp10
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp2
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp3
-rw-r--r--test/Analysis/ScalarEvolution/pr22856.ll33
-rw-r--r--test/CodeGen/AArch64/arm64-tls-dynamics.ll128
-rw-r--r--test/CodeGen/AArch64/arm64-tls-execs.ll25
-rw-r--r--test/CodeGen/AArch64/implicit-sret.ll13
-rw-r--r--test/CodeGen/AArch64/machine-copy-prop.ll101
-rw-r--r--test/CodeGen/AArch64/tailcall-explicit-sret.ll106
-rw-r--r--test/CodeGen/AArch64/tailcall-implicit-sret.ll46
-rw-r--r--test/CodeGen/Mips/adjust-callstack-sp.ll20
-rw-r--r--test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll80
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll282
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll149
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll161
-rw-r--r--test/CodeGen/Mips/check-adde-redundant-moves.ll29
-rw-r--r--test/CodeGen/Mips/fcmp.ll302
-rw-r--r--test/CodeGen/Mips/fmadd1.ll64
-rw-r--r--test/CodeGen/Mips/llvm-ir/add.ll115
-rw-r--r--test/CodeGen/Mips/llvm-ir/and.ll94
-rw-r--r--test/CodeGen/Mips/llvm-ir/ashr.ll188
-rw-r--r--test/CodeGen/Mips/llvm-ir/lshr.ll176
-rw-r--r--test/CodeGen/Mips/llvm-ir/mul.ll59
-rw-r--r--test/CodeGen/Mips/llvm-ir/or.ll95
-rw-r--r--test/CodeGen/Mips/llvm-ir/sdiv.ll136
-rw-r--r--test/CodeGen/Mips/llvm-ir/shl.ll188
-rw-r--r--test/CodeGen/Mips/llvm-ir/srem.ll129
-rw-r--r--test/CodeGen/Mips/llvm-ir/sub.ll114
-rw-r--r--test/CodeGen/Mips/llvm-ir/udiv.ll108
-rw-r--r--test/CodeGen/Mips/llvm-ir/urem.ll145
-rw-r--r--test/CodeGen/Mips/llvm-ir/xor.ll94
-rw-r--r--test/CodeGen/Mips/mips64-f128.ll2
-rw-r--r--test/CodeGen/Mips/mips64signextendsesf.ll214
-rw-r--r--test/CodeGen/Mips/mips64sinttofpsf.ll15
-rw-r--r--test/CodeGen/Mips/no-odd-spreg-msa.ll131
-rw-r--r--test/CodeGen/R600/128bit-kernel-args.ll33
-rw-r--r--test/CodeGen/R600/32-bit-local-address-space.ll1
-rw-r--r--test/CodeGen/R600/64bit-kernel-args.ll10
-rw-r--r--test/CodeGen/R600/add-debug.ll1
-rw-r--r--test/CodeGen/R600/add.ll143
-rw-r--r--test/CodeGen/R600/address-space.ll1
-rw-r--r--test/CodeGen/R600/and.ll1
-rw-r--r--test/CodeGen/R600/anyext.ll1
-rw-r--r--test/CodeGen/R600/atomic_load_add.ll1
-rw-r--r--test/CodeGen/R600/atomic_load_sub.ll1
-rw-r--r--test/CodeGen/R600/basic-branch.ll1
-rw-r--r--test/CodeGen/R600/basic-loop.ll1
-rw-r--r--test/CodeGen/R600/bfi_int.ll31
-rw-r--r--test/CodeGen/R600/bitcast.ll1
-rw-r--r--test/CodeGen/R600/bswap.ll1
-rw-r--r--test/CodeGen/R600/build_vector.ll45
-rw-r--r--test/CodeGen/R600/call.ll20
-rw-r--r--test/CodeGen/R600/call_fs.ll16
-rw-r--r--test/CodeGen/R600/cf_end.ll10
-rw-r--r--test/CodeGen/R600/concat_vectors.ll12
-rw-r--r--test/CodeGen/R600/copy-illegal-type.ll1
-rw-r--r--test/CodeGen/R600/copy-to-reg.ll1
-rw-r--r--test/CodeGen/R600/ctlz_zero_undef.ll1
-rw-r--r--test/CodeGen/R600/cttz-ctlz.ll1
-rw-r--r--test/CodeGen/R600/cttz_zero_undef.ll1
-rw-r--r--test/CodeGen/R600/cvt_f32_ubyte.ll3
-rw-r--r--test/CodeGen/R600/default-fp-mode.ll7
-rw-r--r--test/CodeGen/R600/ds_read2_offset_order.ll1
-rw-r--r--test/CodeGen/R600/elf.ll31
-rw-r--r--test/CodeGen/R600/elf.r600.ll18
-rw-r--r--test/CodeGen/R600/empty-function.ll1
-rw-r--r--test/CodeGen/R600/extload-private.ll46
-rw-r--r--test/CodeGen/R600/extload.ll1
-rw-r--r--test/CodeGen/R600/extract_vector_elt_i16.ll1
-rw-r--r--test/CodeGen/R600/fadd.ll1
-rw-r--r--test/CodeGen/R600/fadd64.ll1
-rw-r--r--test/CodeGen/R600/fceil.ll3
-rw-r--r--test/CodeGen/R600/fcmp64.ll1
-rw-r--r--test/CodeGen/R600/fconst64.ll1
-rw-r--r--test/CodeGen/R600/fdiv.f64.ll96
-rw-r--r--test/CodeGen/R600/fdiv.ll1
-rw-r--r--test/CodeGen/R600/fdiv64.ll14
-rw-r--r--test/CodeGen/R600/ffloor.ll1
-rw-r--r--test/CodeGen/R600/flat-address-space.ll2
-rw-r--r--test/CodeGen/R600/fma.f64.ll1
-rw-r--r--test/CodeGen/R600/fmax3.f64.ll24
-rw-r--r--test/CodeGen/R600/fmax3.ll1
-rw-r--r--test/CodeGen/R600/fmaxnum.f64.ll1
-rw-r--r--test/CodeGen/R600/fmaxnum.ll1
-rw-r--r--test/CodeGen/R600/fmin3.ll2
-rw-r--r--test/CodeGen/R600/fminnum.f64.ll1
-rw-r--r--test/CodeGen/R600/fminnum.ll1
-rw-r--r--test/CodeGen/R600/fmul.ll1
-rw-r--r--test/CodeGen/R600/fmul64.ll1
-rw-r--r--test/CodeGen/R600/fnearbyint.ll1
-rw-r--r--test/CodeGen/R600/fneg-fabs.f64.ll1
-rw-r--r--test/CodeGen/R600/fneg-fabs.ll1
-rw-r--r--test/CodeGen/R600/fp-classify.ll1
-rw-r--r--test/CodeGen/R600/fp16_to_fp.ll1
-rw-r--r--test/CodeGen/R600/fp32_to_fp16.ll1
-rw-r--r--test/CodeGen/R600/fp_to_sint.ll1
-rw-r--r--test/CodeGen/R600/fp_to_uint.ll1
-rw-r--r--test/CodeGen/R600/fpext.ll3
-rw-r--r--test/CodeGen/R600/fptrunc.ll3
-rw-r--r--test/CodeGen/R600/frem.ll10
-rw-r--r--test/CodeGen/R600/fsqrt.ll2
-rw-r--r--test/CodeGen/R600/fsub.ll1
-rw-r--r--test/CodeGen/R600/fsub64.ll1
-rw-r--r--test/CodeGen/R600/ftrunc.ll1
-rw-r--r--test/CodeGen/R600/global-directive.ll1
-rw-r--r--test/CodeGen/R600/global-extload-i1.ll1
-rw-r--r--test/CodeGen/R600/global-extload-i16.ll1
-rw-r--r--test/CodeGen/R600/global-extload-i32.ll1
-rw-r--r--test/CodeGen/R600/global-extload-i8.ll1
-rw-r--r--test/CodeGen/R600/global-zero-initializer.ll1
-rw-r--r--test/CodeGen/R600/half.ll1
-rw-r--r--test/CodeGen/R600/i1-copy-implicit-def.ll1
-rw-r--r--test/CodeGen/R600/i1-copy-phi.ll1
-rw-r--r--test/CodeGen/R600/icmp64.ll1
-rw-r--r--test/CodeGen/R600/indirect-addressing-si.ll81
-rw-r--r--test/CodeGen/R600/indirect-private-64.ll2
-rw-r--r--test/CodeGen/R600/infinite-loop.ll1
-rw-r--r--test/CodeGen/R600/inline-asm.ll1
-rw-r--r--test/CodeGen/R600/inline-calls.ll1
-rw-r--r--test/CodeGen/R600/input-mods.ll18
-rw-r--r--test/CodeGen/R600/insert_subreg.ll1
-rw-r--r--test/CodeGen/R600/insert_vector_elt.ll1
-rw-r--r--test/CodeGen/R600/kernel-args.ll536
-rw-r--r--test/CodeGen/R600/large-alloca.ll1
-rw-r--r--test/CodeGen/R600/large-constant-initializer.ll1
-rw-r--r--test/CodeGen/R600/lds-initializer.ll1
-rw-r--r--test/CodeGen/R600/lds-zero-initializer.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.abs.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfi.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfm.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.brev.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.clamp.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll127
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.div_scale.ll77
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll28
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.fract.ll46
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imad24.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imax.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imin.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imul24.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.kill.ll17
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.ldexp.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll3
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.rcp.ll3
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.rsq.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.trunc.ll13
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umax.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umin.ll1
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umul24.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.fs.interp.constant.ll21
-rw-r--r--test/CodeGen/R600/llvm.SI.fs.interp.ll30
-rw-r--r--test/CodeGen/R600/llvm.SI.gather4.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.getlod.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.image.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.image.sample.ll21
-rw-r--r--test/CodeGen/R600/llvm.SI.image.sample.o.ll21
-rw-r--r--test/CodeGen/R600/llvm.SI.imageload.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.load.dword.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.resinfo.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.sample-masked.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.sample.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.sampled.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.sendmsg-m0.ll20
-rw-r--r--test/CodeGen/R600/llvm.SI.sendmsg.ll1
-rw-r--r--test/CodeGen/R600/llvm.SI.tbuffer.store.ll1
-rw-r--r--test/CodeGen/R600/llvm.amdgpu.kilp.ll1
-rw-r--r--test/CodeGen/R600/llvm.amdgpu.lrp.ll1
-rw-r--r--test/CodeGen/R600/llvm.cos.ll1
-rw-r--r--test/CodeGen/R600/llvm.exp2.ll91
-rw-r--r--test/CodeGen/R600/llvm.log2.ll91
-rw-r--r--test/CodeGen/R600/llvm.memcpy.ll1
-rw-r--r--test/CodeGen/R600/llvm.rint.ll1
-rw-r--r--test/CodeGen/R600/llvm.sin.ll2
-rw-r--r--test/CodeGen/R600/llvm.sqrt.ll59
-rw-r--r--test/CodeGen/R600/load-i1.ll1
-rw-r--r--test/CodeGen/R600/load.ll597
-rw-r--r--test/CodeGen/R600/load.vec.ll21
-rw-r--r--test/CodeGen/R600/load64.ll1
-rw-r--r--test/CodeGen/R600/local-memory-two-objects.ll32
-rw-r--r--test/CodeGen/R600/loop-idiom.ll1
-rw-r--r--test/CodeGen/R600/lshl.ll1
-rw-r--r--test/CodeGen/R600/lshr.ll1
-rw-r--r--test/CodeGen/R600/m0-spill.ll1
-rw-r--r--test/CodeGen/R600/mad_int24.ll1
-rw-r--r--test/CodeGen/R600/mad_uint24.ll1
-rw-r--r--test/CodeGen/R600/mul.ll1
-rw-r--r--test/CodeGen/R600/mul_int24.ll1
-rw-r--r--test/CodeGen/R600/mul_uint24.ll1
-rw-r--r--test/CodeGen/R600/mulhu.ll1
-rw-r--r--test/CodeGen/R600/no-initializer-constant-addrspace.ll1
-rw-r--r--test/CodeGen/R600/or.ll5
-rw-r--r--test/CodeGen/R600/private-memory-atomics.ll1
-rw-r--r--test/CodeGen/R600/private-memory-broken.ll1
-rw-r--r--test/CodeGen/R600/r600-encoding.ll12
-rw-r--r--test/CodeGen/R600/reorder-stores.ll1
-rw-r--r--test/CodeGen/R600/rotl.i64.ll27
-rw-r--r--test/CodeGen/R600/rotl.ll1
-rw-r--r--test/CodeGen/R600/rotr.i64.ll27
-rw-r--r--test/CodeGen/R600/rotr.ll1
-rw-r--r--test/CodeGen/R600/s_movk_i32.ll1
-rw-r--r--test/CodeGen/R600/saddo.ll1
-rw-r--r--test/CodeGen/R600/scalar_to_vector.ll1
-rw-r--r--test/CodeGen/R600/schedule-kernel-arg-loads.ll34
-rw-r--r--test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll1
-rw-r--r--test/CodeGen/R600/sdiv.ll1
-rw-r--r--test/CodeGen/R600/sdivrem24.ll1
-rw-r--r--test/CodeGen/R600/select-i1.ll1
-rw-r--r--test/CodeGen/R600/select-vectors.ll1
-rw-r--r--test/CodeGen/R600/select64.ll1
-rw-r--r--test/CodeGen/R600/selectcc-opt.ll1
-rw-r--r--test/CodeGen/R600/selectcc.ll1
-rw-r--r--test/CodeGen/R600/setcc64.ll1
-rw-r--r--test/CodeGen/R600/seto.ll1
-rw-r--r--test/CodeGen/R600/setuo.ll1
-rw-r--r--test/CodeGen/R600/sgpr-copy-duplicate-operand.ll1
-rw-r--r--test/CodeGen/R600/sgpr-copy.ll1
-rw-r--r--test/CodeGen/R600/shl.ll228
-rw-r--r--test/CodeGen/R600/shl_add_ptr.ll1
-rw-r--r--test/CodeGen/R600/si-annotate-cf-assertion.ll1
-rw-r--r--test/CodeGen/R600/si-annotate-cf.ll63
-rw-r--r--test/CodeGen/R600/si-lod-bias.ll1
-rw-r--r--test/CodeGen/R600/si-sgpr-spill.ll2
-rw-r--r--test/CodeGen/R600/si-vector-hang.ll1
-rw-r--r--test/CodeGen/R600/sign_extend.ll1
-rw-r--r--test/CodeGen/R600/simplify-demanded-bits-build-pair.ll1
-rw-r--r--test/CodeGen/R600/sint_to_fp.ll1
-rw-r--r--test/CodeGen/R600/sra.ll271
-rw-r--r--test/CodeGen/R600/srem.ll1
-rw-r--r--test/CodeGen/R600/ssubo.ll1
-rw-r--r--test/CodeGen/R600/store-v3i32.ll1
-rw-r--r--test/CodeGen/R600/store-v3i64.ll1
-rw-r--r--test/CodeGen/R600/store-vector-ptrs.ll1
-rw-r--r--test/CodeGen/R600/store.ll315
-rw-r--r--test/CodeGen/R600/store.r600.ll10
-rw-r--r--test/CodeGen/R600/subreg-coalescer-crash.ll5
-rw-r--r--test/CodeGen/R600/swizzle-export.ll18
-rw-r--r--test/CodeGen/R600/trunc-cmp-constant.ll1
-rw-r--r--test/CodeGen/R600/trunc-store-i1.ll1
-rw-r--r--test/CodeGen/R600/trunc.ll31
-rw-r--r--test/CodeGen/R600/uaddo.ll1
-rw-r--r--test/CodeGen/R600/udiv.ll27
-rw-r--r--test/CodeGen/R600/udivrem.ll1
-rw-r--r--test/CodeGen/R600/udivrem24.ll1
-rw-r--r--test/CodeGen/R600/udivrem64.ll5
-rw-r--r--test/CodeGen/R600/uint_to_fp.ll1
-rw-r--r--test/CodeGen/R600/unaligned-load-store.ll172
-rw-r--r--test/CodeGen/R600/unhandled-loop-condition-assertion.ll1
-rw-r--r--test/CodeGen/R600/urecip.ll1
-rw-r--r--test/CodeGen/R600/urem.ll1
-rw-r--r--test/CodeGen/R600/usubo.ll1
-rw-r--r--test/CodeGen/R600/v_cndmask.ll1
-rw-r--r--test/CodeGen/R600/vector-alloca.ll2
-rw-r--r--test/CodeGen/R600/vertex-fetch-encoding.ll16
-rw-r--r--test/CodeGen/R600/vop-shrink.ll1
-rw-r--r--test/CodeGen/R600/vselect.ll59
-rw-r--r--test/CodeGen/R600/wait.ll1
-rw-r--r--test/CodeGen/R600/xor.ll1
-rw-r--r--test/CodeGen/R600/zero_extend.ll31
-rw-r--r--test/CodeGen/X86/and-load-fold.ll15
-rw-r--r--test/CodeGen/X86/avx-vperm2x128.ll72
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll10
-rw-r--r--test/CodeGen/X86/dag-optnone.ll73
-rw-r--r--test/CodeGen/X86/fastmath-optnone.ll35
-rw-r--r--test/CodeGen/X86/getelementptr.ll80
-rw-r--r--test/CodeGen/X86/inalloca-stdcall.ll1
-rw-r--r--test/CodeGen/X86/lower-vec-shuffle-bug.ll41
-rw-r--r--test/CodeGen/X86/pr22774.ll20
-rw-r--r--test/CodeGen/X86/scheduler-backtracking.ll51
-rw-r--r--test/CodeGen/X86/setcc-combine.ll166
-rw-r--r--test/CodeGen/X86/vector-shuffle-512-v8.ll13
-rw-r--r--test/CodeGen/X86/win64_alloca_dynalloca.ll22
-rw-r--r--test/CodeGen/X86/win_chkstk.ll5
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s7
-rw-r--r--test/MC/Disassembler/Mips/mips1/valid-mips1-el.txt228
-rw-r--r--test/MC/Disassembler/Mips/mips1/valid-mips1.txt228
-rw-r--r--test/MC/Disassembler/Mips/mips1/valid-xfail.txt5
-rw-r--r--test/MC/Disassembler/Mips/mips2/valid-mips2-el.txt314
-rw-r--r--test/MC/Disassembler/Mips/mips2/valid-mips2.txt314
-rw-r--r--test/MC/Disassembler/Mips/mips3/valid-mips3-el.txt374
-rw-r--r--test/MC/Disassembler/Mips/mips3/valid-mips3.txt414
-rw-r--r--test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt441
-rw-r--r--test/MC/Disassembler/Mips/mips32/valid-mips32.txt441
-rw-r--r--test/MC/Disassembler/Mips/mips32/valid-xfail-mips32.txt56
-rw-r--r--test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-le.txt504
-rw-r--r--test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt504
-rw-r--r--test/MC/Disassembler/Mips/mips32r2/valid-xfail-mips32r2.txt162
-rw-r--r--test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt148
-rw-r--r--test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt148
-rw-r--r--test/MC/Disassembler/Mips/mips32r6/valid-xfail-mips32r6.txt15
-rw-r--r--test/MC/Disassembler/Mips/mips4/valid-mips4-el.txt454
-rw-r--r--test/MC/Disassembler/Mips/mips4/valid-mips4.txt454
-rw-r--r--test/MC/Disassembler/Mips/mips4/valid-xfail-mips4.txt80
-rw-r--r--test/MC/Disassembler/Mips/mips64/valid-mips64-el.txt216
-rw-r--r--test/MC/Disassembler/Mips/mips64/valid-mips64-xfail.txt80
-rw-r--r--test/MC/Disassembler/Mips/mips64/valid-mips64.txt216
-rw-r--r--test/MC/Disassembler/Mips/mips64r2/valid-mips64r2-el.txt234
-rw-r--r--test/MC/Disassembler/Mips/mips64r2/valid-mips64r2.txt234
-rw-r--r--test/MC/Disassembler/Mips/mips64r2/valid-xfail-mips64r2.txt76
-rw-r--r--test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt166
-rw-r--r--test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt166
-rw-r--r--test/MC/Disassembler/Mips/mips64r6/valid-xfail-mips64r6.txt20
-rw-r--r--test/MC/ELF/uleb.s9
-rw-r--r--test/MC/Mips/mips-abi-bad.s36
-rw-r--r--test/MC/Mips/mips4/invalid-mips64r2.s4
-rw-r--r--test/MC/Mips/mips4/valid-xfail.s8
-rw-r--r--test/MC/Mips/mips4/valid.s8
-rw-r--r--test/MC/Mips/mips5/invalid-mips64r2.s4
-rw-r--r--test/MC/Mips/mips5/valid-xfail.s8
-rw-r--r--test/MC/Mips/mips5/valid.s8
-rw-r--r--test/MC/Mips/mips64/invalid-mips64r2.s4
-rw-r--r--test/MC/Mips/mips64/valid-xfail.s8
-rw-r--r--test/MC/Mips/mips64/valid.s8
-rw-r--r--test/MC/Mips/mips64r2/abi-bad.s12
-rw-r--r--test/MC/Mips/mips64r2/valid-xfail.s4
-rw-r--r--test/Transforms/ConstProp/shift.ll69
-rw-r--r--test/Transforms/GCOVProfiling/return-block.ll23
-rw-r--r--test/Transforms/GVN/edge.ll47
-rw-r--r--test/Transforms/LoopRotate/crash.ll18
-rw-r--r--test/tools/llvm-cov/Inputs/test_exit_block_arcs.gcdabin0 -> 124 bytes
-rw-r--r--test/tools/llvm-cov/Inputs/test_exit_block_arcs.gcnobin0 -> 216 bytes
-rw-r--r--test/tools/llvm-cov/llvm-cov.test4
-rwxr-xr-xutils/release/tag.sh2
412 files changed, 12692 insertions, 5960 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cfa32cf4bc5a..bb21cc5d7516 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -48,7 +48,7 @@ set(CMAKE_MODULE_PATH
set(LLVM_VERSION_MAJOR 3)
set(LLVM_VERSION_MINOR 6)
-set(LLVM_VERSION_PATCH 0)
+set(LLVM_VERSION_PATCH 1)
if (NOT PACKAGE_VERSION)
set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 3bd8aa1806c2..577a7d521ea9 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -32,11 +32,11 @@ dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl address for reporting bugs.
-AC_INIT([LLVM],[3.6.0],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.6.1],[http://llvm.org/bugs/])
LLVM_VERSION_MAJOR=3
LLVM_VERSION_MINOR=6
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
LLVM_VERSION_SUFFIX=
AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
@@ -1714,7 +1714,9 @@ if test "$llvm_cv_os_type" = "MingW" ; then
AC_CHECK_LIB(gcc,_alloca,AC_DEFINE([HAVE__ALLOCA],[1],[Have host's _alloca]))
AC_CHECK_LIB(gcc,__alloca,AC_DEFINE([HAVE___ALLOCA],[1],[Have host's __alloca]))
AC_CHECK_LIB(gcc,__chkstk,AC_DEFINE([HAVE___CHKSTK],[1],[Have host's __chkstk]))
+ AC_CHECK_LIB(gcc,__chkstk_ms,AC_DEFINE([HAVE___CHKSTK_MS],[1],[Have host's __chkstk_ms]))
AC_CHECK_LIB(gcc,___chkstk,AC_DEFINE([HAVE____CHKSTK],[1],[Have host's ___chkstk]))
+ AC_CHECK_LIB(gcc,___chkstk_ms,AC_DEFINE([HAVE____CHKSTK_MS],[1],[Have host's ___chkstk_ms]))
AC_CHECK_LIB(gcc,__ashldi3,AC_DEFINE([HAVE___ASHLDI3],[1],[Have host's __ashldi3]))
AC_CHECK_LIB(gcc,__ashrdi3,AC_DEFINE([HAVE___ASHRDI3],[1],[Have host's __ashrdi3]))
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index f806d9c54ed9..6a7538895c41 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -198,7 +198,9 @@ if( PURE_WINDOWS )
check_function_exists(_alloca HAVE__ALLOCA)
check_function_exists(__alloca HAVE___ALLOCA)
check_function_exists(__chkstk HAVE___CHKSTK)
+ check_function_exists(__chkstk_ms HAVE___CHKSTK_MS)
check_function_exists(___chkstk HAVE____CHKSTK)
+ check_function_exists(___chkstk_ms HAVE____CHKSTK_MS)
check_function_exists(__ashldi3 HAVE___ASHLDI3)
check_function_exists(__ashrdi3 HAVE___ASHRDI3)
diff --git a/configure b/configure
index ba5ecbef334f..ea5bf52101b5 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.6.0.
+# Generated by GNU Autoconf 2.60 for LLVM 3.6.1.
#
# Report bugs to <http://llvm.org/bugs/>.
#
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='LLVM'
PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.6.0'
-PACKAGE_STRING='LLVM 3.6.0'
+PACKAGE_VERSION='3.6.1'
+PACKAGE_STRING='LLVM 3.6.1'
PACKAGE_BUGREPORT='http://llvm.org/bugs/'
ac_unique_file="lib/IR/Module.cpp"
@@ -1314,7 +1314,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures LLVM 3.6.0 to adapt to many kinds of systems.
+\`configure' configures LLVM 3.6.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1380,7 +1380,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of LLVM 3.6.0:";;
+ short | recursive ) echo "Configuration of LLVM 3.6.1:";;
esac
cat <<\_ACEOF
@@ -1550,7 +1550,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-LLVM configure 3.6.0
+LLVM configure 3.6.1
generated by GNU Autoconf 2.60
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1566,7 +1566,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by LLVM $as_me 3.6.0, which was
+It was created by LLVM $as_me 3.6.1, which was
generated by GNU Autoconf 2.60. Invocation command line was
$ $0 $@
@@ -1922,7 +1922,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
LLVM_VERSION_MAJOR=3
LLVM_VERSION_MINOR=6
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
LLVM_VERSION_SUFFIX=
@@ -15438,6 +15438,91 @@ _ACEOF
fi
+ { echo "$as_me:$LINENO: checking for __chkstk_ms in -lgcc" >&5
+echo $ECHO_N "checking for __chkstk_ms in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___chkstk_ms+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __chkstk_ms ();
+int
+main ()
+{
+return __chkstk_ms ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___chkstk_ms=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___chkstk_ms=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___chkstk_ms" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___chkstk_ms" >&6; }
+if test $ac_cv_lib_gcc___chkstk_ms = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___CHKSTK_MS 1
+_ACEOF
+
+fi
+
{ echo "$as_me:$LINENO: checking for ___chkstk in -lgcc" >&5
echo $ECHO_N "checking for ___chkstk in -lgcc... $ECHO_C" >&6; }
if test "${ac_cv_lib_gcc____chkstk+set}" = set; then
@@ -15523,6 +15608,91 @@ _ACEOF
fi
+ { echo "$as_me:$LINENO: checking for ___chkstk_ms in -lgcc" >&5
+echo $ECHO_N "checking for ___chkstk_ms in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc____chkstk_ms+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char ___chkstk_ms ();
+int
+main ()
+{
+return ___chkstk_ms ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc____chkstk_ms=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc____chkstk_ms=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc____chkstk_ms" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc____chkstk_ms" >&6; }
+if test $ac_cv_lib_gcc____chkstk_ms = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE____CHKSTK_MS 1
+_ACEOF
+
+fi
+
{ echo "$as_me:$LINENO: checking for __ashldi3 in -lgcc" >&5
echo $ECHO_N "checking for __ashldi3 in -lgcc... $ECHO_C" >&6; }
@@ -18901,7 +19071,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by LLVM $as_me 3.6.0, which was
+This file was extended by LLVM $as_me 3.6.1, which was
generated by GNU Autoconf 2.60. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -18954,7 +19124,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-LLVM config.status 3.6.0
+LLVM config.status 3.6.1
configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 04d7f5266510..a10f7e004abc 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -25,6 +25,31 @@ them.
Non-comprehensive list of changes in this release
=================================================
+Changes to the MIPS Target
+--------------------------
+
+* Added support for 128-bit integers on 64-bit targets.
+
+* Fixed some remaining N32/N64 calling convention bugs when using small
+ structures on big-endian targets.
+
+* Fixed missing sign-extensions that are required by the N32/N64 calling
+ convention when generating calls to library functions with 32-bit parameters.
+
+* ``-mno-odd-spreg`` is now honoured for vector insertion/extraction operations
+ when using ``-mmsa``.
+
+* Corrected the representation of member function pointers. This makes them
+ usable on microMIPS targets.
+
+* Fixed multiple segfaults and assertions in the disassembler when
+ disassembling instructions that have memory operands.
+
+* Fixed multiple cases of suboptimal code generation involving ``$zero``.
+
+Non-comprehensive list of changes in 3.6.0
+==========================================
+
.. NOTE
For small 1-3 sentence descriptions, just add an entry at the end of
this list. If your description won't fit comfortably in one bullet
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 27f07d4e39d1..5c512f6bca6c 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -423,6 +423,9 @@
/* Have host's __chkstk */
#cmakedefine HAVE___CHKSTK ${HAVE___CHKSTK}
+/* Have host's __chkstk_ms */
+#cmakedefine HAVE___CHKSTK_MS ${HAVE___CHKSTK_MS}
+
/* Have host's __cmpdi2 */
#cmakedefine HAVE___CMPDI2 ${HAVE___CMPDI2}
@@ -459,6 +462,9 @@
/* Have host's ___chkstk */
#cmakedefine HAVE____CHKSTK ${HAVE____CHKSTK}
+/* Have host's ___chkstk_ms */
+#cmakedefine HAVE____CHKSTK_MS ${HAVE____CHKSTK_MS}
+
/* Define if we link Polly to the tools */
#cmakedefine LINK_POLLY_INTO_TOOLS
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index ec09c84c5b71..c68c77aa6076 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -420,6 +420,9 @@
/* Have host's __chkstk */
#undef HAVE___CHKSTK
+/* Have host's __chkstk_ms */
+#undef HAVE___CHKSTK_MS
+
/* Have host's __cmpdi2 */
#undef HAVE___CMPDI2
@@ -456,6 +459,9 @@
/* Have host's ___chkstk */
#undef HAVE____CHKSTK
+/* Have host's ___chkstk_ms */
+#undef HAVE____CHKSTK_MS
+
/* Linker version detected at compile time. */
#undef HOST_LINK_VERSION
diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h
index a0f26741a8f0..9071bfeec7ed 100644
--- a/include/llvm/Target/TargetCallingConv.h
+++ b/include/llvm/Target/TargetCallingConv.h
@@ -134,6 +134,8 @@ namespace ISD {
/// Index original Function's argument.
unsigned OrigArgIndex;
+ /// Sentinel value for implicit machine-level input arguments.
+ static const unsigned NoArgIndex = UINT_MAX;
/// Offset in bytes of current input value relative to the beginning of
/// original argument. E.g. if argument was splitted into four 32 bit
@@ -147,6 +149,15 @@ namespace ISD {
VT = vt.getSimpleVT();
ArgVT = argvt;
}
+
+ bool isOrigArg() const {
+ return OrigArgIndex != NoArgIndex;
+ }
+
+ unsigned getOrigArgIndex() const {
+ assert(OrigArgIndex != NoArgIndex && "Implicit machine-level argument");
+ return OrigArgIndex;
+ }
};
/// OutputArg - This struct carries flags and a value for a
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index c5fed02e17b8..43d6f2772c5e 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2806,6 +2806,11 @@ public:
virtual bool useLoadStackGuardNode() const {
return false;
}
+
+ /// Returns true if arguments should be sign-extended in lib calls.
+ virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
+ return IsSigned;
+ }
};
/// Given an LLVM IR type and return type attributes, compute the return value
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 59f19a002ecc..7e9e35127a49 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1776,9 +1776,12 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
<< *IsomorphicInc << '\n');
Value *NewInc = OrigInc;
if (OrigInc->getType() != IsomorphicInc->getType()) {
- Instruction *IP = isa<PHINode>(OrigInc)
- ? (Instruction*)L->getHeader()->getFirstInsertionPt()
- : OrigInc->getNextNode();
+ Instruction *IP = nullptr;
+ if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
+ IP = PN->getParent()->getFirstInsertionPt();
+ else
+ IP = OrigInc->getNextNode();
+
IRBuilder<> Builder(IP);
Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
NewInc = Builder.
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index cbd62728ace9..96111225db5f 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -75,10 +75,9 @@ MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
I != E; ++I) {
unsigned MappedDef = *I;
// Source of copy is no longer available for propagation.
- if (AvailCopyMap.erase(MappedDef)) {
- for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
- AvailCopyMap.erase(*SR);
- }
+ AvailCopyMap.erase(MappedDef);
+ for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
+ AvailCopyMap.erase(*SR);
}
}
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index afb986f1d7c6..1df61e4fb1fe 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1160,13 +1160,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
- // Early exit if this basic block is in an optnone function.
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeNone))
- return;
-
// Add all the dag nodes to the worklist.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
@@ -2788,9 +2781,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
- Constant = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
- Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+ // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
+ if (SplatBitSize % BitWidth == 0) {
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
}
}
@@ -11043,7 +11040,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
} else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
// If the input vector is too large, try to split it.
// We don't support having two input vectors that are too large.
- if (VecIn2.getNode())
+ // If the zero vector was used, we can not split the vector,
+ // since we'd need 3 inputs.
+ if (UsesZeroVector || VecIn2.getNode())
return SDValue();
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
@@ -11055,7 +11054,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(0, TLI.getVectorIdxTy()));
- UsesZeroVector = false;
} else
return SDValue();
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 97fed230c536..c46539b71dbe 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -497,7 +497,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
OI != E; ++OI) {
const Value *Idx = *OI;
if (auto *StTy = dyn_cast<StructType>(Ty)) {
- unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
@@ -518,8 +518,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
if (CI->isZero())
continue;
// N = N + Offset
- TotalOffs +=
- DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+ uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
+ TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
if (TotalOffs >= MaxOffs) {
N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4591e79316d8..b59671554348 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -658,7 +658,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, false, dl).first;
+ &Op, 1, Signed, dl).first;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 8b54e6568b9e..5222de1063bb 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1423,9 +1423,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// If one or more successors has been unscheduled, then the current
// node is no longer available.
- if (!TrySU->isAvailable)
+ if (!TrySU->isAvailable || !TrySU->NodeQueueId)
CurSU = AvailableQueue->pop();
else {
+ // Available and in AvailableQueue
AvailableQueue->remove(TrySU);
CurSU = TrySU;
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d1929107fcb7..66095ee015e6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3399,30 +3399,21 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Ty = StTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
+ MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS);
+ unsigned PtrSize = PtrTy.getSizeInBits();
+ APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
// If this is a constant subscript, handle it quickly.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
- if (CI->isZero()) continue;
- uint64_t Offs =
- DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
- SDValue OffsVal;
- EVT PTy = TLI.getPointerTy(AS);
- unsigned PtrBits = PTy.getSizeInBits();
- if (PtrBits < 64)
- OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
- DAG.getConstant(Offs, MVT::i64));
- else
- OffsVal = DAG.getConstant(Offs, PTy);
-
- N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
- OffsVal);
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
+ APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
+ SDValue OffsVal = DAG.getConstant(Offs, PtrTy);
+ N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal);
continue;
}
// N = N + Idx * ElementSize;
- APInt ElementSize =
- APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend
@@ -5727,6 +5718,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Skip the first return-type Attribute to get to params.
Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
Args.push_back(Entry);
+
+ // If we have an explicit sret argument that is an Instruction, (i.e., it
+ // might point to function-local memory), we can't meaningfully tail-call.
+ if (Entry.isSRet && isa<Instruction>(V))
+ isTailCall = false;
}
// Check if target-independent constraints permit a tail call here.
@@ -7353,6 +7349,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
+
+ // sret demotion isn't compatible with tail-calls, since the sret argument
+ // points into the callers stack frame.
+ CLI.IsTailCall = false;
} else {
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
@@ -7638,7 +7638,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
- ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
+ ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
+ ISD::InputArg::NoArgIndex, 0);
Ins.push_back(RetArg);
}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 72e0aca84080..f12c035e7858 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -96,18 +96,19 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
+ Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
+ bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ .setSExtResult(signExtend).setZExtResult(!signExtend);
return LowerCallTo(CLI);
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 21893d2909ed..d75be2807862 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -177,25 +177,30 @@ bool RuntimeDyldMachO::isCompatibleFile(const object::ObjectFile &Obj) const {
}
template <typename Impl>
-void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &ObjImg,
+void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) {
unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID;
unsigned TextSID = RTDYLD_INVALID_SECTION_ID;
unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID;
- ObjSectionToIDMap::iterator i, e;
- for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) {
- const SectionRef &Section = i->first;
+ for (const auto &Section : Obj.sections()) {
StringRef Name;
Section.getName(Name);
- if (Name == "__eh_frame")
- EHFrameSID = i->second;
- else if (Name == "__text")
- TextSID = i->second;
+
+ // Force emission of the __text, __eh_frame, and __gcc_except_tab sections
+ // if they're present. Otherwise call down to the impl to handle other
+ // sections that have already been emitted.
+ if (Name == "__text")
+ TextSID = findOrEmitSection(Obj, Section, true, SectionMap);
+ else if (Name == "__eh_frame")
+ EHFrameSID = findOrEmitSection(Obj, Section, false, SectionMap);
else if (Name == "__gcc_except_tab")
- ExceptTabSID = i->second;
- else
- impl().finalizeSection(ObjImg, i->second, Section);
+ ExceptTabSID = findOrEmitSection(Obj, Section, true, SectionMap);
+ else {
+ auto I = SectionMap.find(Section);
+ if (I != SectionMap.end())
+ impl().finalizeSection(Obj, I->second, Section);
+ }
}
UnregisteredEHFrameSections.push_back(
EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID));
@@ -238,7 +243,8 @@ unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P,
}
static int64_t computeDelta(SectionEntry *A, SectionEntry *B) {
- int64_t ObjDistance = A->ObjAddress - B->ObjAddress;
+ int64_t ObjDistance =
+ static_cast<int64_t>(A->ObjAddress) - static_cast<int64_t>(B->ObjAddress);
int64_t MemDistance = A->LoadAddress - B->LoadAddress;
return ObjDistance - MemDistance;
}
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 9176bf2aeeea..39d9a1d0a282 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -1120,27 +1120,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return ConstantInt::get(CI1->getContext(), C1V | C2V);
case Instruction::Xor:
return ConstantInt::get(CI1->getContext(), C1V ^ C2V);
- case Instruction::Shl: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
- case Instruction::LShr: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
- case Instruction::AShr: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
+ case Instruction::Shl:
+ if (C2V.ult(C1V.getBitWidth()))
+ return ConstantInt::get(CI1->getContext(), C1V.shl(C2V));
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ case Instruction::LShr:
+ if (C2V.ult(C1V.getBitWidth()))
+ return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V));
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ case Instruction::AShr:
+ if (C2V.ult(C1V.getBitWidth()))
+ return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V));
+ return UndefValue::get(C1->getType()); // too big shift is undef
}
}
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index 245c500cf621..88e0cd094ec2 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -263,10 +263,12 @@ bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
// required to combine the edge counts that are contained in the GCDA file.
for (uint32_t BlockNo = 0; Count > 0; ++BlockNo) {
// The last block is always reserved for exit block
- if (BlockNo >= Blocks.size()-1) {
+ if (BlockNo >= Blocks.size()) {
errs() << "Unexpected number of edges (in " << Name << ").\n";
return false;
}
+ if (BlockNo == Blocks.size() - 1)
+ errs() << "(" << Name << ") has arcs from exit block.\n";
GCOVBlock &Block = *Blocks[BlockNo];
for (size_t EdgeNo = 0, End = Block.getNumDstEdges(); EdgeNo < End;
++EdgeNo) {
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 8eff90a9ef7f..e2a4fc1bef7b 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -3636,21 +3636,27 @@ bool AsmParser::parseDirectiveSpace(StringRef IDVal) {
}
/// parseDirectiveLEB128
-/// ::= (.sleb128 | .uleb128) expression
+/// ::= (.sleb128 | .uleb128) [ expression (, expression)* ]
bool AsmParser::parseDirectiveLEB128(bool Signed) {
checkForValidSection();
const MCExpr *Value;
- if (parseExpression(Value))
- return true;
+ for (;;) {
+ if (parseExpression(Value))
+ return true;
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
+ if (Signed)
+ getStreamer().EmitSLEB128Value(Value);
+ else
+ getStreamer().EmitULEB128Value(Value);
- if (Signed)
- getStreamer().EmitSLEB128Value(Value);
- else
- getStreamer().EmitULEB128Value(Value);
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
return false;
}
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index 7ccde463459d..c421ee84c2b7 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -333,23 +333,12 @@ void Memory::InvalidateInstructionCache(const void *Addr,
for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
asm volatile("icbi 0, %0" : : "r"(Line));
asm volatile("isync");
-# elif (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+# elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \
+ defined(__GNUC__)
// FIXME: Can we safely always call this for __GNUC__ everywhere?
const char *Start = static_cast<const char *>(Addr);
const char *End = Start + Len;
__clear_cache(const_cast<char *>(Start), const_cast<char *>(End));
-# elif defined(__mips__)
- const char *Start = static_cast<const char *>(Addr);
-# if defined(ANDROID)
- // The declaration of "cacheflush" in Android bionic:
- // extern int cacheflush(long start, long end, long flags);
- const char *End = Start + Len;
- long LStart = reinterpret_cast<long>(const_cast<char *>(Start));
- long LEnd = reinterpret_cast<long>(const_cast<char *>(End));
- cacheflush(LStart, LEnd, BCACHE);
-# else
- cacheflush(const_cast<char *>(Start), Len, BCACHE);
-# endif
# endif
#endif // end apple
diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc
index cd56b13c14c3..bbbf7ea6a777 100644
--- a/lib/Support/Windows/explicit_symbols.inc
+++ b/lib/Support/Windows/explicit_symbols.inc
@@ -10,9 +10,15 @@
#ifdef HAVE___CHKSTK
EXPLICIT_SYMBOL(__chkstk)
#endif
+#ifdef HAVE___CHKSTK_MS
+ EXPLICIT_SYMBOL(__chkstk_ms)
+#endif
#ifdef HAVE____CHKSTK
EXPLICIT_SYMBOL(___chkstk)
#endif
+#ifdef HAVE____CHKSTK_MS
+ EXPLICIT_SYMBOL(___chkstk_ms)
+#endif
#ifdef HAVE___MAIN
EXPLICIT_SYMBOL(__main) // FIXME: Don't call it.
#endif
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 08ee687d84a2..5159dbf0529d 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -12,6 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "AArch64.h"
#include "AArch64MCInstLower.h"
#include "AArch64MachineFunctionInfo.h"
@@ -494,24 +496,57 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(OutStreamer, TmpInst);
return;
}
- case AArch64::TLSDESC_BLR: {
- MCOperand Callee, Sym;
- MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
- MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
-
- // First emit a relocation-annotation. This expands to no code, but requests
+ case AArch64::TLSDESC_CALLSEQ: {
+ /// lower this to:
+ /// adrp x0, :tlsdesc:var
+ /// ldr x1, [x0, #:tlsdesc_lo12:var]
+ /// add x0, x0, #:tlsdesc_lo12:var
+ /// .tlsdesccall var
+ /// blr x1
+ /// (TPIDR_EL0 offset now in x0)
+ const MachineOperand &MO_Sym = MI->getOperand(0);
+ MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
+ MCOperand Sym, SymTLSDescLo12, SymTLSDesc;
+ MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
+ MCInstLowering.lowerOperand(MO_Sym, Sym);
+ MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
+ MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc);
+
+ MCInst Adrp;
+ Adrp.setOpcode(AArch64::ADRP);
+ Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Adrp.addOperand(SymTLSDesc);
+ EmitToStreamer(OutStreamer, Adrp);
+
+ MCInst Ldr;
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Ldr.addOperand(SymTLSDescLo12);
+ Ldr.addOperand(MCOperand::CreateImm(0));
+ EmitToStreamer(OutStreamer, Ldr);
+
+ MCInst Add;
+ Add.setOpcode(AArch64::ADDXri);
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(SymTLSDescLo12);
+ Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
+ EmitToStreamer(OutStreamer, Add);
+
+ // Emit a relocation-annotation. This expands to no code, but requests
// the following instruction gets an R_AARCH64_TLSDESC_CALL.
MCInst TLSDescCall;
TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
TLSDescCall.addOperand(Sym);
EmitToStreamer(OutStreamer, TLSDescCall);
- // Other than that it's just a normal indirect call to the function loaded
- // from the descriptor.
- MCInst BLR;
- BLR.setOpcode(AArch64::BLR);
- BLR.addOperand(Callee);
- EmitToStreamer(OutStreamer, BLR);
+ MCInst Blr;
+ Blr.setOpcode(AArch64::BLR);
+ Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ EmitToStreamer(OutStreamer, Blr);
return;
}
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index aab8e384b8d0..ba4fc3b25e0e 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
switch (I->getOpcode()) {
- case AArch64::TLSDESC_BLR:
+ case AArch64::TLSDESC_CALLSEQ:
// Make sure it's a local dynamic access.
- if (!I->getOperand(1).isSymbol() ||
- strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ if (!I->getOperand(0).isSymbol() ||
+ strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
break;
if (TLSBaseAddrReg)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 399b5eeaf5f5..6458d56c751f 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -64,8 +64,16 @@ EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow AArch64 SLI/SRI formation"),
- cl::init(false));
+ cl::desc("Allow AArch64 SLI/SRI formation"),
+ cl::init(false));
+
+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, by
+// default, for now, fall back to GeneralDynamic code generation.
+cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
+ "aarch64-elf-ldtls-generation", cl::Hidden,
+ cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+ cl::init(false));
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
@@ -760,7 +768,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL";
+ case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
case AArch64ISD::ADC: return "AArch64ISD::ADC";
case AArch64ISD::SBC: return "AArch64ISD::SBC";
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
@@ -2023,18 +2031,19 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
- std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[i].OrigArgIndex;
-
- // Get type of the original argument.
- EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
- MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
- // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
- if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
- ValVT = MVT::i8;
- else if (ActualMVT == MVT::i16)
- ValVT = MVT::i16;
+ if (Ins[i].isOrigArg()) {
+ std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[i].getOrigArgIndex();
+ // Get type of the original argument.
+ EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
+ MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
+ // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+ if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+ ValVT = MVT::i8;
+ else if (ActualMVT == MVT::i16)
+ ValVT = MVT::i16;
+ }
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
@@ -3049,61 +3058,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
-///
-/// Thus, the ideal call sequence on AArch64 is:
-///
-/// adrp x0, :tlsdesc:thread_var
-/// ldr x8, [x0, :tlsdesc_lo12:thread_var]
-/// add x0, x0, :tlsdesc_lo12:thread_var
-/// .tlsdesccall thread_var
-/// blr x8
-/// (TPIDR_EL0 offset now in x0).
+/// is a function pointer to carry out the resolution.
///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
+/// The sequence is:
+/// adrp x0, :tlsdesc:var
+/// ldr x1, [x0, #:tlsdesc_lo12:var]
+/// add x0, x0, #:tlsdesc_lo12:var
+/// .tlsdesccall var
+/// blr x1
+/// (TPIDR_EL0 offset now in x0)
///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
- SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const {
+/// The above sequence must be produced unscheduled, to enable the linker to
+/// optimize/relax this sequence.
+/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
+/// above sequence, and expanded really late in the compilation flow, to ensure
+/// the sequence is produced as per above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- // The function we need to call is simply the first entry in the GOT for this
- // descriptor, load it in preparation.
- SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
-
- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
- // silly).
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
-
- // The function takes only one argument: the address of the descriptor itself
- // in X0.
- SDValue Glue, Chain;
- Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
- Glue = Chain.getValue(1);
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- // We're now ready to populate the argument list, as with a normal call:
- SmallVector<SDValue, 6> Ops;
+ SmallVector<SDValue, 2> Ops;
Ops.push_back(Chain);
- Ops.push_back(Func);
Ops.push_back(SymAddr);
- Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
- Ops.push_back(DAG.getRegisterMask(Mask));
- Ops.push_back(Glue);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
- Glue = Chain.getValue(1);
+ Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+ SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
@@ -3114,9 +3096,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
"ELF TLS only supported in small memory model");
+ // Different choices can be made for the maximum size of the TLS area for a
+ // module. For the small address model, the default TLS size is 16MiB and the
+ // maximum TLS size is 4GiB.
+ // FIXME: add -mtls-size command line option and make it control the 16MiB
+ // vs. 4GiB code sequence generation.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+ if (Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+ }
SDValue TPOff;
EVT PtrVT = getPointerTy();
@@ -3127,17 +3118,20 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::LocalExec) {
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
+ SDValue TPWithOff_lo =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+ HiVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ SDValue TPWithOff =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+ LoVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ return TPWithOff;
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -3152,19 +3146,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3173,40 +3154,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
-
- SDValue DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
-
- TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
- } else if (Model == TLSModel::GeneralDynamic) {
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3214,7 +3178,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index cc25bede8d62..5a193228f189 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -29,9 +29,9 @@ enum {
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
- // Almost the same as a normal call node, except that a TLSDesc relocation is
- // needed so the linker can relax it correctly if possible.
- TLSDESC_CALL,
+ // Produces the full sequence of instructions for getting the thread pointer
+ // offset of a variable into X0, using the TLSDesc model.
+ TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
@@ -399,8 +399,8 @@ private:
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const;
+ SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index e0fb90a9f621..a6f09e944af4 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -96,6 +96,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
+
+// Generates the general dynamic sequences, i.e.
+// adrp x0, :tlsdesc:var
+// ldr x1, [x0, #:tlsdesc_lo12:var]
+// add x0, x0, #:tlsdesc_lo12:var
+// .tlsdesccall var
+// blr x1
+
+// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
+// number of operands (the variable)
+def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
+ [SDTCisPtrTy<0>]>;
+
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
@@ -229,10 +242,11 @@ def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
-def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
- SDT_AArch64TLSDescCall,
- [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
- SDNPVariadic]>;
+def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
+ SDT_AArch64TLSDescCallSeq,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
@@ -1049,15 +1063,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
let AsmString = ".tlsdesccall $sym";
}
-// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
-// gets expanded to two MCInsts during lowering.
-let isCall = 1, Defs = [LR] in
-def TLSDESC_BLR
- : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
- [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
+// FIXME: maybe the scratch register used shouldn't be fixed to X1?
+// FIXME: can "hasSideEffects be dropped?
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+ isCodeGenOnly = 1 in
+def TLSDESC_CALLSEQ
+ : Pseudo<(outs), (ins i64imm:$sym),
+ [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
+ (TLSDESC_CALLSEQ texternalsym:$sym)>;
-def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
- (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index e57b0f4dbb09..b82934134d91 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -22,9 +22,12 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration;
+
AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
@@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
Model = Printer.TM.getTLSModel(GV);
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration &&
+ Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+
} else {
assert(MO.isSymbol() &&
StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
"unexpected external TLS symbol");
+ // The general dynamic access sequence is used to get the
+ // address of _TLS_MODULE_BASE_.
Model = TLSModel::GeneralDynamic;
}
switch (Model) {
@@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
RefFlags |= AArch64MCExpr::VK_G1;
else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
RefFlags |= AArch64MCExpr::VK_G0;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12)
+ RefFlags |= AArch64MCExpr::VK_HI12;
if (MO.getTargetFlags() & AArch64II::MO_NC)
RefFlags |= AArch64MCExpr::VK_NC;
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index c60b09a5f119..6d0337ce15ed 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -1229,7 +1229,7 @@ namespace AArch64II {
MO_NO_FLAG,
- MO_FRAGMENT = 0x7,
+ MO_FRAGMENT = 0xf,
/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
@@ -1257,26 +1257,31 @@ namespace AArch64II {
/// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G0 = 6,
+ /// MO_HI12 - This flag indicates that a symbol operand represents the bits
+ /// 13-24 of a 64-bit address, used in a arithmetic immediate-shifted-left-
+ /// by-12-bits instruction.
+ MO_HI12 = 7,
+
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
- MO_GOT = 8,
+ MO_GOT = 0x10,
/// MO_NC - Indicates whether the linker is expected to check the symbol
/// reference for overflow. For example in an ADRP/ADD pair of relocations
/// the ADRP usually does check, but not the ADD.
- MO_NC = 0x10,
+ MO_NC = 0x20,
/// MO_TLS - Indicates that the operand being accessed is some kind of
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
- MO_TLS = 0x20,
+ MO_TLS = 0x40,
/// MO_CONSTPOOL - This flag indicates that a symbol operand represents
/// the address of a constant pool entry for the symbol, rather than the
/// address of the symbol itself.
- MO_CONSTPOOL = 0x40
+ MO_CONSTPOOL = 0x80
};
} // end namespace AArch64II
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index a1de5efb4507..2dc4707bb0a1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3092,8 +3092,11 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
+ if (Ins[VA.getValNo()].isOrigArg()) {
+ std::advance(CurOrigArg,
+ Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
+ }
// Arguments stored in registers.
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
@@ -3173,7 +3176,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
- int index = ArgLocs[i].getValNo();
+ int index = VA.getValNo();
// Some Ins[] entries become multiple ArgLoc[] entries.
// Process them only once.
@@ -3186,6 +3189,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
+ assert(Ins[index].isOrigArg() &&
+ "Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 7db5b34204c3..699dfae02fc9 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -3667,43 +3667,44 @@ bool MipsAsmParser::parseDirectiveModule() {
return false;
}
- if (Lexer.is(AsmToken::Identifier)) {
- StringRef Option = Parser.getTok().getString();
- Parser.Lex();
-
- if (Option == "oddspreg") {
- getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
- clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+ StringRef Option;
+ if (Parser.parseIdentifier(Option)) {
+ reportParseError("expected .module option identifier");
+ return false;
+ }
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token, expected end of statement");
- return false;
- }
+ if (Option == "oddspreg") {
+ getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
+ clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
return false;
- } else if (Option == "nooddspreg") {
- if (!isABI_O32()) {
- Error(L, "'.module nooddspreg' requires the O32 ABI");
- return false;
- }
+ }
- getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
- setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+ return false; // parseDirectiveModule has finished successfully.
+ } else if (Option == "nooddspreg") {
+ if (!isABI_O32()) {
+ Error(L, "'.module nooddspreg' requires the O32 ABI");
+ return false;
+ }
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token, expected end of statement");
- return false;
- }
+ getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
+ setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
return false;
- } else if (Option == "fp") {
- return parseDirectiveModuleFP();
}
+ return false; // parseDirectiveModule has finished successfully.
+ } else if (Option == "fp") {
+ return parseDirectiveModuleFP();
+ } else {
return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
}
-
- return false;
}
/// parseDirectiveModuleFP
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index da33f3b913cd..d42b948cc615 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -259,6 +259,11 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -304,6 +309,10 @@ static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1118,6 +1127,23 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = fieldFromInstruction(Insn, 7, 9);
+ unsigned Hint = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::CreateImm(Hint));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeSyncI(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1354,6 +1380,23 @@ static DecodeStatus DecodeFMem3(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeFMemCop2R6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<11>(Insn & 0x07ff);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 11, 5);
+
+ Reg = getReg(Decoder, Mips::COP2RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 3e1d047091a9..5ad56834607e 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -58,15 +58,15 @@ def MipsInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
def FeatureNoABICalls : SubtargetFeature<"noabicalls", "NoABICalls", "true",
- "Disable SVR4-style position-independent code.">;
+ "Disable SVR4-style position-independent code">;
def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
- "General Purpose Registers are 64-bit wide.">;
+ "General Purpose Registers are 64-bit wide">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
- "Support 64-bit FP registers.">;
+ "Support 64-bit FP registers">;
def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true",
- "Support for FPXX.">;
+ "Support for FPXX">;
def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
- "IEEE 754-2008 NaN encoding.">;
+ "IEEE 754-2008 NaN encoding">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
def FeatureO32 : SubtargetFeature<"o32", "ABI", "MipsABIInfo::O32()",
@@ -81,7 +81,7 @@ def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
"Disable odd numbered single-precision "
"registers">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
- "true", "Enable vector FPU instructions.">;
+ "true", "Enable vector FPU instructions">;
def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
"Mips I ISA Support [highly experimental]">;
def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 976beccfed9d..7e1fbfdcac46 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -293,6 +293,9 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted(
void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
+ if (Amount == 0)
+ return;
+
if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16>
BuildAddiuSpImm(MBB, I, Amount);
else
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index 185d12ec93fd..49c63226dc06 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -379,7 +379,6 @@ class JMP_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd,
list<dag> Pattern = [];
bit isTerminator = 1;
bit hasDelaySlot = 0;
- string DecoderMethod = "DecodeSimm16";
}
class JIALC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jialc", calloffset16,
@@ -550,6 +549,7 @@ class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
list<dag> Pattern = [];
+ string DecoderMethod = "DecodeCacheOpR6";
}
class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>;
@@ -561,6 +561,7 @@ class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list<dag> Pattern = [];
bit mayLoad = 1;
+ string DecoderMethod = "DecodeFMemCop2R6";
}
class LDC2_R6_DESC : COP2LD_DESC_BASE<"ldc2", COP2Opnd>;
@@ -572,6 +573,7 @@ class COP2ST_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list<dag> Pattern = [];
bit mayStore = 1;
+ string DecoderMethod = "DecodeFMemCop2R6";
}
class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index bfe2ba011957..d6628d408480 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -440,6 +440,16 @@ def : MipsPat<(i64 (sext_inreg GPR64:$src, i32)),
// bswap MipsPattern
def : MipsPat<(bswap GPR64:$rt), (DSHD (DSBH GPR64:$rt))>;
+// Carry pattern
+def : MipsPat<(subc GPR64:$lhs, GPR64:$rhs),
+ (DSUBu GPR64:$lhs, GPR64:$rhs)>;
+let AdditionalPredicates = [NotDSP] in {
+ def : MipsPat<(addc GPR64:$lhs, GPR64:$rhs),
+ (DADDu GPR64:$lhs, GPR64:$rhs)>;
+ def : MipsPat<(addc GPR64:$lhs, immSExt16:$imm),
+ (DADDiu GPR64:$lhs, imm:$imm)>;
+}
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp
index e18cc8b1f7b3..b8081295ca64 100644
--- a/lib/Target/Mips/MipsCCState.cpp
+++ b/lib/Target/Mips/MipsCCState.cpp
@@ -132,8 +132,8 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
continue;
}
- assert(Ins[i].OrigArgIndex < MF.getFunction()->arg_size());
- std::advance(FuncArg, Ins[i].OrigArgIndex);
+ assert(Ins[i].getOrigArgIndex() < MF.getFunction()->arg_size());
+ std::advance(FuncArg, Ins[i].getOrigArgIndex());
OriginalArgWasF128.push_back(
originalTypeIsF128(FuncArg->getType(), nullptr));
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 90bf9f3f5835..dcd88f25d25c 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -123,7 +123,7 @@ def CC_MipsN_SoftFloat : CallingConv<[
]>;
def CC_MipsN : CallingConv<[
- CCIfType<[i8, i16, i32],
+ CCIfType<[i8, i16, i32, i64],
CCIfSubtargetNot<"isLittle()",
CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
@@ -159,6 +159,10 @@ def CC_MipsN : CallingConv<[
// N32/64 variable arguments.
// All arguments are passed in integer registers.
def CC_MipsN_VarArg : CallingConv<[
+ CCIfType<[i8, i16, i32, i64],
+ CCIfSubtargetNot<"isLittle()",
+ CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
+
// All integers are promoted to 64-bit.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index d25f5637f57c..37fc7849310b 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -261,6 +261,9 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
}
if (!Subtarget.isGP64bit()) {
@@ -616,6 +619,33 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue ValueIfTrue = N->getOperand(0), ValueIfFalse = N->getOperand(2);
+
+ ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(ValueIfFalse);
+ if (!FalseC || FalseC->getZExtValue())
+ return SDValue();
+
+ // Since RHS (False) is 0, we swap the order of the True/False operands
+ // (obviously also inverting the condition) so that we can
+ // take advantage of conditional moves using the $0 register.
+ // Example:
+ // return (a != 0) ? x : 0;
+ // load $reg, x
+ // movz $reg, $0, a
+ unsigned Opc = (N->getOpcode() == MipsISD::CMovFP_T) ? MipsISD::CMovFP_F :
+ MipsISD::CMovFP_T;
+
+ SDValue FCC = N->getOperand(1), Glue = N->getOperand(3);
+ return DAG.getNode(Opc, SDLoc(N), ValueIfFalse.getValueType(),
+ ValueIfFalse, FCC, ValueIfTrue, Glue);
+}
+
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
@@ -749,6 +779,9 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
return performSELECTCombine(N, DAG, DCI, Subtarget);
+ case MipsISD::CMovFP_F:
+ case MipsISD::CMovFP_T:
+ return performCMovFPCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
@@ -2017,10 +2050,11 @@ SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
+ MVT VT = Subtarget.isGP64bit() ? MVT::i64 : MVT::i32;
+
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
-
- // if shamt < 32:
+ // if shamt < (VT.bits):
// lo = (shl lo, shamt)
// hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt))
// else:
@@ -2028,18 +2062,17 @@ SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
// hi = (shl lo, shamt[4:0])
SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt,
DAG.getConstant(-1, MVT::i32));
- SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo,
- DAG.getConstant(1, MVT::i32));
- SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, ShiftRight1Lo,
- Not);
- SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi, Shamt);
- SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo);
- SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, Shamt);
+ SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
+ DAG.getConstant(1, VT));
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not);
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+ SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
- DAG.getConstant(0x20, MVT::i32));
- Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond,
- DAG.getConstant(0, MVT::i32), ShiftLeftLo);
- Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or);
+ DAG.getConstant(VT.getSizeInBits(), MVT::i32));
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond,
+ DAG.getConstant(0, VT), ShiftLeftLo);
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or);
SDValue Ops[2] = {Lo, Hi};
return DAG.getMergeValues(Ops, DL);
@@ -2050,8 +2083,9 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
+ MVT VT = Subtarget.isGP64bit() ? MVT::i64 : MVT::i32;
- // if shamt < 32:
+ // if shamt < (VT.bits):
// lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt))
// if isSRA:
// hi = (sra hi, shamt)
@@ -2066,21 +2100,20 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
// hi = 0
SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt,
DAG.getConstant(-1, MVT::i32));
- SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi,
- DAG.getConstant(1, MVT::i32));
- SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, ShiftLeft1Hi, Not);
- SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo, Shamt);
- SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo);
- SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, MVT::i32,
- Hi, Shamt);
+ SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
+ DAG.getConstant(1, VT));
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not);
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+ SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL,
+ DL, VT, Hi, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
- DAG.getConstant(0x20, MVT::i32));
- SDValue Shift31 = DAG.getNode(ISD::SRA, DL, MVT::i32, Hi,
- DAG.getConstant(31, MVT::i32));
- Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftRightHi, Or);
- Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond,
- IsSRA ? Shift31 : DAG.getConstant(0, MVT::i32),
- ShiftRightHi);
+ DAG.getConstant(VT.getSizeInBits(), MVT::i32));
+ SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi,
+ DAG.getConstant(VT.getSizeInBits() - 1, VT));
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or);
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond,
+ IsSRA ? Ext : DAG.getConstant(0, VT), ShiftRightHi);
SDValue Ops[2] = {Lo, Hi};
return DAG.getMergeValues(Ops, DL);
@@ -2900,13 +2933,16 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- std::advance(FuncArg, Ins[i].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[i].OrigArgIndex;
+ if (Ins[i].isOrigArg()) {
+ std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[i].getOrigArgIndex();
+ }
EVT ValVT = VA.getValVT();
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool IsRegLoc = VA.isRegLoc();
if (Flags.isByVal()) {
+ assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit");
unsigned FirstByValReg, LastByValReg;
unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
@@ -3027,6 +3063,15 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
+bool
+MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
+ if (Subtarget.hasMips3() && Subtarget.abiUsesSoftFloat()) {
+ if (Type == MVT::i32)
+ return true;
+ }
+ return IsSigned;
+}
+
SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 4132de6dbcad..4da337a61dad 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -472,6 +472,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const override;
+ bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
+
// Inline asm support
ConstraintType
getConstraintType(const std::string &Constraint) const override;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 2aa83289a106..ed97cb461923 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -458,42 +458,42 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>;
def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
- MADDS_FM<4, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
+ MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
- MADDS_FM<5, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
+ MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>,
- MADDS_FM<6, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
+ MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>,
- MADDS_FM<7, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
+ MADDS_FM<7, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
}
def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
+ MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
+ MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
- MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
+ MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
- MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
+ MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
}
-let isCodeGenOnly=1 in {
+let DecoderNamespace = "Mips64" in {
def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
}
let AdditionalPredicates = [NoNaNsFPMath],
- isCodeGenOnly=1 in {
+ DecoderNamespace = "Mips64" in {
def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>,
- MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>,
- MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 2b3b6c1e5242..1eb8d9ad821d 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -388,6 +388,8 @@ def MSA128W: RegisterClass<"Mips", [v4i32, v4f32], 128,
(sequence "W%u", 0, 31)>;
def MSA128D: RegisterClass<"Mips", [v2i64, v2f64], 128,
(sequence "W%u", 0, 31)>;
+def MSA128WEvens: RegisterClass<"Mips", [v4i32, v4f32], 128,
+ (decimate (sequence "W%u", 0, 31), 2)>;
def MSACtrl: RegisterClass<"Mips", [i32], 32, (add
MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>;
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 70963821690c..8c2620ca7f02 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -236,13 +236,35 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
(Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
"(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+ unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu;
+ if (Subtarget->isGP64bit()) {
+ SLTuOp = Mips::SLTu64;
+ ADDuOp = Mips::DADDu;
+ }
+
SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
- SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops);
- SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
- SDValue(Carry, 0), RHS);
+ SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops);
+
+ if (Subtarget->isGP64bit()) {
+ // On 64-bit targets, sltu produces an i64 but our backend currently says
+ // that SLTu64 produces an i32. We need to fix this in the long run but for
+ // now, just make the DAG type-correct by asserting the upper bits are zero.
+ Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
+ CurDAG->getTargetConstant(0, VT),
+ SDValue(Carry, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, VT));
+ }
+
+ // Generate a second addition only if we know that RHS is not a
+ // constant-zero node.
+ SDNode *AddCarry = Carry;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C || C->getZExtValue())
+ AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
+
return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
SDValue(AddCarry, 0));
}
@@ -641,7 +663,8 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
case ISD::SUBE: {
SDValue InFlag = Node->getOperand(2);
- Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
+ unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu;
+ Result = selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node);
return std::make_pair(true, Result);
}
@@ -649,7 +672,8 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
break;
SDValue InFlag = Node->getOperand(2);
- Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
+ unsigned Opc = Subtarget->isGP64bit() ? Mips::DADDu : Mips::ADDu;
+ Result = selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node);
return std::make_pair(true, Result);
}
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 29aac2e276b0..2c033ce61c16 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -122,6 +122,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::MUL, MVT::i64, Custom);
if (Subtarget.isGP64bit()) {
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
setOperationAction(ISD::MULHS, MVT::i64, Custom);
setOperationAction(ISD::MULHU, MVT::i64, Custom);
setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
@@ -200,6 +202,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
if (Subtarget.hasMips64r6()) {
// MIPS64r6 replaces the accumulator-based multiplies with a three register
// instruction
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::MUL, MVT::i64, Legal);
setOperationAction(ISD::MULHS, MVT::i64, Legal);
setOperationAction(ISD::MULHU, MVT::i64, Legal);
@@ -2879,10 +2883,21 @@ emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
unsigned Ws = MI->getOperand(1).getReg();
unsigned Lane = MI->getOperand(2).getImm();
- if (Lane == 0)
- BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo);
- else {
- unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ if (Lane == 0) {
+ unsigned Wt = Ws;
+ if (!Subtarget.useOddSPReg()) {
+ // We must copy to an even-numbered MSA register so that the
+ // single-precision sub-register is also guaranteed to be even-numbered.
+ Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
+ }
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
+ } else {
+ unsigned Wt = RegInfo.createVirtualRegister(
+ Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
+ &Mips::MSA128WEvensRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
@@ -2944,7 +2959,9 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
unsigned Wd_in = MI->getOperand(1).getReg();
unsigned Lane = MI->getOperand(2).getImm();
unsigned Fs = MI->getOperand(3).getReg();
- unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ unsigned Wt = RegInfo.createVirtualRegister(
+ Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
+ &Mips::MSA128WEvensRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
.addImm(0)
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 74f291f609fd..180b04327fc9 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -364,6 +364,9 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+ if (Amount == 0)
+ return;
+
if (isInt<16>(Amount))// addi sp, sp, amount
BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
else { // Expand immediate that doesn't fit in 16-bit.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 5c52bb19b0d1..e580c811d71f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2688,9 +2688,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned ObjSize = ObjectVT.getStoreSize();
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
- std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[ArgNo].OrigArgIndex;
-
+ if (Ins[ArgNo].isOrigArg()) {
+ std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].getOrigArgIndex();
+ }
/* Respect alignment of argument on the stack. */
unsigned Align =
CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
@@ -2704,6 +2705,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
+ assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
+
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -3064,9 +3067,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
- std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[ArgNo].OrigArgIndex;
-
+ if (Ins[ArgNo].isOrigArg()) {
+ std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].getOrigArgIndex();
+ }
unsigned CurArgOffset = ArgOffset;
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
@@ -3087,6 +3091,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
+ assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
+
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index 1df4448abf05..03f2bbe4820e 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -97,6 +97,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"true",
"Enable spilling of VGPRs to scratch memory">;
+def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
+ "SGPRInitBug",
+ "true",
+ "VI SGPR initilization bug requiring a fixed SGPR allocation size">;
+
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
diff --git a/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
index b545b456161f..0b426bc63dd5 100644
--- a/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
+++ b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
@@ -40,7 +40,8 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
std::vector<Function*> FuncsToClone;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Function &F = *I;
- if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty())
+ if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
+ !F.hasFnAttribute(Attribute::NoInline))
FuncsToClone.push_back(&F);
}
@@ -54,7 +55,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Function &F = *I;
- if (F.hasLocalLinkage()) {
+ if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
F.addFnAttr(Attribute::AlwaysInline);
}
}
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 6185e367ff50..1fae26e18a44 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -343,6 +343,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
+ if (STM.hasSGPRInitBug()) {
+ if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
+ llvm_unreachable("Too many SGPRs used with the SGPR init bug");
+
+ ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ }
+
ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 15112c7e54d4..68d557a1cf70 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -439,6 +439,31 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
}
+ case ISD::STORE: {
+ // Handle i64 stores here for the same reason mentioned above for loads.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Value = ST->getValue();
+ if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
+ break;
+
+ SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ MVT::v2i32, Value);
+ SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
+ ST->getBasePtr(), ST->getMemOperand());
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+
+ if (NewValue.getOpcode() == ISD::BITCAST) {
+ Select(NewStore.getNode());
+ return SelectCode(NewValue.getNode());
+ }
+
+ // getNode() may fold the bitcast if its input was another bitcast. If that
+ // happens we should only select the new store.
+ N = NewStore.getNode();
+ break;
+ }
+
case AMDGPUISD::REGISTER_LOAD: {
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
@@ -761,6 +786,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
SDLoc SL(N);
EVT VT = N->getValueType(0);
@@ -770,19 +797,12 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
- const SDValue False = CurDAG->getTargetConstant(0, MVT::i1);
- SDValue Ops[] = {
- Zero, // src0_modifiers
- N->getOperand(0), // src0
- Zero, // src1_modifiers
- N->getOperand(1), // src1
- Zero, // src2_modifiers
- N->getOperand(2), // src2
- False, // clamp
- Zero // omod
- };
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+ SDValue Ops[8];
+ SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
+ SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 2adcdf1c299e..b137053fbbc2 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -141,9 +141,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v2f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
- setOperationAction(ISD::STORE, MVT::i64, Promote);
- AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
-
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
@@ -162,9 +159,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
// Custom lowering of vector stores is required for local address space
// stores.
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
- // XXX: Native v2i32 local address space stores are possible, but not
- // currently implemented.
- setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
@@ -832,11 +826,9 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
SmallVector<SDValue, 8> Args;
- SDValue A = Op.getOperand(0);
- SDValue B = Op.getOperand(1);
- DAG.ExtractVectorElements(A, Args);
- DAG.ExtractVectorElements(B, Args);
+ for (const SDUse &U : Op->ops())
+ DAG.ExtractVectorElements(U.get(), Args);
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
}
@@ -881,9 +873,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
- case AMDGPUIntrinsic::AMDGPU_fract:
- case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
- return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_clamp:
case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
@@ -913,10 +902,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::AMDGPU_div_fmas:
- // FIXME: Dropping bool parameter. Work is needed to support the implicit
- // read from VCC.
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4));
case Intrinsic::AMDGPU_div_fixup:
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index e28ce0f03acc..202183c18a8d 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -140,6 +140,12 @@ public:
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
+ /// \brief Return the descriptor of the target-specific machine instruction
+ /// that corresponds to the specified pseudo or native opcode.
+ const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
+ return get(pseudoToMCOpcode(Opcode));
+ }
+
//===---------------------------------------------------------------------===//
// Pure virtual funtions to be implemented by sub-classes.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 0e34392bd50d..d657ad05c8c4 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -35,6 +35,11 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
+// float, float, float, vcc
+def AMDGPUFmasOp : SDTypeProfile<1, 4,
+ [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
+>;
+
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
@@ -153,7 +158,7 @@ def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
-def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;
+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
// Single or double precision division fixup.
// Special case divide fixup and flags(src0 = Quotient, src1 =
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 4e536c37b0bd..34b1fc8b5f47 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -164,10 +164,6 @@ class PrivateStore <SDPatternOperator op> : PrivateMemOp <
(ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;
-def extloadi8_private : PrivateLoad <extloadi8>;
-def sextloadi8_private : PrivateLoad <sextloadi8>;
-def extloadi16_private : PrivateLoad <extloadi16>;
-def sextloadi16_private : PrivateLoad <sextloadi16>;
def load_private : PrivateLoad <load>;
def truncstorei8_private : PrivateStore <truncstorei8>;
@@ -231,6 +227,9 @@ def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+def extloadi8_private : PrivateLoad <az_extloadi8>;
+def sextloadi8_private : PrivateLoad <sextloadi8>;
+
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;
@@ -267,6 +266,9 @@ def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+def extloadi16_private : PrivateLoad <az_extloadi16>;
+def sextloadi16_private : PrivateLoad <sextloadi16>;
+
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
@@ -649,17 +651,10 @@ class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
(RcpInst $src)
>;
-multiclass RsqPat<Instruction RsqInst, ValueType vt> {
- def : Pat <
- (fdiv FP_ONE, (fsqrt vt:$src)),
- (RsqInst $src)
- >;
-
- def : Pat <
- (AMDGPUrcp (fsqrt vt:$src)),
- (RsqInst $src)
- >;
-}
+class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
+ (AMDGPUrcp (fsqrt vt:$src)),
+ (RsqInst $src)
+>;
include "R600Instructions.td"
include "R700Instructions.td"
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index eee9c29038d0..ab489cd2a4ab 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -68,6 +68,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index b1c7498fc142..87cdb5f8db83 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -80,7 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true),
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
- EnableVGPRSpilling(false),
+ EnableVGPRSpilling(false),SGPRInitBug(false),
DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 566b45c1dccc..eeb41d3ec7f4 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -45,6 +45,10 @@ public:
VOLCANIC_ISLANDS,
};
+ enum {
+ FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+ };
+
private:
std::string DevName;
bool Is64bit;
@@ -66,6 +70,7 @@ private:
bool CFALUBug;
int LocalMemorySize;
bool EnableVGPRSpilling;
+ bool SGPRInitBug;
const DataLayout DL;
AMDGPUFrameLowering FrameLowering;
@@ -203,6 +208,10 @@ public:
return LocalMemorySize;
}
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
+
unsigned getAmdKernelCodeChipID() const;
bool enableMachineScheduler() const override {
diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td
index 58b5ce24b4a3..433c3fc01855 100644
--- a/lib/Target/R600/CaymanInstructions.td
+++ b/lib/Target/R600/CaymanInstructions.td
@@ -46,7 +46,7 @@ def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
-defm : RsqPat<RECIPSQRT_IEEE_cm, f32>;
+def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index f24f76b7fe16..299d1fa14896 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -69,7 +69,7 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
-defm : RsqPat<RECIPSQRT_IEEE_eg, f32>;
+def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 8271c6f45fb9..b66ed1000251 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -291,6 +291,8 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printImmediate64(Op.getImm(), O);
else
llvm_unreachable("Invalid register class size");
+ } else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) {
+ printImmediate32(Op.getImm(), O);
} else {
// We hit this for the immediate instruction bits that don't yet have a
// custom printer.
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index cff97cdb3beb..e5fef0c1d033 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -113,8 +113,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
// Volcanic Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"tonga", SIFullSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"tonga", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
-def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"iceland", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 595f69884544..2e1b0943252d 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -838,6 +838,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::AMDGPU_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
@@ -1694,7 +1698,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
// XXX - I think PartOffset should give you this, but it seems to give the
// size of the register which isn't useful.
- unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
+ unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
unsigned Offset = 36 + VA.getLocMemOffset();
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index b1d3ce276eee..05957d2cf2a1 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1193,7 +1193,7 @@ let Predicates = [isR600] in {
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
- defm : RsqPat<RECIPSQRT_IEEE_r600, f32>;
+ def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
def : FROUNDPat <CNDGE_r600, CNDGT_r600>;
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
index c99219dd9074..b8165fb4ab2c 100644
--- a/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -83,7 +83,7 @@ class SIAnnotateControlFlow : public FunctionPass {
void insertElse(BranchInst *Term);
- Value *handleLoopCondition(Value *Cond, PHINode *Broken);
+ Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L);
void handleLoop(BranchInst *Term);
@@ -207,8 +207,17 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
/// \brief Recursively handle the condition leading to a loop
-Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) {
- if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
+Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
+ llvm::Loop *L) {
+
+ // Only search through PHI nodes which are inside the loop. If we try this
+ // with PHI nodes that are outside of the loop, we end up inserting new PHI
+ // nodes outside of the loop which depend on values defined inside the loop.
+ // This will break the module with
+ // 'Instruction does not dominate all users!' errors.
+ PHINode *Phi = nullptr;
+ if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {
+
BasicBlock *Parent = Phi->getParent();
PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
Value *Ret = NewPhi;
@@ -223,7 +232,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
}
Phi->setIncomingValue(i, BoolFalse);
- Value *PhiArg = handleLoopCondition(Incoming, Broken);
+ Value *PhiArg = handleLoopCondition(Incoming, Broken, L);
NewPhi->addIncoming(PhiArg, From);
}
@@ -253,7 +262,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
} else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
- TerminatorInst *Insert = Parent->getTerminator();
+ Instruction *Insert;
+ if (L->contains(Inst)) {
+ Insert = Parent->getTerminator();
+ } else {
+ Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
+ }
Value *Args[] = { Cond, Broken };
return CallInst::Create(IfBreak, Args, "", Insert);
@@ -265,14 +279,15 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
/// \brief Handle a back edge (loop)
void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
+ BasicBlock *BB = Term->getParent();
+ llvm::Loop *L = LI->getLoopFor(BB);
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- Value *Arg = handleLoopCondition(Cond, Broken);
+ Value *Arg = handleLoopCondition(Cond, Broken, L);
- BasicBlock *BB = Term->getParent();
for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
PI != PE; ++PI) {
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
index 7601794beab8..b54014072bf9 100644
--- a/lib/Target/R600/SIDefines.h
+++ b/lib/Target/R600/SIDefines.h
@@ -35,7 +35,8 @@ enum {
SMRD = 1 << 16,
DS = 1 << 17,
MIMG = 1 << 18,
- FLAT = 1 << 19
+ FLAT = 1 << 19,
+ WQM = 1 << 20
};
}
diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp
index d8ffa4f75505..cb24bba24a2f 100644
--- a/lib/Target/R600/SIFoldOperands.cpp
+++ b/lib/Target/R600/SIFoldOperands.cpp
@@ -209,7 +209,12 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
APInt Imm;
if (FoldingImm) {
- const TargetRegisterClass *UseRC = MRI.getRegClass(UseOp.getReg());
+ unsigned UseReg = UseOp.getReg();
+ const TargetRegisterClass *UseRC
+ = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+ MRI.getRegClass(UseReg) :
+ TRI.getRegClass(UseReg);
+
Imm = APInt(64, OpToFold.getImm());
// Split 64-bit constants into 32-bits for folding.
@@ -228,8 +233,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// In order to fold immediates into copies, we need to change the
// copy to a MOV.
if (UseMI->getOpcode() == AMDGPU::COPY) {
- unsigned MovOp = TII->getMovOpcode(
- MRI.getRegClass(UseMI->getOperand(0).getReg()));
+ unsigned DestReg = UseMI->getOperand(0).getReg();
+ const TargetRegisterClass *DestRC
+ = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+ MRI.getRegClass(DestReg) :
+ TRI.getRegClass(DestReg);
+
+ unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
continue;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 6b2ea0682a43..32ae605e3f63 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -89,8 +89,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
@@ -158,8 +156,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
- setTruncStoreAction(MVT::i32, MVT::i8, Custom);
- setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
@@ -214,6 +210,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
}
setOperationAction(ISD::FDIV, MVT::f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::f64, Custom);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
@@ -314,9 +311,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple() || VT == MVT::Other)
return false;
- // XXX - CI changes say "Support for unaligned memory accesses" but I don't
- // see what for specifically. The wording everywhere else seems to be the
- // same.
+ // TODO - CI+ supports unaligned memory accesses, but this requires driver
+ // support.
// XXX - The only mention I see of this in the ISA manual is for LDS direct
// reads the "byte address and must be dword aligned". Is it also true for the
@@ -328,12 +324,18 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return Align % 4 == 0;
}
+ // Smaller than dword value must be aligned.
+ // FIXME: This should be allowed on CI+
+ if (VT.bitsLT(MVT::i32))
+ return false;
+
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
// byte-address are ignored, thus forcing Dword alignment.
// This applies to private, global, and constant memory.
if (IsFast)
*IsFast = true;
- return VT.bitsGT(MVT::i32);
+
+ return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}
EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
@@ -448,7 +450,7 @@ SDValue SITargetLowering::LowerFormalArguments(
// We REALLY want the ORIGINAL number of vertex elements here, e.g. a
// three or five element vertex only needs three or five registers,
// NOT four or eigth.
- Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();
for (unsigned j = 0; j != NumElements; ++j) {
@@ -531,7 +533,7 @@ SDValue SITargetLowering::LowerFormalArguments(
Offset, Ins[i].Flags.isSExt());
const PointerType *ParamTy =
- dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
+ dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
// On SI local pointers are just offsets into LDS, so they are always
@@ -566,7 +568,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (Arg.VT.isVector()) {
// Build a vector from the registers
- Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();
SmallVector<SDValue, 4> Regs;
@@ -919,6 +921,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
+ DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
+
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
@@ -1104,7 +1112,70 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue();
+ if (DAG.getTarget().Options.UnsafeFPMath)
+ return LowerFastFDIV(Op, DAG);
+
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
+ const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+
+ SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
+
+ SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
+
+ SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
+
+ SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);
+
+ SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);
+
+ SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);
+
+ SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);
+
+ SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
+
+ SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);
+
+ SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
+ NegDivScale0, Mul, DivScale1);
+
+ SDValue Scale;
+
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ // Workaround a hardware bug on SI where the condition output from div_scale
+ // is not usable.
+
+ const SDValue Hi = DAG.getConstant(1, MVT::i32);
+
+ // Figure out if the scale to use for div_fmas.
+ SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
+ SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
+ SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
+ SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
+
+ SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
+ SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);
+
+ SDValue Scale0Hi
+ = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi);
+ SDValue Scale1Hi
+ = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi);
+
+ SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
+ SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
+ Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen);
+ } else {
+ Scale = DivScale1.getValue(1);
+ }
+
+ SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
+ Fma4, Fma3, Mul, Scale);
+
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X);
}
SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
@@ -1125,11 +1196,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Store->getMemoryVT();
// These stores are legal.
- if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
- VT.isVector() && VT.getVectorNumElements() == 2 &&
- VT.getVectorElementType() == MVT::i32)
- return SDValue();
-
if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
if (VT.isVector() && VT.getVectorNumElements() > 4)
return ScalarizeVectorStore(Op, DAG);
@@ -1524,6 +1590,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::UMAX:
case AMDGPUISD::UMIN: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
+ N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMin3Max3Combine(N, DCI);
break;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 181b11643bf3..50f20ac3619e 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -82,6 +82,8 @@ private:
/// \brief Type of the last opcode.
InstType LastOpcodeType;
+ bool LastInstWritesM0;
+
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@@ -106,6 +108,9 @@ private:
/// \brief Resolve all operand dependencies to counter requirements
Counters handleOperands(MachineInstr &MI);
+ /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
+ void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+
public:
SIInsertWaits(TargetMachine &tm) :
MachineFunctionPass(ID),
@@ -269,6 +274,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
// Insert a NOP to break the clause.
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
.addImm(0);
+ LastInstWritesM0 = false;
}
if (TII->isSMRD(I->getOpcode()))
@@ -362,6 +368,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
((Counts.Named.LGKM & 0x7) << 8));
LastOpcodeType = OTHER;
+ LastInstWritesM0 = false;
return true;
}
@@ -403,6 +410,30 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
return Result;
}
+void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return;
+
+ // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
+ if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
+ LastInstWritesM0 = false;
+ return;
+ }
+
+ // Set whether this instruction sets M0
+ LastInstWritesM0 = false;
+
+ unsigned NumOperands = I->getNumOperands();
+ for (unsigned i = 0; i < NumOperands; i++) {
+ const MachineOperand &Op = I->getOperand(i);
+
+ if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
+ LastInstWritesM0 = true;
+ }
+}
+
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@@ -417,6 +448,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
WaitedOn = ZeroCounts;
LastIssued = ZeroCounts;
LastOpcodeType = OTHER;
+ LastInstWritesM0 = false;
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
@@ -433,7 +465,9 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
Changes |= insertWait(MBB, I, LastIssued);
else
Changes |= insertWait(MBB, I, handleOperands(*I));
+
pushInstruction(MBB, I);
+ handleSendMsg(MBB, I);
}
// Wait for everything at the end of the MBB
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 09c0cbe8f5c3..b825208c92be 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -38,6 +38,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> DS = 0;
field bits<1> MIMG = 0;
field bits<1> FLAT = 0;
+ field bits<1> WQM = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
@@ -64,6 +65,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{17} = DS;
let TSFlags{18} = MIMG;
let TSFlags{19} = FLAT;
+ let TSFlags{20} = WQM;
// Most instructions require adjustments after selection to satisfy
// operand requirements.
@@ -295,18 +297,32 @@ class VOP1e <bits<8> op> : Enc32 {
}
class VOP2e <bits<6> op> : Enc32 {
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> src1;
- bits<8> VDST;
- bits<9> SRC0;
- bits<8> VSRC1;
-
- let Inst{8-0} = SRC0;
- let Inst{16-9} = VSRC1;
- let Inst{24-17} = VDST;
+ let Inst{8-0} = src0;
+ let Inst{16-9} = src1;
+ let Inst{24-17} = vdst;
let Inst{30-25} = op;
let Inst{31} = 0x0; //encoding
}
+class VOP2_MADKe <bits<6> op> : Enc64 {
+
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> vsrc1;
+ bits<32> src2;
+
+ let Inst{8-0} = src0;
+ let Inst{16-9} = vsrc1;
+ let Inst{24-17} = vdst;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+ let Inst{63-32} = src2;
+}
+
class VOP3e <bits<9> op> : Enc64 {
bits<8> dst;
@@ -554,9 +570,6 @@ class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
-class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP3Common <outs, ins, asm, pattern>, VOP3be<op>;
-
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
VOPCCommon <ins, asm, pattern>, VOPCe <op>;
@@ -585,9 +598,6 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
let SchedRW = [WriteLDS];
}
-class DS_si <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- DS <outs, ins, asm, pattern>, DSe<op>;
-
class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI<outs, ins, asm, pattern> {
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 80b560eb65ae..5ab33b491ce3 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -121,12 +121,20 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
if (Load0->getOperand(0) != Load1->getOperand(0))
return false;
+ const ConstantSDNode *Load0Offset =
+ dyn_cast<ConstantSDNode>(Load0->getOperand(1));
+ const ConstantSDNode *Load1Offset =
+ dyn_cast<ConstantSDNode>(Load1->getOperand(1));
+
+ if (!Load0Offset || !Load1Offset)
+ return false;
+
// Check chain.
if (findChainOperand(Load0) != findChainOperand(Load1))
return false;
- Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
+ Offset0 = Load0Offset->getZExtValue();
+ Offset1 = Load1Offset->getZExtValue();
return true;
}
@@ -333,6 +341,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
} else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+ if (DestReg == AMDGPU::VCC) {
+ if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ // FIXME: Hack until VReg_1 removed.
+ assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+
+ return;
+ }
+
assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -408,11 +431,15 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
int NewOpc;
// Try to map original to commuted opcode
- if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
+ NewOpc = AMDGPU::getCommuteRev(Opcode);
+ // Check if the commuted (REV) opcode exists on the target.
+ if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
return NewOpc;
// Try to map commuted to original opcode
- if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
+ NewOpc = AMDGPU::getCommuteOrig(Opcode);
+ // Check if the original (non-REV) opcode exists on the target.
+ if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
return NewOpc;
return Opcode;
@@ -1121,6 +1148,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
return false;
}
+ int RegClass = Desc.OpInfo[i].RegClass;
+
switch (Desc.OpInfo[i].OperandType) {
case MCOI::OPERAND_REGISTER:
if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) {
@@ -1131,7 +1160,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
case AMDGPU::OPERAND_REG_IMM32:
break;
case AMDGPU::OPERAND_REG_INLINE_C:
- if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) {
+ if (isLiteralConstant(MI->getOperand(i))) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
@@ -1152,7 +1181,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
if (!MI->getOperand(i).isReg())
continue;
- int RegClass = Desc.OpInfo[i].RegClass;
if (RegClass != -1) {
unsigned Reg = MI->getOperand(i).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
@@ -1197,31 +1225,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
}
}
- // Verify SRC1 for VOP2 and VOPC
- if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
- const MachineOperand &Src1 = MI->getOperand(Src1Idx);
- if (Src1.isImm()) {
- ErrInfo = "VOP[2C] src1 cannot be an immediate.";
- return false;
- }
- }
-
- // Verify VOP3
- if (isVOP3(Opcode)) {
- if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
- ErrInfo = "VOP3 src0 cannot be a literal constant.";
- return false;
- }
- if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
- ErrInfo = "VOP3 src1 cannot be a literal constant.";
- return false;
- }
- if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
- ErrInfo = "VOP3 src2 cannot be a literal constant.";
- return false;
- }
- }
-
// Verify misc. restrictions on specific instructions.
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
@@ -1292,6 +1295,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
+ case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
}
}
@@ -2043,6 +2047,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
swapOperands(Inst);
}
break;
+ case AMDGPU::S_LSHL_B64:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_LSHLREV_B64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_ASHR_I64:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_ASHRREV_I64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_LSHR_B64:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_LSHRREV_B64;
+ swapOperands(Inst);
+ }
+ break;
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BFM_B64:
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 28cd27dd8962..129803072672 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -204,6 +204,10 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
}
+ bool isWQM(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::WQM;
+ }
+
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO) const;
bool isLiteralConstant(const MachineOperand &MO) const;
@@ -243,7 +247,27 @@ public:
/// the register class of its machine operand.
/// to infer the correct register class base on the other operands.
const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
- unsigned OpNo) const;\
+ unsigned OpNo) const;
+
+ /// \brief Return the size in bytes of the operand OpNo on the given
+ // instruction opcode.
+ unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
+ const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
+
+ if (OpInfo.RegClass == -1) {
+ // If this is an immediate operand, this must be a 32-bit literal.
+ assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
+ return 4;
+ }
+
+ return RI.getRegClass(OpInfo.RegClass)->getSize();
+ }
+
+ /// \brief This form should usually be preferred since it handles operands
+ /// with unknown register classes.
+ unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
+ return getOpRegClass(MI, OpNo)->getSize();
+ }
/// \returns true if it is legal for the operand at index \p OpNo
/// to read a VGPR.
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 175e11d709cf..a749e7f861bb 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -383,15 +383,13 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
let isPseudo = 1;
}
-class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> :
- SOP1 <outs, ins, asm, pattern>,
+class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
+ SOP1 <outs, ins, asm, []>,
SOP1e <op.SI>,
SIMCInstr<opName, SISubtarget.SI>;
-class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> :
- SOP1 <outs, ins, asm, pattern>,
+class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
+ SOP1 <outs, ins, asm, []>,
SOP1e <op.VI>,
SIMCInstr<opName, SISubtarget.VI>;
@@ -400,10 +398,10 @@ multiclass SOP1_32 <sop1 op, string opName, list<dag> pattern> {
pattern>;
def _si : SOP1_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
- opName#" $dst, $src0", pattern>;
+ opName#" $dst, $src0">;
def _vi : SOP1_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
- opName#" $dst, $src0", pattern>;
+ opName#" $dst, $src0">;
}
multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> {
@@ -411,10 +409,10 @@ multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> {
pattern>;
def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0", pattern>;
+ opName#" $dst, $src0">;
def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0", pattern>;
+ opName#" $dst, $src0">;
}
// no input, 64-bit output.
@@ -422,12 +420,12 @@ multiclass SOP1_64_0 <sop1 op, string opName, list<dag> pattern> {
def "" : SOP1_Pseudo <opName, (outs SReg_64:$dst), (ins), pattern>;
def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins),
- opName#" $dst", pattern> {
+ opName#" $dst"> {
let SSRC0 = 0;
}
def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins),
- opName#" $dst", pattern> {
+ opName#" $dst"> {
let SSRC0 = 0;
}
}
@@ -438,10 +436,10 @@ multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> {
pattern>;
def _si : SOP1_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0", pattern>;
+ opName#" $dst, $src0">;
def _vi : SOP1_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0", pattern>;
+ opName#" $dst, $src0">;
}
class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
@@ -451,15 +449,13 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
let Size = 4;
}
-class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> :
- SOP2<outs, ins, asm, pattern>,
+class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
+ SOP2<outs, ins, asm, []>,
SOP2e<op.SI>,
SIMCInstr<opName, SISubtarget.SI>;
-class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> :
- SOP2<outs, ins, asm, pattern>,
+class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
+ SOP2<outs, ins, asm, []>,
SOP2e<op.VI>,
SIMCInstr<opName, SISubtarget.VI>;
@@ -469,11 +465,11 @@ multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst),
(ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
- opName#" $dst, $src0, $src1 [$scc]", pattern>;
+ opName#" $dst, $src0, $src1 [$scc]">;
def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst),
(ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
- opName#" $dst, $src0, $src1 [$scc]", pattern>;
+ opName#" $dst, $src0, $src1 [$scc]">;
}
multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> {
@@ -481,10 +477,10 @@ multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> {
(ins SSrc_32:$src0, SSrc_32:$src1), pattern>;
def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>;
+ (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>;
+ (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
}
multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> {
@@ -492,10 +488,10 @@ multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> {
(ins SSrc_64:$src0, SSrc_64:$src1), pattern>;
def _si : SOP2_Real_si <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1", pattern>;
+ (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1">;
def _vi : SOP2_Real_vi <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1", pattern>;
+ (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1">;
}
multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> {
@@ -503,10 +499,10 @@ multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> {
(ins SSrc_64:$src0, SSrc_32:$src1), pattern>;
def _si : SOP2_Real_si <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>;
+ (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
def _vi : SOP2_Real_vi <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>;
+ (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
}
@@ -527,15 +523,13 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
let isPseudo = 1;
}
-class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> :
- SOPK <outs, ins, asm, pattern>,
+class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
+ SOPK <outs, ins, asm, []>,
SOPKe <op.SI>,
SIMCInstr<opName, SISubtarget.SI>;
-class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> :
- SOPK <outs, ins, asm, pattern>,
+class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
+ SOPK <outs, ins, asm, []>,
SOPKe <op.VI>,
SIMCInstr<opName, SISubtarget.VI>;
@@ -544,10 +538,10 @@ multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
pattern>;
def _si : SOPK_Real_si <op, opName, (outs SReg_32:$dst), (ins u16imm:$src0),
- opName#" $dst, $src0", pattern>;
+ opName#" $dst, $src0">;
def _vi : SOPK_Real_vi <op, opName, (outs SReg_32:$dst), (ins u16imm:$src0),
- opName#" $dst, $src0", pattern>;
+ opName#" $dst, $src0">;
}
multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
@@ -555,10 +549,10 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
(ins SReg_32:$src0, u16imm:$src1), pattern>;
def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0", pattern>;
+ (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0", pattern>;
+ (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
}
//===----------------------------------------------------------------------===//
@@ -792,6 +786,7 @@ def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
+def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
let Src0RC32 = VCSrc_32;
@@ -808,9 +803,14 @@ def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
}
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
+def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
+def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
+ field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
+ field string Asm = " $dst, $src0, $vsrc1, $src2";
+}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
@@ -847,6 +847,15 @@ multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
SIMCInstr <opName#"_e32", SISubtarget.VI>;
}
+multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName> {
+ def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOP1<op.SI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>;
+ // No VI instruction. This class is for SI only.
+}
+
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP2Common <outs, ins, "", pattern>,
VOP <opName>,
@@ -855,25 +864,22 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
}
multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, string revOpSI> {
+ string opName, string revOp> {
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>;
+ VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>,
SIMCInstr <opName#"_e32", SISubtarget.SI>;
}
multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, string revOpSI, string revOpVI> {
+ string opName, string revOp> {
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>;
+ VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>,
SIMCInstr <opName#"_e32", SISubtarget.SI>;
def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>,
- VOP2_REV<revOpVI#"_e32_vi", !eq(revOpVI, opName)>,
SIMCInstr <opName#"_e32", SISubtarget.VI>;
}
@@ -905,6 +911,16 @@ class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
VOP3e_vi <op>,
SIMCInstr <opName#"_e64", SISubtarget.VI>;
+class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
+ VOP3Common <outs, ins, asm, []>,
+ VOP3be <op>,
+ SIMCInstr<opName#"_e64", SISubtarget.SI>;
+
+class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
+ VOP3Common <outs, ins, asm, []>,
+ VOP3be_vi <op>,
+ SIMCInstr <opName#"_e64", SISubtarget.VI>;
+
multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, int NumSrcArgs, bit HasMods = 1> {
@@ -946,24 +962,45 @@ multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
VOP3DisableFields<0, 0, HasMods>;
}
+multiclass VOP3SI_1_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, bit HasMods = 1> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<0, 0, HasMods>;
+ // No VI instruction. This class is for SI only.
+}
+
multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOpSI, string revOpVI,
+ list<dag> pattern, string opName, string revOp,
bit HasMods = 1, bit UseFullOp = 0> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOpSI#"_e64", !eq(revOpSI, opName)>;
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
- def _si : VOP3_Real_si <op.SI3,
- outs, ins, asm, opName>,
- VOP2_REV<revOpSI#"_e64_si", !eq(revOpSI, opName)>,
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods>;
+
+ def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods>;
+}
+
+multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit UseFullOp = 0> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
- def _vi : VOP3_Real_vi <op.VI3,
- outs, ins, asm, opName>,
- VOP2_REV<revOpVI#"_e64_vi", !eq(revOpVI, opName)>,
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods>;
+
+ // No VI instruction. This class is for SI only.
}
+// XXX - Is v_div_scale_{f32|f64} only available in vop3b without
+// option of implicit vcc use?
multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName, string revOp,
bit HasMods = 1, bit UseFullOp = 0> {
@@ -974,19 +1011,27 @@ multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
// can write it into any SGPR. We currently don't use the carry out,
// so for now hardcode it to VCC as well.
let sdst = SIOperand.VCC, Defs = [VCC] in {
- def _si : VOP3b <op.SI3, outs, ins, asm, pattern>,
- VOP3DisableFields<1, 0, HasMods>,
- SIMCInstr<opName#"_e64", SISubtarget.SI>,
- VOP2_REV<revOp#"_e64_si", !eq(revOp, opName)>;
-
- // TODO: Do we need this VI variant here?
- /*def _vi : VOP3b_vi <op.VI3, outs, ins, asm, pattern>,
- VOP3DisableFields<1, 0, HasMods>,
- SIMCInstr<opName#"_e64", SISubtarget.VI>,
- VOP2_REV<revOp#"_e64_vi", !eq(revOp, opName)>;*/
+ def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods>;
+
+ def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods>;
} // End sdst = SIOperand.VCC, Defs = [VCC]
}
+multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit UseFullOp = 0> {
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+
+
+ def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 1, HasMods>;
+
+ def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 1, HasMods>;
+}
+
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName,
bit HasMods, bit defExec> {
@@ -1046,33 +1091,30 @@ multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> {
- def _e32 : VOP1 <op.SI, P.Outs, P.Ins32, opName#P.Asm32, []>,
- VOP <opName>;
+ defm _e32 : VOP1SI_m <op, P.Outs, P.Ins32, opName#P.Asm32, [], opName>;
- def _e64 : VOP3Common <P.Outs, P.Ins64, opName#P.Asm64,
+ defm _e64 : VOP3SI_1_m <op, P.Outs, P.Ins64, opName#P.Asm64,
!if(P.HasModifiers,
[(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0))])>,
- VOP <opName>,
- VOP3e <op.SI3>,
- VOP3DisableFields<0, 0, P.HasModifiers>;
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
+ opName, P.HasModifiers>;
}
multiclass VOP2_Helper <vop2 op, string opName, dag outs,
dag ins32, string asm32, list<dag> pat32,
dag ins64, string asm64, list<dag> pat64,
- string revOpSI, string revOpVI, bit HasMods> {
- defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOpSI, revOpVI>;
+ string revOp, bit HasMods> {
+ defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOpSI, revOpVI, HasMods
+ outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
>;
}
multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
- string revOpSI = opName, string revOpVI = revOpSI> : VOP2_Helper <
+ string revOp = opName> : VOP2_Helper <
op, opName, P.Outs,
P.Ins32, P.Asm32, [],
P.Ins64, P.Asm64,
@@ -1082,15 +1124,30 @@ multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOpSI, revOpVI, P.HasModifiers
+ revOp, P.HasModifiers
>;
+multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName> {
+ defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
+
+ defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+ opName, revOp, P.HasModifiers>;
+}
+
multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
dag ins32, string asm32, list<dag> pat32,
dag ins64, string asm64, list<dag> pat64,
string revOp, bit HasMods> {
- defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp, revOp>;
+ defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3b_2_m <op,
outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
@@ -1116,16 +1173,16 @@ multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
dag ins32, string asm32, list<dag> pat32,
dag ins64, string asm64, list<dag> pat64,
- string revOpSI, string revOpVI, bit HasMods> {
- defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOpSI>;
+ string revOp, bit HasMods> {
+ defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
- revOpSI, revOpVI, HasMods>;
+ revOp, HasMods>;
}
multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
- string revOpSI = opName, string revOpVI = revOpSI>
+ string revOp = opName>
: VOP2_VI3_Helper <
op, opName, P.Outs,
P.Ins32, P.Asm32, [],
@@ -1136,9 +1193,26 @@ multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOpSI, revOpVI, P.HasModifiers
+ revOp, P.HasModifiers
>;
+multiclass VOP2MADK <vop2 op, string opName, list<dag> pattern = []> {
+
+ def "" : VOP2_Pseudo <VOP_MADK.Outs, VOP_MADK.Ins, pattern, opName>;
+
+let isCodeGenOnly = 0 in {
+ def _si : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins,
+ !strconcat(opName, VOP_MADK.Asm), []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>,
+ VOP2_MADKe <op.SI>;
+
+ def _vi : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins,
+ !strconcat(opName, VOP_MADK.Asm), []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI>,
+ VOP2_MADKe <op.VI>;
+} // End isCodeGenOnly = 0
+}
+
class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOPCCommon <ins, "", pattern>,
VOP <opName>,
@@ -1274,9 +1348,31 @@ multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
P.NumSrcArgs, P.HasModifiers
>;
+// Special case for v_div_fmas_{f32|f64}, since it seems to be the
+// only VOP instruction that implicitly reads VCC.
+multiclass VOP3_VCC_Inst <vop3 op, string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag> : VOP3_Helper <
+ op, opName,
+ P.Outs,
+ (ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
+ InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
+ InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
+ ClampMod:$clamp,
+ omod:$omod),
+ " $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)),
+ (i1 VCC)))],
+ 3, 1
+>;
+
multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
string opName, list<dag> pattern> :
- VOP3b_2_m <
+ VOP3b_3_m <
op, (outs vrc:$vdst, SReg_64:$sdst),
(ins InputModsNoDefault:$src0_modifiers, arc:$src0,
InputModsNoDefault:$src1_modifiers, arc:$src1,
@@ -1307,22 +1403,21 @@ class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
// Interpolation opcodes
//===----------------------------------------------------------------------===//
-class VINTRP_Pseudo <string opName, dag outs, dag ins, string asm,
- list<dag> pattern> :
- VINTRPCommon <outs, ins, asm, pattern>,
+class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ VINTRPCommon <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
}
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern> :
- VINTRPCommon <outs, ins, asm, pattern>,
+ string asm> :
+ VINTRPCommon <outs, ins, asm, []>,
VINTRPe <op>,
SIMCInstr<opName, SISubtarget.SI>;
class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern> :
- VINTRPCommon <outs, ins, asm, pattern>,
+ string asm> :
+ VINTRPCommon <outs, ins, asm, []>,
VINTRPe_vi <op>,
SIMCInstr<opName, SISubtarget.VI>;
@@ -1331,11 +1426,11 @@ multiclass VINTRP_m <bits <2> op, string opName, dag outs, dag ins, string asm,
list<dag> pattern = []> {
let DisableEncoding = disableEncoding,
Constraints = constraints in {
- def "" : VINTRP_Pseudo <opName, outs, ins, asm, pattern>;
+ def "" : VINTRP_Pseudo <opName, outs, ins, pattern>;
- def _si : VINTRP_Real_si <op, opName, outs, ins, asm, pattern>;
+ def _si : VINTRP_Real_si <op, opName, outs, ins, asm>;
- def _vi : VINTRP_Real_vi <op, opName, outs, ins, asm, pattern>;
+ def _vi : VINTRP_Real_vi <op, opName, outs, ins, asm>;
}
}
@@ -1467,70 +1562,92 @@ multiclass DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass>
asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
[]>;
-class DS_1A_si <bits<8> op, dag outs, dag ins, string asm, list<dag> pat> :
- DS_si <op, outs, ins, asm, pat> {
- bits<16> offset;
-
- // Single load interpret the 2 i8imm operands as a single i16 offset.
- let offset0 = offset{7-0};
- let offset1 = offset{15-8};
-
- let hasSideEffects = 0;
+// 1 address, 1 data.
+multiclass DS_1A1D_RET_m <bits<8> op, string opName, dag outs, dag ins,
+ string asm, list<dag> pat, string noRetOp> {
+ let mayLoad = 1, mayStore = 1,
+ hasPostISelHook = 1 // Adjusted to no return version.
+ in {
+ def "" : DS_Pseudo <opName, outs, ins, pat>,
+ AtomicNoRet<noRetOp, 1>;
+
+ let data1 = 0 in {
+ def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
+ }
+ }
}
-// 1 address, 1 data.
-class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A_si <
- op,
+multiclass DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc,
+ string noRetOp = ""> : DS_1A1D_RET_m <
+ op, asm,
(outs rc:$vdst),
(ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", []>,
- AtomicNoRet<noRetOp, 1> {
-
- let data1 = 0;
- let mayStore = 1;
- let mayLoad = 1;
+ asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", [], noRetOp>;
- let hasPostISelHook = 1; // Adjusted to no return version.
+// 1 address, 2 data.
+multiclass DS_1A2D_RET_m <bits<8> op, string opName, dag outs, dag ins,
+ string asm, list<dag> pat, string noRetOp> {
+ let mayLoad = 1, mayStore = 1,
+ hasPostISelHook = 1 // Adjusted to no return version.
+ in {
+ def "" : DS_Pseudo <opName, outs, ins, pat>,
+ AtomicNoRet<noRetOp, 1>;
+
+ def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
+ }
}
-// 1 address, 2 data.
-class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A_si <
- op,
+multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
+ string noRetOp = ""> : DS_1A2D_RET_m <
+ op, asm,
(outs rc:$vdst),
(ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
- []>,
- AtomicNoRet<noRetOp, 1> {
- let mayStore = 1;
- let mayLoad = 1;
- let hasPostISelHook = 1; // Adjusted to no return version.
-}
+ [], noRetOp>;
// 1 address, 2 data.
-class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A_si <
- op,
+multiclass DS_1A2D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
+ string asm, list<dag> pat, string noRetOp> {
+ let mayLoad = 1, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, pat>,
+ AtomicNoRet<noRetOp, 0>;
+
+ def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
+ }
+}
+
+multiclass DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc,
+ string noRetOp = asm> : DS_1A2D_NORET_m <
+ op, asm,
(outs),
(ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
- []>,
- AtomicNoRet<noRetOp, 0> {
- let mayStore = 1;
- let mayLoad = 1;
-}
+ [], noRetOp>;
// 1 address, 1 data.
-class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A_si <
- op,
+multiclass DS_1A1D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
+ string asm, list<dag> pat, string noRetOp> {
+ let mayLoad = 1, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, pat>,
+ AtomicNoRet<noRetOp, 0>;
+
+ let data1 = 0 in {
+ def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
+ }
+ }
+}
+
+multiclass DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc,
+ string noRetOp = asm> : DS_1A1D_NORET_m <
+ op, asm,
(outs),
(ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
asm#" $addr, $data0"#"$offset"#" [M0]",
- []>,
- AtomicNoRet<noRetOp, 0> {
-
- let data1 = 0;
- let mayStore = 1;
- let mayLoad = 1;
-}
+ [], noRetOp>;
//===----------------------------------------------------------------------===//
// MTBUF classes
@@ -1596,45 +1713,111 @@ multiclass MTBUF_Load_Helper <bits<3> op, string opName,
// MUBUF classes
//===----------------------------------------------------------------------===//
-class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
- let lds = 0;
-}
-
-class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
- let lds = 0;
+class mubuf <bits<7> si, bits<7> vi = si> {
+ field bits<7> SI = si;
+ field bits<7> VI = vi;
}
class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
-
bit IsAddr64 = is_addr64;
string OpName = NAME # suffix;
}
-class MUBUFAtomicAddr64 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
- : MUBUF_si <op, outs, ins, asm, pattern> {
+class MUBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ MUBUF <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+
+ // dummy fields, so that we can use let statements around multiclasses
+ bits<1> offen;
+ bits<1> idxen;
+ bits<8> vaddr;
+ bits<1> glc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+}
+
+class MUBUF_Real_si <mubuf op, string opName, dag outs, dag ins,
+ string asm> :
+ MUBUF <outs, ins, asm, []>,
+ MUBUFe <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let lds = 0;
+}
- let offen = 0;
- let idxen = 0;
- let addr64 = 1;
- let tfe = 0;
+class MUBUF_Real_vi <mubuf op, string opName, dag outs, dag ins,
+ string asm> :
+ MUBUF <outs, ins, asm, []>,
+ MUBUFe_vi <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI> {
let lds = 0;
- let soffset = 128;
}
-class MUBUFAtomicOffset <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
- : MUBUF_si <op, outs, ins, asm, pattern> {
+multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
+
+ def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
+ MUBUFAddr64Table <0>;
- let offen = 0;
- let idxen = 0;
- let addr64 = 0;
- let tfe = 0;
+ let addr64 = 0 in {
+ def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
+ }
+
+ def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>;
+}
+
+multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
+ dag ins, string asm, list<dag> pattern> {
+
+ def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
+ MUBUFAddr64Table <1>;
+
+ let addr64 = 1 in {
+ def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
+ }
+
+ // There is no VI version. If the pseudo is selected, it should be lowered
+ // for VI appropriately.
+}
+
+class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
let lds = 0;
- let vaddr = 0;
}
-multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc,
+multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
+ string asm, list<dag> pattern, bit is_return> {
+
+ def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
+ MUBUFAddr64Table <0, !if(is_return, "_RTN", "")>,
+ AtomicNoRet<NAME#"_OFFSET", is_return>;
+
+ let offen = 0, idxen = 0, tfe = 0, vaddr = 0 in {
+ let addr64 = 0 in {
+ def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
+ }
+
+ def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>;
+ }
+}
+
+multiclass MUBUFAtomicAddr64_m <mubuf op, string opName, dag outs, dag ins,
+ string asm, list<dag> pattern, bit is_return> {
+
+ def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
+ MUBUFAddr64Table <1, !if(is_return, "_RTN", "")>,
+ AtomicNoRet<NAME#"_ADDR64", is_return>;
+
+ let offen = 0, idxen = 0, addr64 = 1, tfe = 0, soffset = 128 in {
+ def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
+ }
+
+ // There is no VI version. If the pseudo is selected, it should be lowered
+ // for VI appropriately.
+}
+
+multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
ValueType vt, SDPatternOperator atomic> {
let mayStore = 1, mayLoad = 1, hasPostISelHook = 1 in {
@@ -1642,208 +1825,135 @@ multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc,
// No return variants
let glc = 0 in {
- def _ADDR64 : MUBUFAtomicAddr64 <
- op, (outs),
+ defm _ADDR64 : MUBUFAtomicAddr64_m <
+ op, name#"_addr64", (outs),
(ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
mbuf_offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", []
- >, MUBUFAddr64Table<1>, AtomicNoRet<NAME#"_ADDR64", 0>;
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", [], 0
+ >;
- def _OFFSET : MUBUFAtomicOffset <
- op, (outs),
+ defm _OFFSET : MUBUFAtomicOffset_m <
+ op, name#"_offset", (outs),
(ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
SCSrc_32:$soffset, slc:$slc),
- name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", []
- >, MUBUFAddr64Table<0>, AtomicNoRet<NAME#"_OFFSET", 0>;
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
+ >;
} // glc = 0
// Variant that return values
let glc = 1, Constraints = "$vdata = $vdata_in",
DisableEncoding = "$vdata_in" in {
- def _RTN_ADDR64 : MUBUFAtomicAddr64 <
- op, (outs rc:$vdata),
+ defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
+ op, name#"_rtn_addr64", (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc",
[(set vt:$vdata,
(atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset,
- i1:$slc), vt:$vdata_in))]
- >, MUBUFAddr64Table<1, "_RTN">, AtomicNoRet<NAME#"_ADDR64", 1>;
+ i1:$slc), vt:$vdata_in))], 1
+ >;
- def _RTN_OFFSET : MUBUFAtomicOffset <
- op, (outs rc:$vdata),
+ defm _RTN_OFFSET : MUBUFAtomicOffset_m <
+ op, name#"_rtn_offset", (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
SCSrc_32:$soffset, slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
[(set vt:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
- i1:$slc), vt:$vdata_in))]
- >, MUBUFAddr64Table<0, "_RTN">, AtomicNoRet<NAME#"_OFFSET", 1>;
+ i1:$slc), vt:$vdata_in))], 1
+ >;
} // glc = 1
} // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
}
-multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
+multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> {
let mayLoad = 1, mayStore = 0 in {
-
- let addr64 = 0 in {
-
- let offen = 0, idxen = 0, vaddr = 0 in {
- def _OFFSET : MUBUF_si <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
- slc:$slc, tfe:$tfe),
- asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
- [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
- i32:$soffset, i16:$offset,
- i1:$glc, i1:$slc, i1:$tfe)))]>,
- MUBUFAddr64Table<0>;
- }
-
- let offen = 1, idxen = 0 in {
- def _OFFEN : MUBUF_si <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
- SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
- tfe:$tfe),
- asm#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
- }
-
- let offen = 0, idxen = 1 in {
- def _IDXEN : MUBUF_si <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
- slc:$slc, tfe:$tfe),
- asm#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
- }
-
- let offen = 1, idxen = 1 in {
- def _BOTHEN : MUBUF_si <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
- asm#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>;
- }
- }
-
- let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
- def _ADDR64 : MUBUF_si <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
- asm#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
- [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
- i64:$vaddr, i16:$offset)))]>, MUBUFAddr64Table<1>;
- }
- }
-}
-
-multiclass MUBUF_Load_Helper_vi <bits<7> op, string asm, RegisterClass regClass,
- ValueType load_vt = i32,
- SDPatternOperator ld = null_frag> {
-
- let lds = 0, mayLoad = 1 in {
let offen = 0, idxen = 0, vaddr = 0 in {
- def _OFFSET : MUBUF_vi <op, (outs regClass:$vdata),
+ defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
(ins SReg_128:$srsrc,
mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
slc:$slc, tfe:$tfe),
- asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
i32:$soffset, i16:$offset,
- i1:$glc, i1:$slc, i1:$tfe)))]>,
- MUBUFAddr64Table<0>;
+ i1:$glc, i1:$slc, i1:$tfe)))]>;
}
let offen = 1, idxen = 0 in {
- def _OFFEN : MUBUF_vi <op, (outs regClass:$vdata),
+ defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
(ins SReg_128:$srsrc, VGPR_32:$vaddr,
SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
tfe:$tfe),
- asm#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 0, idxen = 1 in {
- def _IDXEN : MUBUF_vi <op, (outs regClass:$vdata),
+ defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
(ins SReg_128:$srsrc, VGPR_32:$vaddr,
mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
slc:$slc, tfe:$tfe),
- asm#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 1, idxen = 1 in {
- def _BOTHEN : MUBUF_vi <op, (outs regClass:$vdata),
+ defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_64:$vaddr,
SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
- asm#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>;
+ name#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
+ defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
+ [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
+ i64:$vaddr, i16:$offset)))]>;
}
}
}
-multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
+multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
ValueType store_vt, SDPatternOperator st> {
-
let mayLoad = 0, mayStore = 1 in {
- let addr64 = 0 in {
-
- def "" : MUBUF_si <
- op, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
- mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
- tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
- "$glc"#"$slc"#"$tfe",
- []
- >;
+ defm : MUBUF_m <op, name, (outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
+ mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
+ "$glc"#"$slc"#"$tfe", []>;
let offen = 0, idxen = 0, vaddr = 0 in {
- def _OFFSET : MUBUF_si <
- op, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
- [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc,
- i1:$tfe))]
- >, MUBUFAddr64Table<0>;
+ defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
+ SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+ [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
} // offen = 0, idxen = 0, vaddr = 0
let offen = 1, idxen = 0 in {
- def _OFFEN : MUBUF_si <
- op, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
- "$glc"#"$slc"#"$tfe",
- []
- >;
+ defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
+ "$glc"#"$slc"#"$tfe", []>;
} // end offen = 1, idxen = 0
- } // End addr64 = 0
-
- def _ADDR64 : MUBUF_si <
- op, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
- [(st store_vt:$vdata,
- (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]>, MUBUFAddr64Table<1>
- {
-
- let mayLoad = 0;
- let mayStore = 1;
-
- // Encoding
- let offen = 0;
- let idxen = 0;
- let glc = 0;
- let addr64 = 1;
- let slc = 0;
- let tfe = 0;
- let soffset = 128; // ZERO
- }
- } // End mayLoad = 0, mayStore = 1
+ let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0,
+ soffset = 128 /* ZERO */ in {
+ defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
+ [(st store_vt:$vdata,
+ (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]>;
+ }
+ } // End mayLoad = 0, mayStore = 1
}
class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
@@ -1912,7 +2022,7 @@ multiclass MIMG_NoSampler <bits<7> op, string asm> {
class MIMG_Sampler_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
- RegisterClass src_rc> : MIMG <
+ RegisterClass src_rc, int wqm> : MIMG <
op,
(outs dst_rc:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
@@ -1924,33 +2034,41 @@ class MIMG_Sampler_Helper <bits<7> op, string asm,
let mayLoad = 1;
let mayStore = 0;
let hasPostISelHook = 1;
+ let WQM = wqm;
}
multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
- int channels> {
- def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32>,
+ int channels, int wqm> {
+ def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm>,
MIMG_Mask<asm#"_V1", channels>;
- def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64>,
+ def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm>,
MIMG_Mask<asm#"_V2", channels>;
- def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128>,
+ def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm>,
MIMG_Mask<asm#"_V4", channels>;
- def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256>,
+ def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm>,
MIMG_Mask<asm#"_V8", channels>;
- def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512>,
+ def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm>,
MIMG_Mask<asm#"_V16", channels>;
}
multiclass MIMG_Sampler <bits<7> op, string asm> {
- defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1>;
- defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2>;
- defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3>;
- defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
+ defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 0>;
+ defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 0>;
+ defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 0>;
+ defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 0>;
+}
+
+multiclass MIMG_Sampler_WQM <bits<7> op, string asm> {
+ defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 1>;
+ defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 1>;
+ defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 1>;
+ defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 1>;
}
class MIMG_Gather_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
- RegisterClass src_rc> : MIMG <
+ RegisterClass src_rc, int wqm> : MIMG <
op,
(outs dst_rc:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
@@ -1971,28 +2089,36 @@ class MIMG_Gather_Helper <bits<7> op, string asm,
// Therefore, disable all code which updates DMASK by setting these two:
let MIMG = 0;
let hasPostISelHook = 0;
+ let WQM = wqm;
}
multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
- int channels> {
- def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32>,
+ int channels, int wqm> {
+ def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>,
MIMG_Mask<asm#"_V1", channels>;
- def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>,
+ def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
MIMG_Mask<asm#"_V2", channels>;
- def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>,
+ def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
MIMG_Mask<asm#"_V4", channels>;
- def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>,
+ def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
MIMG_Mask<asm#"_V8", channels>;
- def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>,
+ def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
MIMG_Mask<asm#"_V16", channels>;
}
multiclass MIMG_Gather <bits<7> op, string asm> {
- defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1>;
- defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>;
- defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>;
- defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>;
+ defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 0>;
+ defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 0>;
+ defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 0>;
+ defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 0>;
+}
+
+multiclass MIMG_Gather_WQM <bits<7> op, string asm> {
+ defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 1>;
+ defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 1>;
+ defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 1>;
+ defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 1>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 4b1a84662cb5..bbedef29303b 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -152,9 +152,11 @@ defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
[(set i32:$dst, (ctlz_zero_undef i32:$src0))]
>;
-//defm S_FLBIT_I32_B64 : SOP1_32 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
-defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>;
-//defm S_FLBIT_I32_I64 : SOP1_32 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
+defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
+defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32",
+ [(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))]
+>;
+defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
[(set i32:$dst, (sext_inreg i32:$src0, i8))]
>;
@@ -764,88 +766,88 @@ defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
//===----------------------------------------------------------------------===//
-def DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
-def DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
-def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
-def DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>;
-def DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>;
-def DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>;
-def DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>;
-def DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>;
-def DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
-def DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
-def DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
-def DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
-def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
-def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
-def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
-def DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
-def DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
-
-def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
-def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
-def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
-def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
-def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
-def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
-def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
-def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
-def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
-def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
-def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
-def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
-def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
-def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
+defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
+defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
+defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
+defm DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>;
+defm DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>;
+defm DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>;
+defm DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>;
+defm DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>;
+defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
+defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
+defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
+defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
+defm DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
+defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
+defm DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
+defm DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
+
+defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
+defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
+defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
+defm DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
+defm DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
+defm DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
+defm DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
+defm DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
+defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
+defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
+defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
+defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
+defm DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
+defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">;
//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">;
-def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
-def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
-def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
-def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
+defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
+defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
+defm DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
+defm DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
let SubtargetPredicate = isCI in {
-def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
+defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
} // End isCI
-def DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
-def DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
-def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
-def DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
-def DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
-def DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
-def DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
-def DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
-def DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
-def DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
-def DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
-def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
-def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
-def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
-def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
-def DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
-def DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
-
-def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
-def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
-def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
-def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
-def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
-def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
-def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
-def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
-def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
-def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
-def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
-def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
-def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
-def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
+defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
+defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
+defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
+defm DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
+defm DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
+defm DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
+defm DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
+defm DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
+defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
+defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
+defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
+defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
+defm DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
+defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
+defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
+defm DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
+
+defm DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
+defm DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
+defm DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
+defm DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
+defm DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
+defm DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
+defm DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
+defm DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
+defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
+defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
+defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
+defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
+defm DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
+defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
-def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
-def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
-def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_f64", VReg_64, "ds_min_f64">;
-def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_f64", VReg_64, "ds_max_f64">;
+defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
+defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
+defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
+defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
@@ -874,123 +876,120 @@ defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg
defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
-defm DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>;
-defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>;
+defm DS_READ2_B64 : DS_Load2_Helper <0x00000077, "ds_read2_b64", VReg_128>;
+defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000078, "ds_read2st64_b64", VReg_128>;
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isSICI in {
-
-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "buffer_load_format_x", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "buffer_load_format_xy", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "buffer_load_format_xyz", []>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "buffer_load_format_xyzw", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "buffer_store_format_x", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "buffer_store_format_xy", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "buffer_store_format_xyz", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "buffer_store_format_xyzw", []>;
+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <mubuf<0x00>, "buffer_load_format_x", []>;
+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <mubuf<0x01>, "buffer_load_format_xy", []>;
+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <mubuf<0x02>, "buffer_load_format_xyz", []>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <mubuf<0x03>, "buffer_load_format_xyzw", VReg_128>;
+//def BUFFER_STORE_FORMAT_X : MUBUF_ <mubuf<0x04>, "buffer_store_format_x", []>;
+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <mubuf<0x05>, "buffer_store_format_xy", []>;
+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <mubuf<0x06>, "buffer_store_format_xyz", []>;
+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <mubuf<0x07>, "buffer_store_format_xyzw", []>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
- 0x00000008, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
+ mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
- 0x00000009, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global
+ mubuf<0x09, 0x11>, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global
>;
defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
- 0x0000000a, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global
+ mubuf<0x0a, 0x12>, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global
>;
defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
- 0x0000000b, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
+ mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
- 0x0000000c, "buffer_load_dword", VGPR_32, i32, global_load
+ mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
- 0x0000000d, "buffer_load_dwordx2", VReg_64, v2i32, global_load
+ mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
- 0x0000000e, "buffer_load_dwordx4", VReg_128, v4i32, global_load
+ mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load
>;
defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
- 0x00000018, "buffer_store_byte", VGPR_32, i32, truncstorei8_global
+ mubuf<0x18>, "buffer_store_byte", VGPR_32, i32, truncstorei8_global
>;
defm BUFFER_STORE_SHORT : MUBUF_Store_Helper <
- 0x0000001a, "buffer_store_short", VGPR_32, i32, truncstorei16_global
+ mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global
>;
defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
- 0x0000001c, "buffer_store_dword", VGPR_32, i32, global_store
+ mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store
>;
defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
- 0x0000001d, "buffer_store_dwordx2", VReg_64, v2i32, global_store
+ mubuf<0x1d>, "buffer_store_dwordx2", VReg_64, v2i32, global_store
>;
defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
- 0x0000001e, "buffer_store_dwordx4", VReg_128, v4i32, global_store
+ mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store
>;
-//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "buffer_atomic_swap", []>;
+
defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
- 0x00000030, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
+ mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
>;
-//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "buffer_atomic_cmpswap", []>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", []>;
defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
- 0x00000032, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
+ mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
>;
defm BUFFER_ATOMIC_SUB : MUBUF_Atomic <
- 0x00000033, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
+ mubuf<0x33, 0x43>, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
>;
-//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "buffer_atomic_rsub", []>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <mubuf<0x34>, "buffer_atomic_rsub", []>; // isn't on CI & VI
defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
- 0x00000035, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
+ mubuf<0x35, 0x44>, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
>;
defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
- 0x00000036, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
+ mubuf<0x36, 0x45>, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
>;
defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
- 0x00000037, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
+ mubuf<0x37, 0x46>, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
>;
defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic <
- 0x00000038, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
+ mubuf<0x38, 0x47>, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
>;
defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
- 0x00000039, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
+ mubuf<0x39, 0x48>, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
>;
defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
- 0x0000003a, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
+ mubuf<0x3a, 0x49>, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
>;
defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
- 0x0000003b, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
->;
-//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "buffer_atomic_inc", []>;
-//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "buffer_atomic_dec", []>;
-//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "buffer_atomic_fcmpswap", []>;
-//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "buffer_atomic_fmin", []>;
-//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "buffer_atomic_fmax", []>;
-//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "buffer_atomic_swap_x2", []>;
-//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "buffer_atomic_cmpswap_x2", []>;
-//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "buffer_atomic_add_x2", []>;
-//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "buffer_atomic_sub_x2", []>;
-//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "buffer_atomic_rsub_x2", []>;
-//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "buffer_atomic_smin_x2", []>;
-//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "buffer_atomic_umin_x2", []>;
-//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "buffer_atomic_smax_x2", []>;
-//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "buffer_atomic_umax_x2", []>;
-//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "buffer_atomic_and_x2", []>;
-//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "buffer_atomic_or_x2", []>;
-//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "buffer_atomic_xor_x2", []>;
-//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "buffer_atomic_inc_x2", []>;
-//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "buffer_atomic_dec_x2", []>;
-//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "buffer_atomic_fcmpswap_x2", []>;
-//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "buffer_atomic_fmin_x2", []>;
-//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "buffer_atomic_fmax_x2", []>;
-//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "buffer_wbinvl1_sc", []>;
-//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "buffer_wbinvl1", []>;
-
-} // End SubtargetPredicate = isSICI
+ mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
+>;
+//def BUFFER_ATOMIC_INC : MUBUF_ <mubuf<0x3c, 0x4b>, "buffer_atomic_inc", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <mubuf<0x3d, 0x4c>, "buffer_atomic_dec", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <mubuf<0x59, 0x68>, "buffer_atomic_and_x2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <mubuf<0x5a, 0x69>, "buffer_atomic_or_x2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <mubuf<0x5b, 0x6a>, "buffer_atomic_xor_x2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <mubuf<0x70>, "buffer_wbinvl1_sc", []>; // isn't on CI & VI
+//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", []>; // isn't on SI
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <mubuf<0x71, 0x3e>, "buffer_wbinvl1", []>;
//===----------------------------------------------------------------------===//
// MTBUF Instructions
@@ -1037,63 +1036,63 @@ defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>;
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>;
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>;
-defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "image_sample">;
-defm IMAGE_SAMPLE_CL : MIMG_Sampler <0x00000021, "image_sample_cl">;
+defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, "image_sample">;
+defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">;
defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
-defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "image_sample_b">;
-defm IMAGE_SAMPLE_B_CL : MIMG_Sampler <0x00000026, "image_sample_b_cl">;
+defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, "image_sample_b">;
+defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">;
defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
-defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "image_sample_c">;
-defm IMAGE_SAMPLE_C_CL : MIMG_Sampler <0x00000029, "image_sample_c_cl">;
+defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, "image_sample_c">;
+defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">;
defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
-defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "image_sample_c_b">;
-defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler <0x0000002e, "image_sample_c_b_cl">;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">;
+defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">;
defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
-defm IMAGE_SAMPLE_O : MIMG_Sampler <0x00000030, "image_sample_o">;
-defm IMAGE_SAMPLE_CL_O : MIMG_Sampler <0x00000031, "image_sample_cl_o">;
+defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, "image_sample_o">;
+defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">;
defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
-defm IMAGE_SAMPLE_B_O : MIMG_Sampler <0x00000035, "image_sample_b_o">;
-defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler <0x00000036, "image_sample_b_cl_o">;
+defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">;
+defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">;
defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
-defm IMAGE_SAMPLE_C_O : MIMG_Sampler <0x00000038, "image_sample_c_o">;
-defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler <0x00000039, "image_sample_c_cl_o">;
+defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">;
+defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">;
defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
-defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler <0x0000003d, "image_sample_c_b_o">;
-defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler <0x0000003e, "image_sample_c_b_cl_o">;
+defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">;
+defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">;
defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
-defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "image_gather4">;
-defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "image_gather4_cl">;
+defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, "image_gather4">;
+defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">;
defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
-defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "image_gather4_b">;
-defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "image_gather4_b_cl">;
+defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, "image_gather4_b">;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">;
defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
-defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "image_gather4_c">;
-defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "image_gather4_c_cl">;
+defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, "image_gather4_c">;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">;
defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
-defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "image_gather4_c_b">;
-defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "image_gather4_c_b_cl">;
+defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">;
defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
-defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "image_gather4_o">;
-defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "image_gather4_cl_o">;
+defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, "image_gather4_o">;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">;
defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
-defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "image_gather4_b_o">;
+defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">;
defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
-defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "image_gather4_c_o">;
-defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "image_gather4_c_cl_o">;
+defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">;
defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
-defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "image_gather4_c_b_o">;
-defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "image_gather4_c_b_cl_o">;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">;
defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
-defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod">;
+defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
@@ -1445,53 +1444,37 @@ defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
fminnum>;
defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32,
fmaxnum>;
-defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32,
- AMDGPUsmin
->;
-defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32,
- AMDGPUsmax
->;
-defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32,
- AMDGPUumin
->;
-defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32,
- AMDGPUumax
->;
+defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>;
+defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>;
+defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>;
+defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>;
-// No non-Rev Op on VI
defm V_LSHRREV_B32 : VOP2Inst <
vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
- "v_lshr_b32", "v_lshrrev_b32"
+ "v_lshr_b32"
>;
-// No non-Rev OP on VI
defm V_ASHRREV_I32 : VOP2Inst <
vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
- "v_ashr_i32", "v_ashrrev_i32"
+ "v_ashr_i32"
>;
-// No non-Rev OP on VI
defm V_LSHLREV_B32 : VOP2Inst <
vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
- "v_lshl_b32", "v_lshlrev_b32"
+ "v_lshl_b32"
>;
-defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32",
- VOP_I32_I32_I32, and>;
-defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32",
- VOP_I32_I32_I32, or
->;
-defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32",
- VOP_I32_I32_I32, xor
->;
+defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
+defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
+defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>;
} // End isCommutable = 1
-defm V_MADMK_F32 : VOP2Inst <vop2<0x20, 0x17>, "v_madmk_f32", VOP_F32_F32_F32>;
+defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">;
let isCommutable = 1 in {
-defm V_MADAK_F32 : VOP2Inst <vop2<0x21, 0x18>, "v_madak_f32", VOP_F32_F32_F32>;
+defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">;
} // End isCommutable = 1
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
@@ -1503,9 +1486,7 @@ let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
VOP_I32_I32_I32, add
>;
-defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32",
- VOP_I32_I32_I32, sub
->;
+defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
VOP_I32_I32_I32, null_frag, "v_sub_i32"
@@ -1513,10 +1494,10 @@ defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
let Uses = [VCC] in { // Carry-in comes from VCC
defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
- VOP_I32_I32_I32_VCC, adde
+ VOP_I32_I32_I32_VCC
>;
defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
- VOP_I32_I32_I32_VCC, sube
+ VOP_I32_I32_I32_VCC
>;
defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
@@ -1529,47 +1510,41 @@ defm V_READLANE_B32 : VOP2SI_3VI_m <
vop3 <0x001, 0x289>,
"v_readlane_b32",
(outs SReg_32:$vdst),
- (ins VGPR_32:$src0, SSrc_32:$vsrc1),
- "v_readlane_b32 $vdst, $src0, $vsrc1"
+ (ins VGPR_32:$src0, SCSrc_32:$src1),
+ "v_readlane_b32 $vdst, $src0, $src1"
>;
defm V_WRITELANE_B32 : VOP2SI_3VI_m <
vop3 <0x002, 0x28a>,
"v_writelane_b32",
(outs VGPR_32:$vdst),
- (ins SReg_32:$src0, SSrc_32:$vsrc1),
- "v_writelane_b32 $vdst, $src0, $vsrc1"
+ (ins SReg_32:$src0, SCSrc_32:$src1),
+ "v_writelane_b32 $vdst, $src0, $src1"
>;
// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {
-let isCommutable = 1 in {
-defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
- VOP_F32_F32_F32
->;
-} // End isCommutable = 1
-
-defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
+defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
VOP_F32_F32_F32, AMDGPUfmin_legacy
>;
-defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
+defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
VOP_F32_F32_F32, AMDGPUfmax_legacy
>;
let isCommutable = 1 in {
-defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
-defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
- VOP_I32_I32_I32, sra
->;
-
-let hasPostISelHook = 1 in {
-defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
-}
-
+defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>;
+defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>;
+defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
} // End isCommutable = 1
} // End let SubtargetPredicate = SICI
+let isCommutable = 1 in {
+defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
+ VOP_F32_F32_F32
+>;
+} // End isCommutable = 1
+
defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
AMDGPUbfm
>;
@@ -1586,14 +1561,25 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;
-////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
-////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
-////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
+
+defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
+ VOP_I32_F32_I32>; // TODO: set "Uses = dst"
+
+defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32",
+ VOP_I32_F32_F32
+>;
+defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32",
+ VOP_I32_F32_F32
+>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
- VOP_I32_F32_F32, int_SI_packf16
+ VOP_I32_F32_F32, int_SI_packf16
+>;
+defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32",
+ VOP_I32_I32_I32
+>;
+defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32",
+ VOP_I32_I32_I32
>;
-////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
-////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
//===----------------------------------------------------------------------===//
// VOP3 Instructions
@@ -1659,27 +1645,34 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",
VOP_I32_I32_I32_I32
>;
-defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",
+defm V_MIN3_F32 : VOP3Inst <vop3<0x151, 0x1d0>, "v_min3_f32",
VOP_F32_F32_F32_F32, AMDGPUfmin3>;
-defm V_MIN3_I32 : VOP3Inst <vop3<0x152>, "v_min3_i32",
+defm V_MIN3_I32 : VOP3Inst <vop3<0x152, 0x1d1>, "v_min3_i32",
VOP_I32_I32_I32_I32, AMDGPUsmin3
>;
-defm V_MIN3_U32 : VOP3Inst <vop3<0x153>, "v_min3_u32",
+defm V_MIN3_U32 : VOP3Inst <vop3<0x153, 0x1d2>, "v_min3_u32",
VOP_I32_I32_I32_I32, AMDGPUumin3
>;
-defm V_MAX3_F32 : VOP3Inst <vop3<0x154>, "v_max3_f32",
+defm V_MAX3_F32 : VOP3Inst <vop3<0x154, 0x1d3>, "v_max3_f32",
VOP_F32_F32_F32_F32, AMDGPUfmax3
>;
-defm V_MAX3_I32 : VOP3Inst <vop3<0x155>, "v_max3_i32",
+defm V_MAX3_I32 : VOP3Inst <vop3<0x155, 0x1d4>, "v_max3_i32",
VOP_I32_I32_I32_I32, AMDGPUsmax3
>;
-defm V_MAX3_U32 : VOP3Inst <vop3<0x156>, "v_max3_u32",
+defm V_MAX3_U32 : VOP3Inst <vop3<0x156, 0x1d5>, "v_max3_u32",
VOP_I32_I32_I32_I32, AMDGPUumax3
>;
-//def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>;
-//def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>;
-//def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>;
+defm V_MED3_F32 : VOP3Inst <vop3<0x157, 0x1d6>, "v_med3_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_MED3_I32 : VOP3Inst <vop3<0x158, 0x1d7>, "v_med3_i32",
+ VOP_I32_I32_I32_I32
+>;
+defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32",
+ VOP_I32_I32_I32_I32
+>;
+
//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
@@ -1742,21 +1735,36 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
+let SchedRW = [WriteFloatFMA, WriteSALU] in {
defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>;
+}
-let SchedRW = [WriteDouble] in {
+let SchedRW = [WriteDouble, WriteSALU] in {
// Double precision division pre-scale.
defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;
} // let SchedRW = [WriteDouble]
-let isCommutable = 1 in {
-defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
+let isCommutable = 1, Uses = [VCC] in {
+
+// v_div_fmas_f32:
+// result = src0 * src1 + src2
+// if (vcc)
+// result *= 2^32
+//
+defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
>;
+
let SchedRW = [WriteDouble] in {
-defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
+// v_div_fmas_f64:
+// result = src0 * src1 + src2
+// if (vcc)
+// result *= 2^64
+//
+defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
VOP_F64_F64_F64_F64, AMDGPUdiv_fmas
>;
+
} // End SchedRW = [WriteDouble]
} // End isCommutable = 1
@@ -1774,23 +1782,29 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {
-defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
- VOP_I64_I64_I32, shl
->;
-
-defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
- VOP_I64_I64_I32, srl
->;
-
-defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
- VOP_I64_I64_I32, sra
->;
+defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32>;
+defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", VOP_I64_I64_I32>;
+defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32>;
defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
VOP_F32_F32_F32_F32>;
} // End SubtargetPredicate = isSICI
+let SubtargetPredicate = isVI in {
+
+defm V_LSHLREV_B64 : VOP3Inst <vop3<0, 0x28f>, "v_lshlrev_b64",
+ VOP_I64_I32_I64
+>;
+defm V_LSHRREV_B64 : VOP3Inst <vop3<0, 0x290>, "v_lshrrev_b64",
+ VOP_I64_I32_I64
+>;
+defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
+ VOP_I64_I32_I64
+>;
+
+} // End SubtargetPredicate = isVI
+
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -1809,8 +1823,8 @@ def SGPR_USE : InstSI <(outs),(ins), "", []>;
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
-let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
- Uses = [EXEC], Defs = [EXEC] in {
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
+let Uses = [EXEC], Defs = [EXEC] in {
let isBranch = 1, isTerminator = 1 in {
@@ -1867,15 +1881,18 @@ def SI_END_CF : InstSI <
[(int_SI_end_cf i64:$saved)]
>;
+} // End Uses = [EXEC], Defs = [EXEC]
+
+let Uses = [EXEC], Defs = [EXEC,VCC] in {
def SI_KILL : InstSI <
(outs),
(ins VSrc_32:$src),
"si_kill $src",
[(int_AMDGPU_kill f32:$src)]
>;
+} // End Uses = [EXEC], Defs = [EXEC,VCC]
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
- // Uses = [EXEC], Defs = [EXEC]
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
@@ -2020,16 +2037,12 @@ def : Pat <
(SI_KILL 0xbf800000)
>;
-let Predicates = [isSICI] in {
-
/* int_SI_vs_load_input */
def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
-} // End Predicates = [isSICI]
-
/* int_SI_export */
def : Pat <
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
@@ -2156,9 +2169,13 @@ def : Pat <
//===----------------------------------------------------------------------===//
let Predicates = [UnsafeFPMath] in {
-def : RcpPat<V_RCP_F64_e32, f64>;
-defm : RsqPat<V_RSQ_F64_e32, f64>;
-defm : RsqPat<V_RSQ_F32_e32, f32>;
+
+//def : RcpPat<V_RCP_F64_e32, f64>;
+//defm : RsqPat<V_RSQ_F64_e32, f64>;
+//defm : RsqPat<V_RSQ_F32_e32, f32>;
+
+def : RsqPat<V_RSQ_F32_e32, f32>;
+def : RsqPat<V_RSQ_F64_e32, f64>;
}
//===----------------------------------------------------------------------===//
@@ -2675,13 +2692,6 @@ def : Pat <
(V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
>;
-def : Pat<
- (fdiv f64:$src0, f64:$src1),
- (V_MUL_F64 0 /* src0_modifiers */, $src0,
- 0 /* src1_modifiers */, (V_RCP_F64_e32 $src1),
- 0 /* clamp */, 0 /* omod */)
->;
-
def : Pat <
(int_AMDGPU_cube v4f32:$src),
(REG_SEQUENCE VReg_128,
@@ -2716,16 +2726,12 @@ class Ext32Pat <SDNode ext> : Pat <
def : Ext32Pat <zext>;
def : Ext32Pat <anyext>;
-let Predicates = [isSICI] in {
-
// Offset in an 32Bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
>;
-} // End Predicates = [isSICI]
-
// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
(AMDGPUurecip i32:$src0),
@@ -2907,7 +2913,6 @@ class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
(Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
>;
-let Predicates = [isSICI] in {
def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
@@ -2915,7 +2920,6 @@ def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
-} // End Predicates = [isSICI]
// BUFFER_LOAD_DWORD*, addr64=0
multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
@@ -2954,14 +2958,12 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
>;
}
-let Predicates = [isSICI] in {
defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
-} // End Predicates = [isSICI]
class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
@@ -2969,13 +2971,11 @@ class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
>;
-let Predicates = [isSICI] in {
def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
-} // End Predicates = [isSICI]
/*
class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
@@ -3246,6 +3246,12 @@ def : Pat <
>;
def : Pat <
+ (i1 (trunc i64:$a)),
+ (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1),
+ (EXTRACT_SUBREG $a, sub0)), 1)
+>;
+
+def : Pat <
(i32 (bswap i32:$a)),
(V_BFI_B32 (S_MOV_B32 0x00ff00ff),
(V_ALIGNBIT_B32 $a, $a, 24),
@@ -3257,6 +3263,28 @@ def : Pat <
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
+//===----------------------------------------------------------------------===//
+// Fract Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isCI] in {
+
+// Convert (x - floor(x)) to fract(x)
+def : Pat <
+ (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
+ (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
+ (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isCI]
+
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 068b22f37704..c319b32111fe 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -88,7 +88,8 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
- void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
+ void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
@@ -323,7 +324,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
-void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
@@ -333,8 +334,14 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
unsigned Idx = MI.getOperand(3).getReg();
if (AMDGPU::SReg_32RegClass.contains(Idx)) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(Idx);
+ if (Offset) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .addReg(Idx)
+ .addImm(Offset);
+ } else {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ }
MBB.insert(I, MovRel);
} else {
@@ -363,6 +370,11 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
.addReg(AMDGPU::VCC);
+ if (Offset) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .addReg(AMDGPU::M0)
+ .addImm(Offset);
+ }
// Do the actual move
MBB.insert(I, MovRel);
@@ -384,6 +396,33 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
MI.eraseFromParent();
}
+/// \param @VecReg The register which holds element zero of the vector
+/// being addressed into.
+/// \param[out] @Reg The base register to use in the indirect addressing instruction.
+/// \param[in,out] @Offset As an input, this is the constant offset part of the
+// indirect Index. e.g. v0 = v[VecReg + Offset]
+// As an output, this is a constant value that needs
+// to be added to the value stored in M0.
+void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg,
+ unsigned &Reg,
+ int &Offset) {
+ unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = VecReg;
+
+ const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
+ int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
+
+ if (RegIdx < 0) {
+ Offset = RegIdx;
+ RegIdx = 0;
+ } else {
+ Offset = 0;
+ }
+
+ Reg = RC->getRegister(RegIdx);
+}
+
void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -391,18 +430,18 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Vec = MI.getOperand(2).getReg();
- unsigned Off = MI.getOperand(4).getImm();
- unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
- if (!SubReg)
- SubReg = Vec;
+ int Off = MI.getOperand(4).getImm();
+ unsigned Reg;
+
+ computeIndirectRegAndOffset(Vec, Reg, Off);
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(SubReg + Off)
+ .addReg(Reg)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
- LoadM0(MI, MovRel);
+ LoadM0(MI, MovRel, Off);
}
void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
@@ -411,20 +450,20 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
- unsigned Off = MI.getOperand(4).getImm();
+ int Off = MI.getOperand(4).getImm();
unsigned Val = MI.getOperand(5).getReg();
- unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
- if (!SubReg)
- SubReg = Dst;
+ unsigned Reg;
+
+ computeIndirectRegAndOffset(Dst, Reg, Off);
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
- .addReg(SubReg + Off, RegState::Define)
+ .addReg(Reg, RegState::Define)
.addReg(Val)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
- LoadM0(MI, MovRel);
+ LoadM0(MI, MovRel, Off);
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
@@ -447,7 +486,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
- if (TII->isDS(MI.getOpcode()))
+ if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode()))
NeedWQM = true;
// Flat uses m0 in case it needs to access LDS.
@@ -513,12 +552,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SI_INDIRECT_DST_V16:
IndirectDst(MI);
break;
-
- case AMDGPU::V_INTERP_P1_F32:
- case AMDGPU::V_INTERP_P2_F32:
- case AMDGPU::V_INTERP_MOV_F32:
- NeedWQM = true;
- break;
}
}
}
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 58c2cd109680..f50299112604 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -14,7 +14,6 @@
#include "SIRegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -47,13 +46,31 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::VGPR255);
Reserved.set(AMDGPU::VGPR254);
+ // Tonga and Iceland can only allocate a fixed number of SGPRs due
+ // to a hw bug.
+ if (ST.hasSGPRInitBug()) {
+ unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+ // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
+ // Assume XNACK_MASK is unused.
+ unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
+
+ for (unsigned i = Limit; i < NumSGPRs; ++i) {
+ unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
+ MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
+
+ for (; R.isValid(); ++R)
+ Reserved.set(*R);
+ }
+ }
+
return Reserved;
}
unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
// FIXME: We should adjust the max number of waves based on LDS size.
- unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
+ unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
+ ST.getMaxWavesPerCU());
unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
for (regclass_iterator I = regclass_begin(), E = regclass_end();
@@ -204,7 +221,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
+ BuildMI(*MBB, MI, DL,
+ TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+ Spill.VGPR)
.addReg(SubReg)
.addImm(Spill.Lane);
@@ -236,7 +255,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (isM0)
SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
+ BuildMI(*MBB, MI, DL,
+ TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ SubReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane)
.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
@@ -245,7 +266,22 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(SubReg);
}
}
- TII->insertNOPs(MI, 3);
+
+ // TODO: only do this when it is needed
+ switch (ST.getGeneration()) {
+ case AMDGPUSubtarget::SOUTHERN_ISLANDS:
+ // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
+ TII->insertNOPs(MI, 3);
+ break;
+ case AMDGPUSubtarget::SEA_ISLANDS:
+ break;
+ default: // VOLCANIC_ISLANDS and later
+ // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
+ // and later. This also applies to VALUs which write VCC, but we're
+ // unlikely to see VMEM use VCC.
+ TII->insertNOPs(MI, 4);
+ }
+
MI->eraseFromParent();
break;
}
@@ -490,14 +526,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
}
}
-unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
- switch(WaveCount) {
- case 10: return 48;
- case 9: return 56;
- case 8: return 64;
- case 7: return 72;
- case 6: return 80;
- case 5: return 96;
- default: return 103;
+unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+ unsigned WaveCount) const {
+ if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ switch (WaveCount) {
+ case 10: return 80;
+ case 9: return 80;
+ case 8: return 96;
+ default: return 102;
+ }
+ } else {
+ switch(WaveCount) {
+ case 10: return 48;
+ case 9: return 56;
+ case 8: return 64;
+ case 7: return 72;
+ case 6: return 80;
+ case 5: return 96;
+ default: return 103;
+ }
}
}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index d908ffd12d2c..1dfe53093eb4 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -17,6 +17,7 @@
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/Support/Debug.h"
namespace llvm {
@@ -111,7 +112,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
/// concurrent waves.
- unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
+ unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+ unsigned WaveCount) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 1a1efb0c89a9..c63f30508f63 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -209,7 +209,9 @@ def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
-def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
+ let Size = 32;
+}
class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
let OperandNamespace = "AMDGPU";
diff --git a/lib/Target/R600/VIInstrFormats.td b/lib/Target/R600/VIInstrFormats.td
index 5285d18ced46..c24223511248 100644
--- a/lib/Target/R600/VIInstrFormats.td
+++ b/lib/Target/R600/VIInstrFormats.td
@@ -136,6 +136,32 @@ class VOP3e_vi <bits<10> op> : Enc64 {
let Inst{63} = src2_modifiers{0};
}
+class VOP3be_vi <bits<10> op> : Enc64 {
+ bits<8> vdst;
+ bits<2> src0_modifiers;
+ bits<9> src0;
+ bits<2> src1_modifiers;
+ bits<9> src1;
+ bits<2> src2_modifiers;
+ bits<9> src2;
+ bits<7> sdst;
+ bits<2> omod;
+ bits<1> clamp;
+
+ let Inst{7-0} = vdst;
+ let Inst{14-8} = sdst;
+ let Inst{15} = clamp;
+ let Inst{25-16} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = src0;
+ let Inst{49-41} = src1;
+ let Inst{58-50} = src2;
+ let Inst{60-59} = omod;
+ let Inst{61} = src0_modifiers{0};
+ let Inst{62} = src1_modifiers{0};
+ let Inst{63} = src2_modifiers{0};
+}
+
class EXPe_vi : EXPe {
let Inst{31-26} = 0x31; //encoding
}
diff --git a/lib/Target/R600/VIInstructions.td b/lib/Target/R600/VIInstructions.td
index 24e66cea6277..4a6e933783bb 100644
--- a/lib/Target/R600/VIInstructions.td
+++ b/lib/Target/R600/VIInstructions.td
@@ -9,18 +9,6 @@
// Instruction definitions for VI and newer.
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isVI in {
-
-defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi <
- 0x14, "buffer_load_dword", VGPR_32, i32, global_load
->;
-
-defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <
- 0x03, "buffer_load_format_xyzw", VReg_128
->;
-
-} // End SubtargetPredicate = isVI
-
//===----------------------------------------------------------------------===//
// SMEM Patterns
@@ -28,37 +16,10 @@ defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <
let Predicates = [isVI] in {
-// 1. Offset as 8bit DWORD immediate
+// 1. Offset as 20bit DWORD immediate
def : Pat <
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
>;
-//===----------------------------------------------------------------------===//
-// MUBUF Patterns
-//===----------------------------------------------------------------------===//
-
-// Offset in an 32Bit VGPR
-def : Pat <
- (SIload_constant v4i32:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_VI_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
->;
-
-// Offset in an 32Bit VGPR
-def : Pat <
- (SIload_constant v4i32:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_VI_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
->;
-
-/* int_SI_vs_load_input */
-def : Pat<
- (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_VI_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
->;
-
-defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_VI_OFFSET,
- BUFFER_LOAD_DWORD_VI_OFFEN,
- BUFFER_LOAD_DWORD_VI_IDXEN,
- BUFFER_LOAD_DWORD_VI_BOTHEN>;
-
} // End Predicates = [isVI]
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 16aab16d63ee..41e717febe94 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -375,12 +375,25 @@ static bool usesTheStack(const MachineFunction &MF) {
return false;
}
-void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
- unsigned &CallOp,
- const char *&Symbol) {
- CallOp = STI.is64Bit() ? X86::W64ALLOCA : X86::CALLpcrel32;
+void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+ bool Is64Bit = STI.is64Bit();
+ bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+
+ unsigned CallOp;
+ if (Is64Bit)
+ CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
+ else
+ CallOp = X86::CALLpcrel32;
- if (STI.is64Bit()) {
+ const char *Symbol;
+ if (Is64Bit) {
if (STI.isTargetCygMing()) {
Symbol = "___chkstk_ms";
} else {
@@ -390,6 +403,37 @@ void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
Symbol = "_alloca";
else
Symbol = "_chkstk";
+
+ MachineInstrBuilder CI;
+
+ // All current stack probes take AX and SP as input, clobber flags, and
+ // preserve all registers. x86_64 probes leave RSP unmodified.
+ if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
+ // For the large code model, we have to call through a register. Use R11,
+ // as it is scratch in all supported calling conventions.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
+ .addExternalSymbol(Symbol);
+ CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
+ } else {
+ CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
+ }
+
+ unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
+ unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
+ CI.addReg(AX, RegState::Implicit)
+ .addReg(SP, RegState::Implicit)
+ .addReg(AX, RegState::Define | RegState::Implicit)
+ .addReg(SP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+ if (Is64Bit) {
+ // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
+ // themselves. It also does not clobber %rax so we can reuse it when
+ // adjusting %rsp.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
+ .addReg(X86::RSP)
+ .addReg(X86::RAX);
+ }
}
/// emitPrologue - Push callee-saved registers onto the stack, which
@@ -722,11 +766,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
if (NumBytes >= StackProbeSize && UseStackProbe) {
- const char *StackProbeSymbol;
- unsigned CallOp;
-
- getStackProbeFunction(STI, CallOp, StackProbeSymbol);
-
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
@@ -755,22 +794,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.setMIFlag(MachineInstr::FrameSetup);
}
- BuildMI(MBB, MBBI, DL,
- TII.get(CallOp))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
- .setMIFlag(MachineInstr::FrameSetup);
+ // Save a pointer to the MI where we set AX.
+ MachineBasicBlock::iterator SetRAX = MBBI;
+ --SetRAX;
+
+ // Call __chkstk, __chkstk_ms, or __alloca.
+ emitStackProbeCall(MF, MBB, MBBI, DL);
+
+ // Apply the frame setup flag to all inserted instrs.
+ for (; SetRAX != MBBI; ++SetRAX)
+ SetRAX->setFlag(MachineInstr::FrameSetup);
- if (Is64Bit) {
- // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
- // themself. It also does not clobber %rax so we can reuse it when
- // adjusting %rsp.
- BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr)
- .addReg(StackPtr)
- .addReg(X86::RAX)
- .setMIFlag(MachineInstr::FrameSetup);
- }
if (isEAXAlive) {
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index ee0ee227cad8..dd8fc3240c23 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -27,9 +27,11 @@ public:
explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
: TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
- static void getStackProbeFunction(const X86Subtarget &STI,
- unsigned &CallOp,
- const char *&Symbol);
+ /// Emit a call to the target's stack probe function. This is required for all
+ /// large stack allocations on Windows. The caller is required to materialize
+ /// the number of bytes to probe in RAX/EAX.
+ static void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL);
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 177299b8afc4..85978d8aaa5c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15,6 +15,7 @@
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
+#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
@@ -10094,12 +10095,12 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
VT.getVectorNumElements() / 2);
// Check for patterns which can be matched with a single insert of a 128-bit
// subvector.
- if (isShuffleEquivalent(Mask, 0, 1, 0, 1) ||
- isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
+ bool OnlyUsesV1 = isShuffleEquivalent(Mask, 0, 1, 0, 1);
+ if (OnlyUsesV1 || isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
- Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0));
+ OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) {
@@ -10112,7 +10113,15 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
// Otherwise form a 128-bit permutation.
// FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half.
- unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4;
+ int MaskLO = Mask[0];
+ if (MaskLO == SM_SentinelUndef)
+ MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
+
+ int MaskHI = Mask[2];
+ if (MaskHI == SM_SentinelUndef)
+ MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
+
+ unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
DAG.getConstant(PermMask, MVT::i8));
}
@@ -17172,6 +17181,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::x86_avx2_permd:
+ case Intrinsic::x86_avx2_permps:
+ // Operands intentionally swapped. Mask is last operand to intrinsic,
+ // but second operand for node/instruction.
+ return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(1));
+
case Intrinsic::x86_avx512_mask_valign_q_512:
case Intrinsic::x86_avx512_mask_valign_d_512:
// Vector source operands are swapped.
@@ -21076,47 +21092,7 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
assert(!Subtarget->isTargetMachO());
- // The lowering is pretty easy: we're just emitting the call to _alloca. The
- // non-trivial part is impdef of ESP.
-
- if (Subtarget->isTargetWin64()) {
- if (Subtarget->isTargetCygMing()) {
- // ___chkstk(Mingw64):
- // Clobbers R10, R11, RAX and EFLAGS.
- // Updates RSP.
- BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
- .addExternalSymbol("___chkstk")
- .addReg(X86::RAX, RegState::Implicit)
- .addReg(X86::RSP, RegState::Implicit)
- .addReg(X86::RAX, RegState::Define | RegState::Implicit)
- .addReg(X86::RSP, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- } else {
- // __chkstk(MSVCRT): does not update stack pointer.
- // Clobbers R10, R11 and EFLAGS.
- BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
- .addExternalSymbol("__chkstk")
- .addReg(X86::RAX, RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- // RAX has the offset to be subtracted from RSP.
- BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
- .addReg(X86::RSP)
- .addReg(X86::RAX);
- }
- } else {
- const char *StackProbeSymbol = (Subtarget->isTargetKnownWindowsMSVC() ||
- Subtarget->isTargetWindowsItanium())
- ? "_chkstk"
- : "_alloca";
-
- BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(X86::EAX, RegState::Implicit)
- .addReg(X86::ESP, RegState::Implicit)
- .addReg(X86::EAX, RegState::Define | RegState::Implicit)
- .addReg(X86::ESP, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- }
+ X86FrameLowering::emitStackProbeCall(*BB->getParent(), *BB, MI, DL);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -25558,45 +25534,51 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
- LHS.getValueType(), RHS, LHS.getOperand(1));
- return DAG.getSetCC(SDLoc(N), N->getValueType(0),
- addV, DAG.getConstant(0, addV.getValueType()), CC);
+ SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), LHS.getValueType(), RHS,
+ LHS.getOperand(1));
+ return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
+ DAG.getConstant(0, addV.getValueType()), CC);
}
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
- RHS.getValueType(), LHS, RHS.getOperand(1));
- return DAG.getSetCC(SDLoc(N), N->getValueType(0),
- addV, DAG.getConstant(0, addV.getValueType()), CC);
+ SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), RHS.getValueType(), LHS,
+ RHS.getOperand(1));
+ return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
+ DAG.getConstant(0, addV.getValueType()), CC);
}
- if (VT.getScalarType() == MVT::i1) {
- bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
- (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
- bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode());
- if (!IsSEXT0 && !IsVZero0)
- return SDValue();
- bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) &&
- (RHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
+ if (VT.getScalarType() == MVT::i1 &&
+ (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
+ bool IsSEXT0 =
+ (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
+ (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
- if (!IsSEXT1 && !IsVZero1)
- return SDValue();
+ if (!IsSEXT0 || !IsVZero1) {
+ // Swap the operands and update the condition code.
+ std::swap(LHS, RHS);
+ CC = ISD::getSetCCSwappedOperands(CC);
+
+ IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
+ (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
+ IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
+ }
if (IsSEXT0 && IsVZero1) {
- assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type");
- if (CC == ISD::SETEQ)
+ assert(VT == LHS.getOperand(0).getValueType() &&
+ "Uexpected operand type");
+ if (CC == ISD::SETGT)
+ return DAG.getConstant(0, VT);
+ if (CC == ISD::SETLE)
+ return DAG.getConstant(1, VT);
+ if (CC == ISD::SETEQ || CC == ISD::SETGE)
return DAG.getNOT(DL, LHS.getOperand(0), VT);
+
+ assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
+ "Unexpected condition code!");
return LHS.getOperand(0);
}
- if (IsSEXT1 && IsVZero0) {
- assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type");
- if (CC == ISD::SETEQ)
- return DAG.getNOT(DL, RHS.getOperand(0), VT);
- return RHS.getOperand(0);
- }
}
return SDValue();
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 71415c6fde8e..7baff19e51a9 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -279,7 +279,8 @@ let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
}
let isCall = 1, isCodeGenOnly = 1 in
- // __chkstk(MSVC): clobber R10, R11 and EFLAGS.
+ // __chkstk(MSVC): clobber R10, R11 and EFLAGS
+ // ___chkstk_ms(Mingw64): clobber R10, R11 and EFLAGS
// ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP.
let Defs = [RAX, R10, R11, RSP, EFLAGS],
Uses = [RSP] in {
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 7130ae2c3097..b411d079c56c 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -175,8 +175,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_permd, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx2_permps, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index cb965fb9a225..60b541f1aa2f 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -47,6 +47,8 @@ using namespace llvm;
static cl::opt<std::string>
DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
cl::ValueRequired);
+static cl::opt<bool> DefaultExitBlockBeforeBody("gcov-exit-block-before-body",
+ cl::init(false), cl::Hidden);
GCOVOptions GCOVOptions::getDefault() {
GCOVOptions Options;
@@ -312,7 +314,7 @@ namespace {
class GCOVFunction : public GCOVRecord {
public:
GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
- bool UseCfgChecksum)
+ bool UseCfgChecksum, bool ExitBlockBeforeBody)
: SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0),
ReturnBlock(1, os) {
this->os = os;
@@ -322,11 +324,13 @@ namespace {
uint32_t i = 0;
for (auto &BB : *F) {
- // Skip index 1 (0, 2, 3, 4, ...) because that's assigned to the
- // ReturnBlock.
- bool first = i == 0;
- Blocks.insert(std::make_pair(&BB, GCOVBlock(i++ + !first, os)));
+ // Skip index 1 if it's assigned to the ReturnBlock.
+ if (i == 1 && ExitBlockBeforeBody)
+ ++i;
+ Blocks.insert(std::make_pair(&BB, GCOVBlock(i++, os)));
}
+ if (!ExitBlockBeforeBody)
+ ReturnBlock.Number = i;
std::string FunctionNameAndLine;
raw_string_ostream FNLOS(FunctionNameAndLine);
@@ -469,7 +473,7 @@ static bool functionHasLines(Function *F) {
if (Loc.isUnknown()) continue;
// Artificial lines such as calls to the global constructors.
- if (Loc.getLine() == 0) continue;
+ if (Loc.getLine() == 0) continue;
return true;
}
@@ -513,7 +517,8 @@ void GCOVProfiler::emitProfileNotes() {
EntryBlock.splitBasicBlock(It);
Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++,
- Options.UseCfgChecksum));
+ Options.UseCfgChecksum,
+ DefaultExitBlockBeforeBody));
GCOVFunction &Func = *Funcs.back();
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index ac13eebf8275..1ed14d001093 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2183,12 +2183,16 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
// Handle the floating point versions of equality comparisons too.
if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
(isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
- // Floating point -0.0 and 0.0 compare equal, so we can't
- // propagate a constant based on that comparison.
+
+ // Floating point -0.0 and 0.0 compare equal, so we can only
+ // propagate values if we know that we have a constant and that
+ // its value is non-zero.
+
// FIXME: We should do this optimization if 'no signed zeros' is
// applicable via an instruction-level fast-math-flag or some other
// indicator that relaxed FP semantics are being used.
- if (!isa<ConstantFP>(Op1) || !cast<ConstantFP>(Op1)->isZero())
+
+ if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero())
Worklist.push_back(std::make_pair(Op0, Op1));
}
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 9164be224654..267cb999d245 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -535,6 +535,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
Loop *PredLoop = LI->getLoopFor(*PI);
if (!PredLoop || PredLoop->contains(Exit))
continue;
+ if (isa<IndirectBrInst>((*PI)->getTerminator()))
+ continue;
SplitLatchEdge |= L->getLoopLatch() == *PI;
BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this);
ExitSplit->moveBefore(Exit);
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 6cb91a154f06..d54c09aa6ae4 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -288,14 +288,11 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
IntegerType *IT = cast<IntegerType>(IVOperand->getType());
Value *OtherOperand = nullptr;
- int OtherOperandIdx = -1;
if (BO->getOperand(0) == IVOperand) {
OtherOperand = BO->getOperand(1);
- OtherOperandIdx = 1;
} else {
assert(BO->getOperand(1) == IVOperand && "only other use!");
OtherOperand = BO->getOperand(0);
- OtherOperandIdx = 0;
}
bool Changed = false;
diff --git a/test/Analysis/ScalarEvolution/pr22856.ll b/test/Analysis/ScalarEvolution/pr22856.ll
new file mode 100644
index 000000000000..89e83516efdd
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/pr22856.ll
@@ -0,0 +1,33 @@
+; RUN: opt -loop-reduce -verify < %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64--linux-gnu"
+
+define void @unbounded() {
+
+block_A:
+ %0 = sext i32 undef to i64
+ br i1 undef, label %block_F, label %block_G
+
+block_C: ; preds = %block_F
+ br i1 undef, label %block_D, label %block_E
+
+block_D: ; preds = %block_D, %block_C
+ br i1 undef, label %block_E, label %block_D
+
+block_E: ; preds = %block_D, %block_C
+ %iv2 = phi i64 [ %4, %block_D ], [ %4, %block_C ]
+ %1 = add nsw i32 %iv1, 1
+ %2 = icmp eq i32 %1, undef
+ br i1 %2, label %block_G, label %block_F
+
+block_F: ; preds = %block_E, %block_A
+ %iv3 = phi i64 [ %iv2, %block_E ], [ %0, %block_A ]
+ %iv1 = phi i32 [ %1, %block_E ], [ undef, %block_A ]
+ %3 = add nsw i64 %iv3, 2
+ %4 = add nsw i64 %iv3, 1
+ br label %block_C
+
+block_G: ; preds = %block_E, %block_A
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
index 30ea63b4664a..a89c2c5e6fd5 100644
--- a/test/CodeGen/AArch64/arm64-tls-dynamics.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOLD %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-NOLD-RELOC %s
@general_dynamic_var = external thread_local global i32
@@ -9,22 +11,34 @@ define i32 @test_generaldynamic() {
%val = load i32* @general_dynamic_var
ret i32 %val
- ; FIXME: the adrp instructions are redundant (if harmless).
-; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
-; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NEXT: .tlsdesccall general_dynamic_var
; CHECK-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall general_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+
+
; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
; CHECK: ldr w0, [x[[TP]], x0]
+; CHECK-NOLD: mrs x[[TP:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: ldr w0, [x[[TP]], x0]
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
+
}
define i32* @test_generaldynamic_addr() {
@@ -32,21 +46,25 @@ define i32* @test_generaldynamic_addr() {
ret i32* @general_dynamic_var
- ; FIXME: the adrp instructions are redundant (if harmless).
-; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
-; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NEXT: .tlsdesccall general_dynamic_var
; CHECK-NEXT: blr [[CALLEE]]
; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
; CHECK: add x0, [[TP]], x0
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
+
}
@local_dynamic_var = external thread_local(localdynamic) global i32
@@ -58,54 +76,71 @@ define i32 @test_localdynamic() {
ret i32 %val
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
-
-; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
-; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
-
-; CHECK: add x[[TPREL:[0-9]+]], x0, [[DTP_OFFSET]]
-
+; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var
+; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var
; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK: ldr w0, [x[[TPIDR]], x[[TPOFF]]]
+
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: ldr w0, [x[[TPIDR]], x0]
-; CHECK: ldr w0, [x[[TPIDR]], x[[TPREL]]]
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
}
define i32* @test_localdynamic_addr() {
; CHECK-LABEL: test_localdynamic_addr:
- ret i32* @local_dynamic_var
-
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
-
-; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
-; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
-
-; CHECK: add [[TPREL:x[0-9]+]], x0, [[DTP_OFFSET]]
-
-; CHECK: mrs [[TPIDR:x[0-9]+]], TPIDR_EL0
-
-; CHECK: add x0, [[TPIDR]], [[TPREL]]
+; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var
+; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var
+; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK: add x0, x[[TPIDR]], x[[TPOFF]]
+
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: add x0, x[[TPIDR]], x0
+ ret i32* @local_dynamic_var
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
}
; The entire point of the local-dynamic access model is to have a single call to
@@ -122,11 +157,10 @@ define i32 @test_localdynamic_deduplicate() {
%sum = add i32 %val, %val2
ret i32 %sum
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK: adrp x[[DTPREL_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[DTPREL_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
; CHECK-NOT: _TLS_MODULE_BASE_
diff --git a/test/CodeGen/AArch64/arm64-tls-execs.ll b/test/CodeGen/AArch64/arm64-tls-execs.ll
index f0130d858896..e6d3d680f417 100644
--- a/test/CodeGen/AArch64/arm64-tls-execs.ll
+++ b/test/CodeGen/AArch64/arm64-tls-execs.ll
@@ -38,14 +38,13 @@ define i32 @test_local_exec() {
; CHECK-LABEL: test_local_exec:
%val = load i32* @local_exec_var
-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [0bAAA{{[01]+}},A,0b101AAAAA,0x92]
-; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
-; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
-; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]]
-
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
+; CHECK: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var
+; CHECK: ldr w0, [x[[R3]]]
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
ret i32 %val
}
@@ -53,11 +52,11 @@ define i32* @test_local_exec_addr() {
; CHECK-LABEL: test_local_exec_addr:
ret i32* @local_exec_var
-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
-; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
-; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
-; CHECK: add x0, [[TP]], [[TP_OFFSET]]
+; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
+; CHECK: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var
+; CHECK: ret
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
}
diff --git a/test/CodeGen/AArch64/implicit-sret.ll b/test/CodeGen/AArch64/implicit-sret.ll
new file mode 100644
index 000000000000..264d519f36f8
--- /dev/null
+++ b/test/CodeGen/AArch64/implicit-sret.ll
@@ -0,0 +1,13 @@
+; RUN: llc %s -o - -mtriple=arm64-apple-ios7.0 | FileCheck %s
+;
+; Handle implicit sret arguments that are generated on-the-fly during lowering.
+; <rdar://19792160> Null pointer assertion in AArch64TargetLowering
+
+; CHECK-LABEL: big_retval
+; ... str or stp for the first 1024 bits
+; CHECK: strb wzr, [x8, #128]
+; CHECK: ret
+define i1032 @big_retval() {
+entry:
+ ret i1032 0
+}
diff --git a/test/CodeGen/AArch64/machine-copy-prop.ll b/test/CodeGen/AArch64/machine-copy-prop.ll
new file mode 100644
index 000000000000..92d877d40f59
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-copy-prop.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s
+
+; This file check a bug in MachineCopyPropagation pass. The last COPY will be
+; incorrectly removed if the machine instructions are as follows:
+; %Q5_Q6<def> = COPY %Q2_Q3
+; %D5<def> =
+; %D3<def> =
+; %D3<def> = COPY %D6
+; This is caused by a bug in function SourceNoLongerAvailable(), which fails to
+; remove the relationship of D6 and "%Q5_Q6<def> = COPY %Q2_Q3".
+
+@failed = internal unnamed_addr global i1 false
+
+; CHECK-LABEL: foo:
+; CHECK: ld2
+; CHECK-NOT: // kill: D{{[0-9]+}}<def> D{{[0-9]+}}<kill>
+define void @foo(<2 x i32> %shuffle251, <8 x i8> %vtbl1.i, i8* %t2, <2 x i32> %vrsubhn_v2.i1364) {
+entry:
+ %val0 = alloca [2 x i64], align 8
+ %val1 = alloca <2 x i64>, align 16
+ %vmull = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> <i32 -1, i32 -1>, <2 x i32> %shuffle251)
+ %vgetq_lane = extractelement <2 x i64> %vmull, i32 0
+ %cmp = icmp eq i64 %vgetq_lane, 1
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i1 true, i1* @failed, align 1
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ tail call void @f2()
+ %sqdmull = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> <i16 1, i16 0, i16 0, i16 0>, <4 x i16> <i16 2, i16 0, i16 0, i16 0>)
+ %sqadd = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> zeroinitializer, <4 x i32> %sqdmull)
+ %shuffle = shufflevector <4 x i32> %sqadd, <4 x i32> undef, <2 x i32> zeroinitializer
+ %0 = mul <2 x i32> %shuffle, <i32 -1, i32 0>
+ %sub = add <2 x i32> %0, <i32 1, i32 0>
+ %sext = sext <2 x i32> %sub to <2 x i64>
+ %vset_lane603 = shufflevector <2 x i64> %sext, <2 x i64> undef, <1 x i32> zeroinitializer
+ %t1 = bitcast [2 x i64]* %val0 to i8*
+ call void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i64 1, i8* %t1)
+ call void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64> <i64 4096>, <1 x i64> <i64 -1>, i64 0, i8* %t2)
+ %vld2_lane = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> <i64 11>, <1 x i64> <i64 11>, i64 0, i8* %t2)
+ %vld2_lane.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 0
+ %vld2_lane.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 1
+ %vld2_lane1 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> %vld2_lane.0.extract, <1 x i64> %vld2_lane.1.extract, i64 0, i8* %t1)
+ %vld2_lane1.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 0
+ %vld2_lane1.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 1
+ %t3 = bitcast <2 x i64>* %val1 to i8*
+ call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> %vld2_lane1.0.extract, <1 x i64> %vld2_lane1.1.extract, i8* %t3)
+ %t4 = load <2 x i64>* %val1, align 16
+ %vsubhn = sub <2 x i64> <i64 11, i64 0>, %t4
+ %vsubhn1 = lshr <2 x i64> %vsubhn, <i64 32, i64 32>
+ %vsubhn2 = trunc <2 x i64> %vsubhn1 to <2 x i32>
+ %neg = xor <2 x i32> %vsubhn2, <i32 -1, i32 -1>
+ %sqadd1 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> <i64 -1>, <1 x i64> <i64 1>)
+ %sqadd2 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %vset_lane603, <1 x i64> %sqadd1)
+ %sqadd3 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> <i64 1>, <1 x i64> %sqadd2)
+ %shuffle.i = shufflevector <2 x i32> <i32 undef, i32 0>, <2 x i32> %vrsubhn_v2.i1364, <2 x i32> <i32 1, i32 3>
+ %cmp.i = icmp uge <2 x i32> %shuffle.i, %neg
+ %sext.i = sext <2 x i1> %cmp.i to <2 x i32>
+ %vpadal = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %sext.i)
+ %t5 = sub <1 x i64> %vpadal, %sqadd3
+ %vget_lane1 = extractelement <1 x i64> %t5, i32 0
+ %cmp2 = icmp eq i64 %vget_lane1, 15
+ br i1 %cmp2, label %if.end2, label %if.then2
+
+if.then2: ; preds = %if.end
+ store i1 true, i1* @failed, align 1
+ br label %if.end2
+
+if.end2: ; preds = %if.then682, %if.end
+ call void @f2()
+ %vext = shufflevector <8 x i8> <i8 undef, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %vtbl1.i, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %t6 = bitcast <8 x i8> %vext to <2 x i32>
+ call void @f0(<2 x i32> %t6)
+ ret void
+}
+
+declare void @f0(<2 x i32>)
+
+declare <8 x i8> @f1()
+
+declare void @f2()
+
+declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
+
+declare void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64>, <2 x i64>, i64, i8* nocapture)
+
+declare void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64>, <1 x i64>, i64, i8* nocapture)
+
+declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64>, <1 x i64>, i64, i8*)
+
+declare void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64>, <1 x i64>, i8* nocapture)
+
+declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>)
+
+declare <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
new file mode 100644
index 000000000000..f4ad65584095
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
@@ -0,0 +1,106 @@
+; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s
+; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Check that we don't try to tail-call with a non-forwarded sret parameter.
+declare void @test_explicit_sret(i1024* sret) #0
+
+; This is the only OK case, where we forward the explicit sret pointer.
+
+; CHECK-LABEL: _test_tailcall_explicit_sret:
+; CHECK-NEXT: b _test_explicit_sret
+define void @test_tailcall_explicit_sret(i1024* sret %arg) #0 {
+ tail call void @test_explicit_sret(i1024* %arg)
+ ret void
+}
+
+; CHECK-LABEL: _test_call_explicit_sret:
+; CHECK-NOT: mov x8
+; CHECK: bl _test_explicit_sret
+; CHECK: ret
+define void @test_call_explicit_sret(i1024* sret %arg) #0 {
+ call void @test_explicit_sret(i1024* %arg)
+ ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_unused:
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_alloca_unused() #0 {
+ %l = alloca i1024, align 8
+ tail call void @test_explicit_sret(i1024* %l)
+ ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers:
+; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0]
+; CHECK: str [[PTRLOAD1]], [sp]
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 {
+ %l = alloca i1024, align 8
+ %r = load i1024* %ptr, align 8
+ store i1024 %r, i1024* %l, align 8
+ tail call void @test_explicit_sret(i1024* %l)
+ ret void
+}
+
+; This is too conservative, but doesn't really happen in practice.
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_gep:
+; CHECK: add x8, x0, #128
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
+ %ptr2 = getelementptr i1024* %ptr, i32 1
+ tail call void @test_explicit_sret(i1024* %ptr2)
+ ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_returned:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
+ %l = alloca i1024, align 8
+ tail call void @test_explicit_sret(i1024* %l)
+ %r = load i1024* %l, align 8
+ ret i1024 %r
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_nosret_arg:
+; CHECK-DAG: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK-DAG: mov [[FPTR:x[0-9]+]], x0
+; CHECK: mov x0, sp
+; CHECK-NEXT: blr [[FPTR]]
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 {
+ %l = alloca i1024, align 8
+ tail call void %f(i1024* %l)
+ %r = load i1024* %l, align 8
+ store i1024 %r, i1024* %arg, align 8
+ ret void
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 {
+ %ret = tail call i1024 %f()
+ store i1024 %ret, i1024* %arg, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/tailcall-implicit-sret.ll b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
new file mode 100644
index 000000000000..5d6805998d22
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s
+; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Check that we don't try to tail-call with an sret-demoted return.
+
+declare i1024 @test_sret() #0
+
+; CHECK-LABEL: _test_call_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_call_sret() #0 {
+ %a = call i1024 @test_sret()
+ ret i1024 %a
+}
+
+; CHECK-LABEL: _test_tailcall_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_tailcall_sret() #0 {
+ %a = tail call i1024 @test_sret()
+ ret i1024 %a
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_indirect_tailcall_sret(i1024 ()* %f) #0 {
+ %a = tail call i1024 %f()
+ ret i1024 %a
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/Mips/adjust-callstack-sp.ll b/test/CodeGen/Mips/adjust-callstack-sp.ll
new file mode 100644
index 000000000000..8c61a650a962
--- /dev/null
+++ b/test/CodeGen/Mips/adjust-callstack-sp.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=mips -mcpu=mips16 | FileCheck %s -check-prefix=M16
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips3 | FileCheck %s -check-prefix=GP64
+; RUN: llc < %s -march=mips -mcpu=mips64 | FileCheck %s -check-prefix=GP64
+; RUN: llc < %s -march=mips -mcpu=mips64r6 | FileCheck %s -check-prefix=GP64
+
+declare void @bar(i32*)
+
+define void @foo(i32 %sz) {
+ ; ALL-LABEL: foo:
+
+ ; M16-NOT: addiu $sp, 0 # 16 bit inst
+ ; GP32-NOT: addiu $sp, $sp, 0
+ ; GP64-NOT: daddiu $sp, $sp, 0
+ %a = alloca i32, i32 %sz
+ call void @bar(i32* %a)
+ ret void
+}
diff --git a/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll b/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll
new file mode 100644
index 000000000000..d17290e552e0
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=mips64 -target-abi n64 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=MIPSEB
+; RUN: llc < %s -march=mips64el -target-abi n64 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=MIPSEL
+; RUN: llc < %s -march=mips64 -target-abi n32 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=MIPSEB
+; RUN: llc < %s -march=mips64el -target-abi n32 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=MIPSEL
+
+; #include <stdio.h>
+;
+; struct S1 {
+; char x1;
+; short x2;
+; char x3;
+; };
+;
+; struct S2 {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; };
+;
+; void fS1(struct S1 s);
+; void fS2(struct S2 s);
+;
+; void f1() {
+; struct S1 s1_1;
+; fS1(s1_1);
+; }
+;
+; void f2() {
+; struct S2 s2_1;
+; fS2(s2_1);
+; }
+;
+; int main() {
+; f1();
+; f2();
+; }
+
+%struct.S1 = type { i8, i16, i8 }
+%struct.S2 = type { i8, i8, i8, i8, i8 }
+
+declare void @fS1(i48 inreg) #1
+declare void @fS2(i40 inreg) #1
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
+
+define void @f1() #0 {
+entry:
+ %s1_1 = alloca %struct.S1, align 2
+ %s1_1.coerce = alloca { i48 }
+ %0 = bitcast { i48 }* %s1_1.coerce to i8*
+ %1 = bitcast %struct.S1* %s1_1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 6, i32 0, i1 false)
+ %2 = getelementptr { i48 }* %s1_1.coerce, i32 0, i32 0
+ %3 = load i48* %2, align 1
+ call void @fS1(i48 inreg %3)
+ ret void
+ ; ALL-LABEL: f1:
+
+ ; MIPSEB: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
+ ; MIPSEL-NOT: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
+}
+
+define void @f2() #0 {
+entry:
+ %s2_1 = alloca %struct.S2, align 1
+ %s2_1.coerce = alloca { i40 }
+ %0 = bitcast { i40 }* %s2_1.coerce to i8*
+ %1 = bitcast %struct.S2* %s2_1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 5, i32 0, i1 false)
+ %2 = getelementptr { i40 }* %s2_1.coerce, i32 0, i32 0
+ %3 = load i40* %2, align 1
+ call void @fS2(i40 inreg %3)
+ ret void
+ ; ALL-LABEL: f2:
+
+ ; MIPSEB: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 24
+ ; MIPSEL-NOT: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 24
+}
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
new file mode 100644
index 000000000000..458b124c9927
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
@@ -0,0 +1,282 @@
+; RUN: llc --march=mips64 -mcpu=mips64r2 < %s | FileCheck %s
+
+; Generated from the C program:
+;
+; #include <stdio.h>
+; #include <string.h>
+;
+; struct SmallStruct_1b {
+; char x1;
+; };
+;
+; struct SmallStruct_2b {
+; char x1;
+; char x2;
+; };
+;
+; struct SmallStruct_3b {
+; char x1;
+; char x2;
+; char x3;
+; };
+;
+; struct SmallStruct_4b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; };
+;
+; struct SmallStruct_5b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; };
+;
+; struct SmallStruct_6b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; char x6;
+; };
+;
+; struct SmallStruct_7b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; char x6;
+; char x7;
+; };
+;
+; struct SmallStruct_8b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; char x6;
+; char x7;
+; char x8;
+; };
+;
+; struct SmallStruct_9b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; char x6;
+; char x7;
+; char x8;
+; char x9;
+; };
+;
+; void varArgF_SmallStruct(char* c, ...);
+;
+; void smallStruct_1b(struct SmallStruct_1b* ss) {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_2b(struct SmallStruct_2b* ss) {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_3b(struct SmallStruct_3b* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_4b(struct SmallStruct_4b* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_5b(struct SmallStruct_5b* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_6b(struct SmallStruct_6b* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_7b(struct SmallStruct_7b* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_8b(struct SmallStruct_8b* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_9b(struct SmallStruct_9b* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+
+%struct.SmallStruct_1b = type { i8 }
+%struct.SmallStruct_2b = type { i8, i8 }
+%struct.SmallStruct_3b = type { i8, i8, i8 }
+%struct.SmallStruct_4b = type { i8, i8, i8, i8 }
+%struct.SmallStruct_5b = type { i8, i8, i8, i8, i8 }
+%struct.SmallStruct_6b = type { i8, i8, i8, i8, i8, i8 }
+%struct.SmallStruct_7b = type { i8, i8, i8, i8, i8, i8, i8 }
+%struct.SmallStruct_8b = type { i8, i8, i8, i8, i8, i8, i8, i8 }
+%struct.SmallStruct_9b = type { i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+
+@.str = private unnamed_addr constant [3 x i8] c"01\00", align 1
+
+declare void @varArgF_SmallStruct(i8* %c, ...)
+
+define void @smallStruct_1b(%struct.SmallStruct_1b* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_1b*, align 8
+ store %struct.SmallStruct_1b* %ss, %struct.SmallStruct_1b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_1b** %ss.addr, align 8
+ %1 = bitcast %struct.SmallStruct_1b* %0 to { i8 }*
+ %2 = getelementptr { i8 }* %1, i32 0, i32 0
+ %3 = load i8* %2, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8 inreg %3)
+ ret void
+ ; CHECK-LABEL: smallStruct_1b:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+}
+
+define void @smallStruct_2b(%struct.SmallStruct_2b* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_2b*, align 8
+ store %struct.SmallStruct_2b* %ss, %struct.SmallStruct_2b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_2b** %ss.addr, align 8
+ %1 = bitcast %struct.SmallStruct_2b* %0 to { i16 }*
+ %2 = getelementptr { i16 }* %1, i32 0, i32 0
+ %3 = load i16* %2, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i16 inreg %3)
+ ret void
+ ; CHECK-LABEL: smallStruct_2b:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 48
+}
+
+define void @smallStruct_3b(%struct.SmallStruct_3b* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_3b*, align 8
+ %.coerce = alloca { i24 }
+ store %struct.SmallStruct_3b* %ss, %struct.SmallStruct_3b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_3b** %ss.addr, align 8
+ %1 = bitcast { i24 }* %.coerce to i8*
+ %2 = bitcast %struct.SmallStruct_3b* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 3, i32 0, i1 false)
+ %3 = getelementptr { i24 }* %.coerce, i32 0, i32 0
+ %4 = load i24* %3, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i24 inreg %4)
+ ret void
+ ; CHECK-LABEL: smallStruct_3b:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 40
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+
+define void @smallStruct_4b(%struct.SmallStruct_4b* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_4b*, align 8
+ store %struct.SmallStruct_4b* %ss, %struct.SmallStruct_4b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_4b** %ss.addr, align 8
+ %1 = bitcast %struct.SmallStruct_4b* %0 to { i32 }*
+ %2 = getelementptr { i32 }* %1, i32 0, i32 0
+ %3 = load i32* %2, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
+ ret void
+ ; CHECK-LABEL: smallStruct_4b:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 32
+}
+
+define void @smallStruct_5b(%struct.SmallStruct_5b* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_5b*, align 8
+ %.coerce = alloca { i40 }
+ store %struct.SmallStruct_5b* %ss, %struct.SmallStruct_5b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_5b** %ss.addr, align 8
+ %1 = bitcast { i40 }* %.coerce to i8*
+ %2 = bitcast %struct.SmallStruct_5b* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 5, i32 0, i1 false)
+ %3 = getelementptr { i40 }* %.coerce, i32 0, i32 0
+ %4 = load i40* %3, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i40 inreg %4)
+ ret void
+ ; CHECK-LABEL: smallStruct_5b:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 24
+}
+
+define void @smallStruct_6b(%struct.SmallStruct_6b* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_6b*, align 8
+ %.coerce = alloca { i48 }
+ store %struct.SmallStruct_6b* %ss, %struct.SmallStruct_6b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_6b** %ss.addr, align 8
+ %1 = bitcast { i48 }* %.coerce to i8*
+ %2 = bitcast %struct.SmallStruct_6b* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
+ %3 = getelementptr { i48 }* %.coerce, i32 0, i32 0
+ %4 = load i48* %3, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
+ ret void
+ ; CHECK-LABEL: smallStruct_6b:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
+}
+
+define void @smallStruct_7b(%struct.SmallStruct_7b* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_7b*, align 8
+ %.coerce = alloca { i56 }
+ store %struct.SmallStruct_7b* %ss, %struct.SmallStruct_7b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_7b** %ss.addr, align 8
+ %1 = bitcast { i56 }* %.coerce to i8*
+ %2 = bitcast %struct.SmallStruct_7b* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 7, i32 0, i1 false)
+ %3 = getelementptr { i56 }* %.coerce, i32 0, i32 0
+ %4 = load i56* %3, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i56 inreg %4)
+ ret void
+ ; CHECK-LABEL: smallStruct_7b:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 8
+}
+
+define void @smallStruct_8b(%struct.SmallStruct_8b* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_8b*, align 8
+ store %struct.SmallStruct_8b* %ss, %struct.SmallStruct_8b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_8b** %ss.addr, align 8
+ %1 = bitcast %struct.SmallStruct_8b* %0 to { i64 }*
+ %2 = getelementptr { i64 }* %1, i32 0, i32 0
+ %3 = load i64* %2, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
+ ret void
+ ; CHECK-LABEL: smallStruct_8b:
+ ; CHECK-NOT: dsll
+}
+
+define void @smallStruct_9b(%struct.SmallStruct_9b* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_9b*, align 8
+ %.coerce = alloca { i64, i8 }
+ store %struct.SmallStruct_9b* %ss, %struct.SmallStruct_9b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_9b** %ss.addr, align 8
+ %1 = bitcast { i64, i8 }* %.coerce to i8*
+ %2 = bitcast %struct.SmallStruct_9b* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 9, i32 0, i1 false)
+ %3 = getelementptr { i64, i8 }* %.coerce, i32 0, i32 0
+ %4 = load i64* %3, align 1
+ %5 = getelementptr { i64, i8 }* %.coerce, i32 0, i32 1
+ %6 = load i8* %5, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %4, i8 inreg %6)
+ ret void
+ ; CHECK-LABEL: smallStruct_9b:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+}
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
new file mode 100644
index 000000000000..899a3e8ff0a1
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
@@ -0,0 +1,149 @@
+; RUN: llc --march=mips64 -mcpu=mips64r2 < %s | FileCheck %s
+
+; Generated from the C program:
+;
+; #include <stdio.h>
+; #include <string.h>
+;
+; struct SmallStruct_1b1s {
+; char x1;
+; short x2;
+; };
+;
+; struct SmallStruct_1b1i {
+; char x1;
+; int x2;
+; };
+;
+; struct SmallStruct_1b1s1b {
+; char x1;
+; short x2;
+; char x3;
+; };
+;
+; struct SmallStruct_1s1i {
+; short x1;
+; int x2;
+; };
+;
+; struct SmallStruct_3b1s {
+; char x1;
+; char x2;
+; char x3;
+; short x4;
+; };
+;
+; void varArgF_SmallStruct(char* c, ...);
+;
+; void smallStruct_1b1s(struct SmallStruct_1b1s* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_1b1i(struct SmallStruct_1b1i* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_1b1s1b(struct SmallStruct_1b1s1b* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_1s1i(struct SmallStruct_1s1i* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+;
+; void smallStruct_3b1s(struct SmallStruct_3b1s* ss)
+; {
+; varArgF_SmallStruct("", *ss);
+; }
+
+%struct.SmallStruct_1b1s = type { i8, i16 }
+%struct.SmallStruct_1b1i = type { i8, i32 }
+%struct.SmallStruct_1b1s1b = type { i8, i16, i8 }
+%struct.SmallStruct_1s1i = type { i16, i32 }
+%struct.SmallStruct_3b1s = type { i8, i8, i8, i16 }
+
+@.str = private unnamed_addr constant [3 x i8] c"01\00", align 1
+
+declare void @varArgF_SmallStruct(i8* %c, ...)
+
+define void @smallStruct_1b1s(%struct.SmallStruct_1b1s* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_1b1s*, align 8
+ store %struct.SmallStruct_1b1s* %ss, %struct.SmallStruct_1b1s** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_1b1s** %ss.addr, align 8
+ %1 = bitcast %struct.SmallStruct_1b1s* %0 to { i32 }*
+ %2 = getelementptr { i32 }* %1, i32 0, i32 0
+ %3 = load i32* %2, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
+ ret void
+ ; CHECK-LABEL: smallStruct_1b1s:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 32
+}
+
+define void @smallStruct_1b1i(%struct.SmallStruct_1b1i* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_1b1i*, align 8
+ store %struct.SmallStruct_1b1i* %ss, %struct.SmallStruct_1b1i** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_1b1i** %ss.addr, align 8
+ %1 = bitcast %struct.SmallStruct_1b1i* %0 to { i64 }*
+ %2 = getelementptr { i64 }* %1, i32 0, i32 0
+ %3 = load i64* %2, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
+ ret void
+ ; CHECK-LABEL: smallStruct_1b1i:
+ ; CHECK-NOT: dsll
+}
+
+define void @smallStruct_1b1s1b(%struct.SmallStruct_1b1s1b* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_1b1s1b*, align 8
+ %.coerce = alloca { i48 }
+ store %struct.SmallStruct_1b1s1b* %ss, %struct.SmallStruct_1b1s1b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_1b1s1b** %ss.addr, align 8
+ %1 = bitcast { i48 }* %.coerce to i8*
+ %2 = bitcast %struct.SmallStruct_1b1s1b* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
+ %3 = getelementptr { i48 }* %.coerce, i32 0, i32 0
+ %4 = load i48* %3, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
+ ret void
+ ; CHECK-LABEL: smallStruct_1b1s1b:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+
+define void @smallStruct_1s1i(%struct.SmallStruct_1s1i* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_1s1i*, align 8
+ store %struct.SmallStruct_1s1i* %ss, %struct.SmallStruct_1s1i** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_1s1i** %ss.addr, align 8
+ %1 = bitcast %struct.SmallStruct_1s1i* %0 to { i64 }*
+ %2 = getelementptr { i64 }* %1, i32 0, i32 0
+ %3 = load i64* %2, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
+ ret void
+ ; CHECK-LABEL: smallStruct_1s1i:
+ ; CHECK-NOT: dsll
+}
+
+define void @smallStruct_3b1s(%struct.SmallStruct_3b1s* %ss) #0 {
+entry:
+ %ss.addr = alloca %struct.SmallStruct_3b1s*, align 8
+ %.coerce = alloca { i48 }
+ store %struct.SmallStruct_3b1s* %ss, %struct.SmallStruct_3b1s** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_3b1s** %ss.addr, align 8
+ %1 = bitcast { i48 }* %.coerce to i8*
+ %2 = bitcast %struct.SmallStruct_3b1s* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
+ %3 = getelementptr { i48 }* %.coerce, i32 0, i32 0
+ %4 = load i48* %3, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
+ ret void
+ ; CHECK-LABEL: smallStruct_3b1s:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
+}
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
new file mode 100644
index 000000000000..1f7362523346
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
@@ -0,0 +1,161 @@
+; RUN: llc --march=mips64 -mcpu=mips64r2 < %s | FileCheck %s
+
+; Generated from the C program:
+;
+; #include <stdio.h>
+; #include <string.h>
+;
+; struct SmallStruct_1b {
+; char x1;
+; };
+;
+; struct SmallStruct_2b {
+; char x1;
+; char x2;
+; };
+;
+; struct SmallStruct_3b {
+; char x1;
+; char x2;
+; char x3;
+; };
+;
+; struct SmallStruct_4b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; };
+;
+; struct SmallStruct_5b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; };
+;
+; struct SmallStruct_6b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; char x6;
+; };
+;
+; struct SmallStruct_7b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; char x6;
+; char x7;
+; };
+;
+; struct SmallStruct_8b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; char x6;
+; char x7;
+; char x8;
+; };
+;
+; struct SmallStruct_9b {
+; char x1;
+; char x2;
+; char x3;
+; char x4;
+; char x5;
+; char x6;
+; char x7;
+; char x8;
+; char x9;
+; };
+;
+; void varArgF_SmallStruct(char* c, ...);
+;
+; void smallStruct_1b_x9(struct SmallStruct_1b* ss1, struct SmallStruct_1b* ss2, struct SmallStruct_1b* ss3, struct SmallStruct_1b* ss4, struct SmallStruct_1b* ss5, struct SmallStruct_1b* ss6, struct SmallStruct_1b* ss7, struct SmallStruct_1b* ss8, struct SmallStruct_1b* ss9)
+; {
+; varArgF_SmallStruct("", *ss1, *ss2, *ss3, *ss4, *ss5, *ss6, *ss7, *ss8, *ss9);
+; }
+
+%struct.SmallStruct_1b = type { i8 }
+
+@.str = private unnamed_addr constant [3 x i8] c"01\00", align 1
+
+declare void @varArgF_SmallStruct(i8* %c, ...)
+
+define void @smallStruct_1b_x9(%struct.SmallStruct_1b* %ss1, %struct.SmallStruct_1b* %ss2, %struct.SmallStruct_1b* %ss3, %struct.SmallStruct_1b* %ss4, %struct.SmallStruct_1b* %ss5, %struct.SmallStruct_1b* %ss6, %struct.SmallStruct_1b* %ss7, %struct.SmallStruct_1b* %ss8, %struct.SmallStruct_1b* %ss9) #0 {
+entry:
+ %ss1.addr = alloca %struct.SmallStruct_1b*, align 8
+ %ss2.addr = alloca %struct.SmallStruct_1b*, align 8
+ %ss3.addr = alloca %struct.SmallStruct_1b*, align 8
+ %ss4.addr = alloca %struct.SmallStruct_1b*, align 8
+ %ss5.addr = alloca %struct.SmallStruct_1b*, align 8
+ %ss6.addr = alloca %struct.SmallStruct_1b*, align 8
+ %ss7.addr = alloca %struct.SmallStruct_1b*, align 8
+ %ss8.addr = alloca %struct.SmallStruct_1b*, align 8
+ %ss9.addr = alloca %struct.SmallStruct_1b*, align 8
+ store %struct.SmallStruct_1b* %ss1, %struct.SmallStruct_1b** %ss1.addr, align 8
+ store %struct.SmallStruct_1b* %ss2, %struct.SmallStruct_1b** %ss2.addr, align 8
+ store %struct.SmallStruct_1b* %ss3, %struct.SmallStruct_1b** %ss3.addr, align 8
+ store %struct.SmallStruct_1b* %ss4, %struct.SmallStruct_1b** %ss4.addr, align 8
+ store %struct.SmallStruct_1b* %ss5, %struct.SmallStruct_1b** %ss5.addr, align 8
+ store %struct.SmallStruct_1b* %ss6, %struct.SmallStruct_1b** %ss6.addr, align 8
+ store %struct.SmallStruct_1b* %ss7, %struct.SmallStruct_1b** %ss7.addr, align 8
+ store %struct.SmallStruct_1b* %ss8, %struct.SmallStruct_1b** %ss8.addr, align 8
+ store %struct.SmallStruct_1b* %ss9, %struct.SmallStruct_1b** %ss9.addr, align 8
+ %0 = load %struct.SmallStruct_1b** %ss1.addr, align 8
+ %1 = load %struct.SmallStruct_1b** %ss2.addr, align 8
+ %2 = load %struct.SmallStruct_1b** %ss3.addr, align 8
+ %3 = load %struct.SmallStruct_1b** %ss4.addr, align 8
+ %4 = load %struct.SmallStruct_1b** %ss5.addr, align 8
+ %5 = load %struct.SmallStruct_1b** %ss6.addr, align 8
+ %6 = load %struct.SmallStruct_1b** %ss7.addr, align 8
+ %7 = load %struct.SmallStruct_1b** %ss8.addr, align 8
+ %8 = load %struct.SmallStruct_1b** %ss9.addr, align 8
+ %9 = bitcast %struct.SmallStruct_1b* %0 to { i8 }*
+ %10 = getelementptr { i8 }* %9, i32 0, i32 0
+ %11 = load i8* %10, align 1
+ %12 = bitcast %struct.SmallStruct_1b* %1 to { i8 }*
+ %13 = getelementptr { i8 }* %12, i32 0, i32 0
+ %14 = load i8* %13, align 1
+ %15 = bitcast %struct.SmallStruct_1b* %2 to { i8 }*
+ %16 = getelementptr { i8 }* %15, i32 0, i32 0
+ %17 = load i8* %16, align 1
+ %18 = bitcast %struct.SmallStruct_1b* %3 to { i8 }*
+ %19 = getelementptr { i8 }* %18, i32 0, i32 0
+ %20 = load i8* %19, align 1
+ %21 = bitcast %struct.SmallStruct_1b* %4 to { i8 }*
+ %22 = getelementptr { i8 }* %21, i32 0, i32 0
+ %23 = load i8* %22, align 1
+ %24 = bitcast %struct.SmallStruct_1b* %5 to { i8 }*
+ %25 = getelementptr { i8 }* %24, i32 0, i32 0
+ %26 = load i8* %25, align 1
+ %27 = bitcast %struct.SmallStruct_1b* %6 to { i8 }*
+ %28 = getelementptr { i8 }* %27, i32 0, i32 0
+ %29 = load i8* %28, align 1
+ %30 = bitcast %struct.SmallStruct_1b* %7 to { i8 }*
+ %31 = getelementptr { i8 }* %30, i32 0, i32 0
+ %32 = load i8* %31, align 1
+ %33 = bitcast %struct.SmallStruct_1b* %8 to { i8 }*
+ %34 = getelementptr { i8 }* %33, i32 0, i32 0
+ %35 = load i8* %34, align 1
+ call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8 inreg %11, i8 inreg %14, i8 inreg %17, i8 inreg %20, i8 inreg %23, i8 inreg %26, i8 inreg %29, i8 inreg %32, i8 inreg %35)
+ ret void
+ ; CHECK-LABEL: smallStruct_1b_x9:
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+ ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
+}
diff --git a/test/CodeGen/Mips/check-adde-redundant-moves.ll b/test/CodeGen/Mips/check-adde-redundant-moves.ll
new file mode 100644
index 000000000000..527c21770263
--- /dev/null
+++ b/test/CodeGen/Mips/check-adde-redundant-moves.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s -check-prefix=ALL
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s -check-prefix=ALL
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=ALL
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL
+
+define i64 @add_i64(i64 %a) {
+ ; GP32-LABEL: add_i64
+
+ ; GP32-NOT: move $[[T0:[0-9]+]], $[[T0]]
+ %r = add i64 5, %a
+ ret i64 %r
+}
+
+define i128 @add_i128(i128 %a) {
+ ; ALL-LABEL: add_i128
+
+ ; ALL-NOT: move $[[T0:[0-9]+]], $[[T0]]
+ %r = add i128 5, %a
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll
index 8e83b0064ed9..aa1f09bf7aba 100644
--- a/test/CodeGen/Mips/fcmp.ll
+++ b/test/CodeGen/Mips/fcmp.ll
@@ -1,10 +1,17 @@
-; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=ALL -check-prefix=32-C
-; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=32-C
-; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=32-CMP
-; RUN: llc < %s -march=mips64el -mcpu=mips4 | FileCheck %s -check-prefix=ALL -check-prefix=64-C
-; RUN: llc < %s -march=mips64el -mcpu=mips64 | FileCheck %s -check-prefix=ALL -check-prefix=64-C
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=64-C
-; RUN: llc < %s -march=mips64el -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL -check-prefix=64-CMP
+; RUN: llc < %s -march=mips -mcpu=mips32 | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=32-C
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=32-C
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=32-CMP
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=64-C
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=64-C
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=64-C
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=64-CMP
define i32 @false_f32(float %a, float %b) nounwind {
; ALL-LABEL: false_f32:
@@ -18,15 +25,13 @@ define i32 @false_f32(float %a, float %b) nounwind {
define i32 @oeq_f32(float %a, float %b) nounwind {
; ALL-LABEL: oeq_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.eq.s $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.eq.s $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.eq.s $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -44,15 +49,13 @@ define i32 @oeq_f32(float %a, float %b) nounwind {
define i32 @ogt_f32(float %a, float %b) nounwind {
; ALL-LABEL: ogt_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ule.s $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ule.s $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -70,15 +73,13 @@ define i32 @ogt_f32(float %a, float %b) nounwind {
define i32 @oge_f32(float %a, float %b) nounwind {
; ALL-LABEL: oge_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ult.s $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ult.s $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -96,15 +97,13 @@ define i32 @oge_f32(float %a, float %b) nounwind {
define i32 @olt_f32(float %a, float %b) nounwind {
; ALL-LABEL: olt_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.olt.s $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.olt.s $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -122,15 +121,13 @@ define i32 @olt_f32(float %a, float %b) nounwind {
define i32 @ole_f32(float %a, float %b) nounwind {
; ALL-LABEL: ole_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ole.s $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ole.s $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -148,15 +145,13 @@ define i32 @ole_f32(float %a, float %b) nounwind {
define i32 @one_f32(float %a, float %b) nounwind {
; ALL-LABEL: one_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ueq.s $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ueq.s $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -176,15 +171,13 @@ define i32 @one_f32(float %a, float %b) nounwind {
define i32 @ord_f32(float %a, float %b) nounwind {
; ALL-LABEL: ord_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.un.s $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.un.s $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.un.s $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -204,15 +197,13 @@ define i32 @ord_f32(float %a, float %b) nounwind {
define i32 @ueq_f32(float %a, float %b) nounwind {
; ALL-LABEL: ueq_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ueq.s $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ueq.s $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -230,15 +221,13 @@ define i32 @ueq_f32(float %a, float %b) nounwind {
define i32 @ugt_f32(float %a, float %b) nounwind {
; ALL-LABEL: ugt_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ole.s $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ole.s $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ult.s $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -256,15 +245,13 @@ define i32 @ugt_f32(float %a, float %b) nounwind {
define i32 @uge_f32(float %a, float %b) nounwind {
; ALL-LABEL: uge_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.olt.s $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.olt.s $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ule.s $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -282,15 +269,13 @@ define i32 @uge_f32(float %a, float %b) nounwind {
define i32 @ult_f32(float %a, float %b) nounwind {
; ALL-LABEL: ult_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ult.s $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ult.s $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ult.s $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -300,6 +285,7 @@ define i32 @ult_f32(float %a, float %b) nounwind {
; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
; 64-CMP-DAG: andi $2, $[[T1]], 1
+
%1 = fcmp ult float %a, %b
%2 = zext i1 %1 to i32
ret i32 %2
@@ -308,15 +294,13 @@ define i32 @ult_f32(float %a, float %b) nounwind {
define i32 @ule_f32(float %a, float %b) nounwind {
; ALL-LABEL: ule_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ule.s $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ule.s $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ule.s $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -334,15 +318,13 @@ define i32 @ule_f32(float %a, float %b) nounwind {
define i32 @une_f32(float %a, float %b) nounwind {
; ALL-LABEL: une_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.eq.s $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.eq.s $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.eq.s $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -362,15 +344,13 @@ define i32 @une_f32(float %a, float %b) nounwind {
define i32 @uno_f32(float %a, float %b) nounwind {
; ALL-LABEL: uno_f32:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.un.s $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.un.s $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.un.s $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -406,15 +386,13 @@ define i32 @false_f64(double %a, double %b) nounwind {
define i32 @oeq_f64(double %a, double %b) nounwind {
; ALL-LABEL: oeq_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.eq.d $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.eq.d $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.eq.d $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -432,15 +410,13 @@ define i32 @oeq_f64(double %a, double %b) nounwind {
define i32 @ogt_f64(double %a, double %b) nounwind {
; ALL-LABEL: ogt_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ule.d $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ule.d $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -458,15 +434,13 @@ define i32 @ogt_f64(double %a, double %b) nounwind {
define i32 @oge_f64(double %a, double %b) nounwind {
; ALL-LABEL: oge_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ult.d $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ult.d $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -484,15 +458,13 @@ define i32 @oge_f64(double %a, double %b) nounwind {
define i32 @olt_f64(double %a, double %b) nounwind {
; ALL-LABEL: olt_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.olt.d $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.olt.d $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -510,15 +482,13 @@ define i32 @olt_f64(double %a, double %b) nounwind {
define i32 @ole_f64(double %a, double %b) nounwind {
; ALL-LABEL: ole_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ole.d $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ole.d $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -536,15 +506,13 @@ define i32 @ole_f64(double %a, double %b) nounwind {
define i32 @one_f64(double %a, double %b) nounwind {
; ALL-LABEL: one_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ueq.d $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ueq.d $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -564,15 +532,13 @@ define i32 @one_f64(double %a, double %b) nounwind {
define i32 @ord_f64(double %a, double %b) nounwind {
; ALL-LABEL: ord_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.un.d $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.un.d $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.un.d $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -592,15 +558,13 @@ define i32 @ord_f64(double %a, double %b) nounwind {
define i32 @ueq_f64(double %a, double %b) nounwind {
; ALL-LABEL: ueq_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ueq.d $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ueq.d $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -618,15 +582,13 @@ define i32 @ueq_f64(double %a, double %b) nounwind {
define i32 @ugt_f64(double %a, double %b) nounwind {
; ALL-LABEL: ugt_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ole.d $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ole.d $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ult.d $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -644,15 +606,13 @@ define i32 @ugt_f64(double %a, double %b) nounwind {
define i32 @uge_f64(double %a, double %b) nounwind {
; ALL-LABEL: uge_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.olt.d $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.olt.d $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ule.d $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -670,15 +630,13 @@ define i32 @uge_f64(double %a, double %b) nounwind {
define i32 @ult_f64(double %a, double %b) nounwind {
; ALL-LABEL: ult_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ult.d $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ult.d $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ult.d $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -696,15 +654,13 @@ define i32 @ult_f64(double %a, double %b) nounwind {
define i32 @ule_f64(double %a, double %b) nounwind {
; ALL-LABEL: ule_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.ule.d $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.ule.d $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.ule.d $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -722,15 +678,13 @@ define i32 @ule_f64(double %a, double %b) nounwind {
define i32 @une_f64(double %a, double %b) nounwind {
; ALL-LABEL: une_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.eq.d $f12, $f14
-; 32-C-DAG: movf $[[T0]], $1, $fcc0
+; 32-C: movt $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.eq.d $f12, $f13
-; 64-C-DAG: movf $[[T0]], $1, $fcc0
+; 64-C: movt $2, $zero, $fcc0
; 32-CMP-DAG: cmp.eq.d $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
@@ -750,15 +704,13 @@ define i32 @une_f64(double %a, double %b) nounwind {
define i32 @uno_f64(double %a, double %b) nounwind {
; ALL-LABEL: uno_f64:
-; 32-C-DAG: addiu $[[T0:2]], $zero, 0
-; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 32-C-DAG: addiu $2, $zero, 1
; 32-C-DAG: c.un.d $f12, $f14
-; 32-C-DAG: movt $[[T0]], $1, $fcc0
+; 32-C: movf $2, $zero, $fcc0
-; 64-C-DAG: addiu $[[T0:2]], $zero, 0
-; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1
+; 64-C-DAG: addiu $2, $zero, 1
; 64-C-DAG: c.un.d $f12, $f13
-; 64-C-DAG: movt $[[T0]], $1, $fcc0
+; 64-C: movf $2, $zero, $fcc0
; 32-CMP-DAG: cmp.un.d $[[T0:f[0-9]+]], $f12, $f14
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]]
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
index 271631efb40a..f0667eec3b33 100644
--- a/test/CodeGen/Mips/fmadd1.ll
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -39,10 +39,9 @@ entry:
; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
; 32R6-DAG: add.s $f0, $[[T1]], $[[T2]]
-; 64-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13
-; 64-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 64-DAG: add.s $f0, $[[T1]], $[[T2]]
+; 64-DAG: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG: add.s $f0, $[[T0]], $[[T1]]
; 64R2: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
@@ -80,10 +79,9 @@ entry:
; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
; 32R6-DAG: add.s $f0, $[[T1]], $[[T2]]
-; 64-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13
-; 64-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 64-DAG: add.s $f0, $[[T1]], $[[T2]]
+; 64-DAG: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG: add.s $f0, $[[T0]], $[[T1]]
; 64R2: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
@@ -124,10 +122,11 @@ entry:
; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
; 32R6-DAG: sub.s $f0, $[[T2]], $[[T1]]
-; 64-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13
-; 64-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 64-DAG: sub.s $f0, $[[T2]], $[[T1]]
+; 64-NONAN: nmadd.s $f0, $f14, $f12, $f13
+
+; 64-NAN: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN: sub.s $f0, $[[T1]], $[[T0]]
; 64R2-NONAN: nmadd.s $f0, $f14, $f12, $f13
@@ -164,10 +163,11 @@ entry:
; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
; 32R2-NAN: sub.s $f0, $[[T2]], $[[T1]]
-; 64-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13
-; 64-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 64-DAG: sub.s $f0, $[[T2]], $[[T1]]
+; 64-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN: sub.s $f0, $[[T1]], $[[T0]]
+
+; 64-NONAN: nmsub.s $f0, $f14, $f12, $f13
; 64R2-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
@@ -206,10 +206,9 @@ entry:
; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
; 32R6-DAG: add.d $f0, $[[T1]], $[[T2]]
-; 64-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64-DAG: add.d $f0, $[[T1]], $[[T2]]
+; 64-DAG: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG: add.d $f0, $[[T0]], $[[T1]]
; 64R2: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
@@ -248,10 +247,9 @@ entry:
; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
; 32R6-DAG: add.d $f0, $[[T1]], $[[T2]]
-; 64-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64-DAG: add.d $f0, $[[T1]], $[[T2]]
+; 64-DAG: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG: add.d $f0, $[[T0]], $[[T1]]
; 64R2: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
@@ -293,10 +291,11 @@ entry:
; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
; 32R6-DAG: sub.d $f0, $[[T2]], $[[T1]]
-; 64-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64-DAG: sub.d $f0, $[[T2]], $[[T1]]
+; 64-NONAN: nmadd.d $f0, $f14, $f12, $f13
+
+; 64-NAN: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN: sub.d $f0, $[[T1]], $[[T0]]
; 64R2-NONAN: nmadd.d $f0, $f14, $f12, $f13
@@ -340,10 +339,11 @@ entry:
; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
; 32R6-DAG: sub.d $f0, $[[T2]], $[[T1]]
-; 64-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64-DAG: sub.d $f0, $[[T2]], $[[T1]]
+; 64-NONAN: nmsub.d $f0, $f14, $f12, $f13
+
+; 64-NAN: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN: sub.d $f0, $[[T1]], $[[T0]]
; 64R2-NONAN: nmsub.d $f0, $f14, $f12, $f13
diff --git a/test/CodeGen/Mips/llvm-ir/add.ll b/test/CodeGen/Mips/llvm-ir/add.ll
new file mode 100644
index 000000000000..83774eda634f
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/add.ll
@@ -0,0 +1,115 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
+
+define signext i1 @add_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: add_i1:
+
+ ; ALL: addu $[[T0:[0-9]+]], $4, $5
+ ; ALL: sll $[[T0]], $[[T0]], 31
+ ; ALL: sra $2, $[[T0]], 31
+
+ %r = add i1 %a, %b
+ ret i1 %r
+}
+
+define signext i8 @add_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: add_i8:
+
+ ; NOT-R2-R6: addu $[[T0:[0-9]+]], $4, $5
+ ; NOT-R2-R6: sll $[[T0]], $[[T0]], 24
+ ; NOT-R2-R6: sra $2, $[[T0]], 24
+
+ ; R2-R6: addu $[[T0:[0-9]+]], $4, $5
+ ; R2-R6: seb $2, $[[T0:[0-9]+]]
+
+ %r = add i8 %a, %b
+ ret i8 %r
+}
+
+define signext i16 @add_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: add_i16:
+
+ ; NOT-R2-R6: addu $[[T0:[0-9]+]], $4, $5
+ ; NOT-R2-R6: sll $[[T0]], $[[T0]], 16
+ ; NOT-R2-R6: sra $2, $[[T0]], 16
+
+ ; R2-R6: addu $[[T0:[0-9]+]], $4, $5
+ ; R2-R6: seh $2, $[[T0:[0-9]+]]
+
+ %r = add i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @add_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: add_i32:
+
+ ; ALL: addu $2, $4, $5
+
+ %r = add i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @add_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: add_i64:
+
+ ; GP32: addu $3, $5, $7
+ ; GP32: sltu $[[T0:[0-9]+]], $3, $7
+ ; GP32: addu $[[T1:[0-9]+]], $[[T0]], $6
+ ; GP32: addu $2, $4, $[[T1]]
+
+ ; GP64: daddu $2, $4, $5
+
+ %r = add i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @add_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: add_i128:
+
+ ; GP32: lw $[[T0:[0-9]+]], 28($sp)
+ ; GP32: addu $[[T1:[0-9]+]], $7, $[[T0]]
+ ; GP32: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+ ; GP32: lw $[[T3:[0-9]+]], 24($sp)
+ ; GP32: addu $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+ ; GP32: addu $[[T5:[0-9]+]], $6, $[[T4]]
+ ; GP32: sltu $[[T6:[0-9]+]], $[[T5]], $[[T3]]
+ ; GP32: lw $[[T7:[0-9]+]], 20($sp)
+ ; GP32: addu $[[T8:[0-9]+]], $[[T6]], $[[T7]]
+ ; GP32: lw $[[T9:[0-9]+]], 16($sp)
+ ; GP32: addu $3, $5, $[[T8]]
+ ; GP32: sltu $[[T10:[0-9]+]], $3, $[[T7]]
+ ; GP32: addu $[[T11:[0-9]+]], $[[T10]], $[[T9]]
+ ; GP32: addu $2, $4, $[[T11]]
+ ; GP32: move $4, $[[T5]]
+ ; GP32: move $5, $[[T1]]
+
+ ; GP64: daddu $3, $5, $7
+ ; GP64: sltu $[[T0:[0-9]+]], $3, $7
+ ; GP64: daddu $[[T1:[0-9]+]], $[[T0]], $6
+ ; GP64: daddu $2, $4, $[[T1]]
+
+ %r = add i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/and.ll b/test/CodeGen/Mips/llvm-ir/and.ll
new file mode 100644
index 000000000000..09d0ef9238af
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/and.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+
+define signext i1 @and_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: and_i1:
+
+ ; ALL: and $2, $4, $5
+
+ %r = and i1 %a, %b
+ ret i1 %r
+}
+
+define signext i8 @and_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: and_i8:
+
+ ; ALL: and $2, $4, $5
+
+ %r = and i8 %a, %b
+ ret i8 %r
+}
+
+define signext i16 @and_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: and_i16:
+
+ ; ALL: and $2, $4, $5
+
+ %r = and i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @and_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: and_i32:
+
+ ; GP32: and $2, $4, $5
+
+ ; GP64: and $[[T0:[0-9]+]], $4, $5
+ ; GP64: sll $2, $[[T0]], 0
+
+ %r = and i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @and_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: and_i64:
+
+ ; GP32: and $2, $4, $6
+ ; GP32: and $3, $5, $7
+
+ ; GP64: and $2, $4, $5
+
+ %r = and i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @and_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: and_i128:
+
+ ; GP32: lw $[[T0:[0-9]+]], 24($sp)
+ ; GP32: lw $[[T1:[0-9]+]], 20($sp)
+ ; GP32: lw $[[T2:[0-9]+]], 16($sp)
+ ; GP32: and $2, $4, $[[T2]]
+ ; GP32: and $3, $5, $[[T1]]
+ ; GP32: and $4, $6, $[[T0]]
+ ; GP32: lw $[[T3:[0-9]+]], 28($sp)
+ ; GP32: and $5, $7, $[[T3]]
+
+ ; GP64: and $2, $4, $6
+ ; GP64: and $3, $5, $7
+
+ %r = and i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/ashr.ll b/test/CodeGen/Mips/llvm-ir/ashr.ll
new file mode 100644
index 000000000000..415998929aa0
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/ashr.ll
@@ -0,0 +1,188 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=M2 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 -check-prefix=NOT-R2-R6 \
+; RUN: -check-prefix=32R1-R2
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R2 -check-prefix=R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R6 -check-prefix=R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=M3 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=64R6 -check-prefix=R2-R6
+
+define signext i1 @ashr_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: ashr_i1:
+
+ ; ALL: move $2, $4
+
+ %r = ashr i1 %a, %b
+ ret i1 %r
+}
+
+define signext i8 @ashr_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: ashr_i8:
+
+ ; FIXME: The andi instruction is redundant.
+ ; ALL: andi $[[T0:[0-9]+]], $5, 255
+ ; ALL: srav $2, $4, $[[T0]]
+
+ %r = ashr i8 %a, %b
+ ret i8 %r
+}
+
+define signext i16 @ashr_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: ashr_i16:
+
+ ; FIXME: The andi instruction is redundant.
+ ; ALL: andi $[[T0:[0-9]+]], $5, 65535
+ ; ALL: srav $2, $4, $[[T0]]
+
+ %r = ashr i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @ashr_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: ashr_i32:
+
+ ; ALL: srav $2, $4, $5
+
+ %r = ashr i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @ashr_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: ashr_i64:
+
+ ; M2: srav $[[T0:[0-9]+]], $4, $7
+ ; M2: andi $[[T1:[0-9]+]], $7, 32
+ ; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]]
+ ; M2: move $3, $[[T0]]
+ ; M2: srlv $[[T2:[0-9]+]], $5, $7
+ ; M2: not $[[T3:[0-9]+]], $7
+ ; M2: sll $[[T4:[0-9]+]], $4, 1
+ ; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]]
+ ; M2: or $3, $[[T3]], $[[T2]]
+ ; M2: $[[BB0]]:
+ ; M2: beqz $[[T1]], $[[BB1:BB[0-9_]+]]
+ ; M2: nop
+ ; M2: sra $2, $4, 31
+ ; M2: $[[BB1]]:
+ ; M2: jr $ra
+ ; M2: nop
+
+ ; 32R1-R2: srlv $[[T0:[0-9]+]], $5, $7
+ ; 32R1-R2: not $[[T1:[0-9]+]], $7
+ ; 32R1-R2: sll $[[T2:[0-9]+]], $4, 1
+ ; 32R1-R2: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
+ ; 32R1-R2: or $3, $[[T3]], $[[T0]]
+ ; 32R1-R2: srav $[[T4:[0-9]+]], $4, $7
+ ; 32R1-R2: andi $[[T5:[0-9]+]], $7, 32
+ ; 32R1-R2: movn $3, $[[T4]], $[[T5]]
+ ; 32R1-R2: sra $4, $4, 31
+ ; 32R1-R2: jr $ra
+ ; 32R1-R2: movn $2, $4, $[[T5]]
+
+ ; 32R6: srav $[[T0:[0-9]+]], $4, $7
+ ; 32R6: andi $[[T1:[0-9]+]], $7, 32
+ ; 32R6: seleqz $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+ ; 32R6: sra $[[T3:[0-9]+]], $4, 31
+ ; 32R6: selnez $[[T4:[0-9]+]], $[[T3]], $[[T1]]
+ ; 32R6: or $[[T5:[0-9]+]], $[[T4]], $[[T2]]
+ ; 32R6: srlv $[[T6:[0-9]+]], $5, $7
+ ; 32R6: not $[[T7:[0-9]+]], $7
+ ; 32R6: sll $[[T8:[0-9]+]], $4, 1
+ ; 32R6: sllv $[[T9:[0-9]+]], $[[T8]], $[[T7]]
+ ; 32R6: or $[[T10:[0-9]+]], $[[T9]], $[[T6]]
+ ; 32R6: seleqz $[[T11:[0-9]+]], $[[T10]], $[[T1]]
+ ; 32R6: selnez $[[T12:[0-9]+]], $[[T0]], $[[T1]]
+ ; 32R6: jr $ra
+ ; 32R6: or $3, $[[T0]], $[[T11]]
+
+ ; FIXME: The sll instruction below is redundant.
+ ; GP64: sll $[[T0:[0-9]+]], $5, 0
+ ; GP64: dsrav $2, $4, $[[T0]]
+
+ %r = ashr i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @ashr_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: ashr_i128:
+
+ ; GP32: lw $25, %call16(__ashrti3)($gp)
+
+ ; M3: sll $[[T0:[0-9]+]], $7, 0
+ ; M3: dsrav $[[T1:[0-9]+]], $4, $[[T0]]
+ ; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
+ ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]]
+ ; M3: move $3, $[[T1]]
+ ; M3: dsrlv $[[T4:[0-9]+]], $5, $[[T0]]
+ ; M3: dsll $[[T5:[0-9]+]], $4, 1
+ ; M3: not $[[T6:[0-9]+]], $[[T0]]
+ ; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
+ ; M3: or $3, $[[T7]], $[[T4]]
+ ; M3: $[[BB0]]:
+ ; M3: beqz $[[T3]], $[[BB1:BB[0-9_]+]]
+ ; M3: nop
+ ; M3: dsra $2, $4, 63
+ ; M3: $[[BB1]]:
+ ; M3: jr $ra
+ ; M3: nop
+
+ ; GP64-NOT-R6: sll $[[T0:[0-9]+]], $7, 0
+ ; GP64-NOT-R6: dsrlv $[[T1:[0-9]+]], $5, $[[T0]]
+ ; GP64-NOT-R6: dsll $[[T2:[0-9]+]], $4, 1
+ ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T0]]
+ ; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+ ; GP64-NOT-R6: or $3, $[[T4]], $[[T1]]
+ ; GP64-NOT-R6: dsrav $2, $4, $[[T0]]
+ ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T0]], 64
+
+ ; GP64-NOT-R6: movn $3, $2, $[[T5]]
+ ; GP64-NOT-R6: dsra $[[T6:[0-9]+]], $4, 63
+ ; GP64-NOT-R6: jr $ra
+ ; GP64-NOT-R6: movn $2, $[[T6]], $[[T5]]
+
+ ; 64R6: sll $[[T0:[0-9]+]], $7, 0
+ ; 64R6: dsrav $[[T1:[0-9]+]], $4, $[[T0]]
+ ; 64R6: andi $[[T2:[0-9]+]], $[[T0]], 64
+ ; 64R6: sll $[[T3:[0-9]+]], $[[T2]], 0
+ ; 64R6: seleqz $[[T4:[0-9]+]], $[[T1]], $[[T3]]
+ ; 64R6: dsra $[[T5:[0-9]+]], $4, 63
+ ; 64R6: selnez $[[T6:[0-9]+]], $[[T5]], $[[T3]]
+ ; 64R6: or $2, $[[T6]], $[[T4]]
+ ; 64R6: dsrlv $[[T7:[0-9]+]], $5, $[[T0]]
+ ; 64R6: dsll $[[T8:[0-9]+]], $4, 1
+ ; 64R6: not $[[T9:[0-9]+]], $[[T0]]
+ ; 64R6: dsllv $[[T10:[0-9]+]], $[[T8]], $[[T9]]
+ ; 64R6: or $[[T11:[0-9]+]], $[[T10]], $[[T7]]
+ ; 64R6: seleqz $[[T12:[0-9]+]], $[[T11]], $[[T3]]
+ ; 64R6: selnez $[[T13:[0-9]+]], $[[T1]], $[[T3]]
+ ; 64R6: jr $ra
+ ; 64R6: or $3, $[[T13]], $[[T12]]
+
+ %r = ashr i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/lshr.ll b/test/CodeGen/Mips/llvm-ir/lshr.ll
new file mode 100644
index 000000000000..59f4330dde6c
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/lshr.ll
@@ -0,0 +1,176 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=M2 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 -check-prefix=NOT-R2-R6 \
+; RUN: -check-prefix=32R1-R2
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R2 -check-prefix=R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R6 -check-prefix=R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=M3 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=64R6 -check-prefix=R2-R6
+
+define signext i1 @lshr_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: lshr_i1:
+
+ ; ALL: move $2, $4
+
+ %r = lshr i1 %a, %b
+ ret i1 %r
+}
+
+define zeroext i8 @lshr_i8(i8 zeroext %a, i8 zeroext %b) {
+entry:
+; ALL-LABEL: lshr_i8:
+
+ ; ALL: srlv $[[T0:[0-9]+]], $4, $5
+ ; ALL: andi $2, $[[T0]], 255
+
+ %r = lshr i8 %a, %b
+ ret i8 %r
+}
+
+define zeroext i16 @lshr_i16(i16 zeroext %a, i16 zeroext %b) {
+entry:
+; ALL-LABEL: lshr_i16:
+
+ ; ALL: srlv $[[T0:[0-9]+]], $4, $5
+ ; ALL: andi $2, $[[T0]], 65535
+
+ %r = lshr i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @lshr_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: lshr_i32:
+
+ ; ALL: srlv $2, $4, $5
+
+ %r = lshr i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @lshr_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: lshr_i64:
+
+ ; M2: srlv $[[T0:[0-9]+]], $4, $7
+ ; M2: andi $[[T1:[0-9]+]], $7, 32
+ ; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]]
+ ; M2: move $3, $[[T0]]
+ ; M2: srlv $[[T2:[0-9]+]], $5, $7
+ ; M2: not $[[T3:[0-9]+]], $7
+ ; M2: sll $[[T4:[0-9]+]], $4, 1
+ ; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]]
+ ; M2: or $3, $[[T3]], $[[T2]]
+ ; M2: $[[BB0]]:
+ ; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]]
+ ; M2: addiu $2, $zero, 0
+ ; M2: move $2, $[[T0]]
+ ; M2: $[[BB1]]:
+ ; M2: jr $ra
+ ; M2: nop
+
+ ; 32R1-R2: srlv $[[T0:[0-9]+]], $5, $7
+ ; 32R1-R2: not $[[T1:[0-9]+]], $7
+ ; 32R1-R2: sll $[[T2:[0-9]+]], $4, 1
+ ; 32R1-R2: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
+ ; 32R1-R2: or $3, $[[T3]], $[[T0]]
+ ; 32R1-R2: srlv $[[T4:[0-9]+]], $4, $7
+ ; 32R1-R2: andi $[[T5:[0-9]+]], $7, 32
+ ; 32R1-R2: movn $3, $[[T4]], $[[T5]]
+ ; 32R1-R2: jr $ra
+ ; 32R1-R2: movn $2, $zero, $[[T5]]
+
+ ; 32R6: srlv $[[T0:[0-9]+]], $5, $7
+ ; 32R6: not $[[T1:[0-9]+]], $7
+ ; 32R6: sll $[[T2:[0-9]+]], $4, 1
+ ; 32R6: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
+ ; 32R6: or $[[T4:[0-9]+]], $[[T3]], $[[T0]]
+ ; 32R6: andi $[[T5:[0-9]+]], $7, 32
+ ; 32R6: seleqz $[[T6:[0-9]+]], $[[T4]], $[[T3]]
+ ; 32R6: srlv $[[T7:[0-9]+]], $4, $7
+ ; 32R6: selnez $[[T8:[0-9]+]], $[[T7]], $[[T5]]
+ ; 32R6: or $3, $[[T8]], $[[T6]]
+ ; 32R6: jr $ra
+ ; 32R6: seleqz $2, $[[T7]], $[[T5]]
+
+ ; GP64: sll $[[T0:[0-9]+]], $5, 0
+ ; GP64: dsrlv $2, $4, $[[T0]]
+
+ %r = lshr i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: lshr_i128:
+
+ ; GP32: lw $25, %call16(__lshrti3)($gp)
+
+ ; M3: sll $[[T0:[0-9]+]], $7, 0
+ ; M3: dsrlv $[[T1:[0-9]+]], $4, $[[T0]]
+ ; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
+ ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]]
+ ; M3: move $3, $[[T1]]
+ ; M3: dsrlv $[[T4:[0-9]+]], $5, $[[T0]]
+ ; M3: dsll $[[T5:[0-9]+]], $4, 1
+ ; M3: not $[[T6:[0-9]+]], $[[T0]]
+ ; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
+ ; M3: or $3, $[[T7]], $[[T4]]
+ ; M3: $[[BB0]]:
+ ; M3: bnez $[[T3]], $[[BB1:BB[0-9_]+]]
+ ; M3: daddiu $2, $zero, 0
+ ; M3: move $2, $[[T1]]
+ ; M3: $[[BB1]]:
+ ; M3: jr $ra
+ ; M3: nop
+
+ ; GP64-NOT-R6: sll $[[T0:[0-9]+]], $7, 0
+ ; GP64-NOT-R6: dsrlv $[[T1:[0-9]+]], $5, $[[T0]]
+ ; GP64-NOT-R6: dsll $[[T2:[0-9]+]], $4, 1
+ ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T0]]
+ ; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+ ; GP64-NOT-R6: or $3, $[[T4]], $[[T1]]
+ ; GP64-NOT-R6: dsrlv $2, $4, $[[T0]]
+ ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T0]], 64
+ ; GP64-NOT-R6: movn $3, $2, $[[T5]]
+ ; GP64-NOT-R6: jr $ra
+ ; GP64-NOT-R6: movn $2, $zero, $1
+
+ ; 64R6: sll $[[T0:[0-9]+]], $7, 0
+ ; 64R6: dsrlv $[[T1:[0-9]+]], $5, $[[T0]]
+ ; 64R6: dsll $[[T2:[0-9]+]], $4, 1
+ ; 64R6: not $[[T3:[0-9]+]], $[[T0]]
+ ; 64R6: dsllv $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+ ; 64R6: or $[[T5:[0-9]+]], $[[T4]], $[[T1]]
+ ; 64R6: andi $[[T6:[0-9]+]], $[[T0]], 64
+ ; 64R6: sll $[[T7:[0-9]+]], $[[T6]], 0
+ ; 64R6: seleqz $[[T8:[0-9]+]], $[[T5]], $[[T7]]
+ ; 64R6: dsrlv $[[T9:[0-9]+]], $4, $[[T0]]
+ ; 64R6: selnez $[[T10:[0-9]+]], $[[T9]], $[[T7]]
+ ; 64R6: or $3, $[[T10]], $[[T8]]
+ ; 64R6: jr $ra
+ ; 64R6: seleqz $2, $[[T0]], $[[T7]]
+
+ %r = lshr i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/mul.ll b/test/CodeGen/Mips/llvm-ir/mul.ll
index 167412407cdc..5f7f338c7789 100644
--- a/test/CodeGen/Mips/llvm-ir/mul.ll
+++ b/test/CodeGen/Mips/llvm-ir/mul.ll
@@ -1,19 +1,19 @@
-; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
-; RUN: -check-prefix=ALL -check-prefix=M2
-; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
-; RUN: -check-prefix=ALL -check-prefix=32R1-R2 -check-prefix=32R1
-; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
-; RUN: -check-prefix=ALL -check-prefix=32R1-R2 -check-prefix=32R2
-; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
-; RUN: -check-prefix=ALL -check-prefix=32R6
-; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
-; RUN: -check-prefix=ALL -check-prefix=M4
-; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
-; RUN: -check-prefix=ALL -check-prefix=64R1-R2
-; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
-; RUN: -check-prefix=ALL -check-prefix=64R1-R2
-; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
-; RUN: -check-prefix=ALL -check-prefix=64R6
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=M2 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=32R1-R2 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=32R1-R2 -check-prefix=32R2 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=32R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=M4 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=64R1-R2 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=64R1-R2 -check-prefix=GP64 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=64R6
define signext i1 @mul_i1(i1 signext %a, i1 signext %b) {
entry:
@@ -179,3 +179,30 @@ entry:
%r = mul i64 %a, %b
ret i64 %r
}
+
+define signext i128 @mul_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: mul_i128:
+
+ ; GP32: lw $25, %call16(__multi3)($gp)
+
+ ; GP64-NOT-R6: dmult $4, $7
+ ; GP64-NOT-R6: mflo $[[T0:[0-9]+]]
+ ; GP64-NOT-R6: dmult $5, $6
+ ; GP64-NOT-R6: mflo $[[T1:[0-9]+]]
+ ; GP64-NOT-R6: dmultu $5, $7
+ ; GP64-NOT-R6: mflo $3
+ ; GP64-NOT-R6: mfhi $[[T2:[0-9]+]]
+ ; GP64-NOT-R6: daddu $[[T3:[0-9]+]], $[[T2]], $[[T1]]
+ ; GP64-NOT-R6: daddu $2, $[[T3:[0-9]+]], $[[T0]]
+
+ ; 64R6: dmul $[[T0:[0-9]+]], $5, $6
+ ; 64R6: dmuhu $[[T1:[0-9]+]], $5, $7
+ ; 64R6: daddu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+ ; 64R6: dmul $[[T3:[0-9]+]], $4, $7
+ ; 64R6: daddu $2, $[[T2]], $[[T3]]
+ ; 64R6: dmul $3, $5, $7
+
+ %r = mul i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/or.ll b/test/CodeGen/Mips/llvm-ir/or.ll
new file mode 100644
index 000000000000..21d1d4fca2a3
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/or.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+
+define signext i1 @or_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: or_i1:
+
+ ; ALL: or $2, $4, $5
+
+ %r = or i1 %a, %b
+ ret i1 %r
+}
+
+define signext i8 @or_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: or_i8:
+
+ ; ALL: or $2, $4, $5
+
+ %r = or i8 %a, %b
+ ret i8 %r
+}
+
+define signext i16 @or_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: or_i16:
+
+ ; ALL: or $2, $4, $5
+
+ %r = or i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @or_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: or_i32:
+
+ ; GP32: or $2, $4, $5
+
+ ; GP64: or $[[T0:[0-9]+]], $4, $5
+ ; FIXME: The sll instruction below is redundant.
+ ; GP64: sll $2, $[[T0]], 0
+
+ %r = or i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @or_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: or_i64:
+
+ ; GP32: or $2, $4, $6
+ ; GP32: or $3, $5, $7
+
+ ; GP64: or $2, $4, $5
+
+ %r = or i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @or_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: or_i128:
+
+ ; GP32: lw $[[T0:[0-9]+]], 24($sp)
+ ; GP32: lw $[[T1:[0-9]+]], 20($sp)
+ ; GP32: lw $[[T2:[0-9]+]], 16($sp)
+ ; GP32: or $2, $4, $[[T2]]
+ ; GP32: or $3, $5, $[[T1]]
+ ; GP32: or $4, $6, $[[T0]]
+ ; GP32: lw $[[T3:[0-9]+]], 28($sp)
+ ; GP32: or $5, $7, $[[T3]]
+
+ ; GP64: or $2, $4, $6
+ ; GP64: or $3, $5, $7
+
+ %r = or i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/sdiv.ll b/test/CodeGen/Mips/llvm-ir/sdiv.ll
new file mode 100644
index 000000000000..54b7f70b1dac
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/sdiv.ll
@@ -0,0 +1,136 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=R2 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=R2 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=R6 -check-prefix=64R6
+
+define signext i1 @sdiv_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: sdiv_i1:
+
+ ; NOT-R6: div $zero, $4, $5
+ ; NOT-R6: teq $5, $zero, 7
+ ; NOT-R6: mflo $[[T0:[0-9]+]]
+ ; FIXME: The sll/sra instructions are redundant since div is signed.
+ ; NOT-R6: sll $[[T1:[0-9]+]], $[[T0]], 31
+ ; NOT-R6: sra $2, $[[T1]], 31
+
+ ; R6: div $[[T0:[0-9]+]], $4, $5
+ ; R6: teq $5, $zero, 7
+ ; FIXME: The sll/sra instructions are redundant since div is signed.
+ ; R6: sll $[[T1:[0-9]+]], $[[T0]], 31
+ ; R6: sra $2, $[[T1]], 31
+
+ %r = sdiv i1 %a, %b
+ ret i1 %r
+}
+
+define signext i8 @sdiv_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: sdiv_i8:
+
+ ; NOT-R2-R6: div $zero, $4, $5
+ ; NOT-R2-R6: teq $5, $zero, 7
+ ; NOT-R2-R6: mflo $[[T0:[0-9]+]]
+ ; FIXME: The sll/sra instructions are redundant since div is signed.
+ ; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 24
+ ; NOT-R2-R6: sra $2, $[[T1]], 24
+
+ ; R2: div $zero, $4, $5
+ ; R2: teq $5, $zero, 7
+ ; R2: mflo $[[T0:[0-9]+]]
+ ; FIXME: This instruction is redundant.
+ ; R2: seb $2, $[[T0]]
+
+ ; R6: div $[[T0:[0-9]+]], $4, $5
+ ; R6: teq $5, $zero, 7
+ ; FIXME: This instruction is redundant.
+ ; R6: seb $2, $[[T0]]
+
+ %r = sdiv i8 %a, %b
+ ret i8 %r
+}
+
+define signext i16 @sdiv_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: sdiv_i16:
+
+ ; NOT-R2-R6: div $zero, $4, $5
+ ; NOT-R2-R6: teq $5, $zero, 7
+ ; NOT-R2-R6: mflo $[[T0:[0-9]+]]
+ ; FIXME: The sll/sra instructions are redundant since div is signed.
+ ; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 16
+ ; NOT-R2-R6: sra $2, $[[T1]], 16
+
+ ; R2: div $zero, $4, $5
+ ; R2: teq $5, $zero, 7
+ ; R2: mflo $[[T0:[0-9]+]]
+ ; FIXME: This is instruction is redundant since div is signed.
+ ; R2: seh $2, $[[T0]]
+
+ ; R6: div $[[T0:[0-9]+]], $4, $5
+ ; R6: teq $5, $zero, 7
+ ; FIXME: This is instruction is redundant since div is signed.
+ ; R6: seh $2, $[[T0]]
+
+ %r = sdiv i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @sdiv_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: sdiv_i32:
+
+ ; NOT-R6: div $zero, $4, $5
+ ; NOT-R6: teq $5, $zero, 7
+ ; NOT-R6: mflo $2
+
+ ; R6: div $2, $4, $5
+ ; R6: teq $5, $zero, 7
+
+ %r = sdiv i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @sdiv_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: sdiv_i64:
+
+ ; GP32: lw $25, %call16(__divdi3)($gp)
+
+ ; GP64-NOT-R6: ddiv $zero, $4, $5
+ ; GP64-NOT-R6: teq $5, $zero, 7
+ ; GP64-NOT-R6: mflo $2
+
+ ; 64R6: ddiv $2, $4, $5
+ ; 64R6: teq $5, $zero, 7
+
+ %r = sdiv i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @sdiv_i128(i128 signext %a, i128 signext %b) {
+entry:
+ ; ALL-LABEL: sdiv_i128:
+
+ ; GP32: lw $25, %call16(__divti3)($gp)
+
+ ; GP64-NOT-R6: ld $25, %call16(__divti3)($gp)
+ ; 64R6: ld $25, %call16(__divti3)($gp)
+
+ %r = sdiv i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/shl.ll b/test/CodeGen/Mips/llvm-ir/shl.ll
new file mode 100644
index 000000000000..fc5243cc97f2
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/shl.ll
@@ -0,0 +1,188 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=M2 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 -check-prefix=NOT-R2-R6 \
+; RUN: -check-prefix=32R1-R2
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R2 -check-prefix=R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R6 -check-prefix=R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=M3 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=64R6 -check-prefix=R2-R6
+
+define signext i1 @shl_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: shl_i1:
+
+ ; ALL: move $2, $4
+
+ %r = shl i1 %a, %b
+ ret i1 %r
+}
+
+define signext i8 @shl_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: shl_i8:
+
+ ; NOT-R2-R6: andi $[[T0:[0-9]+]], $5, 255
+ ; NOT-R2-R6: sllv $[[T1:[0-9]+]], $4, $[[T0]]
+ ; NOT-R2-R6: sll $[[T2:[0-9]+]], $[[T1]], 24
+ ; NOT-R2-R6: sra $2, $[[T2]], 24
+
+ ; R2-R6: andi $[[T0:[0-9]+]], $5, 255
+ ; R2-R6: sllv $[[T1:[0-9]+]], $4, $[[T0]]
+ ; R2-R6: seb $2, $[[T1]]
+
+ %r = shl i8 %a, %b
+ ret i8 %r
+}
+
+define signext i16 @shl_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: shl_i16:
+
+ ; NOT-R2-R6: andi $[[T0:[0-9]+]], $5, 65535
+ ; NOT-R2-R6: sllv $[[T1:[0-9]+]], $4, $[[T0]]
+ ; NOT-R2-R6: sll $[[T2:[0-9]+]], $[[T1]], 16
+ ; NOT-R2-R6: sra $2, $[[T2]], 16
+
+ ; R2-R6: andi $[[T0:[0-9]+]], $5, 65535
+ ; R2-R6: sllv $[[T1:[0-9]+]], $4, $[[T0]]
+ ; R2-R6: seh $2, $[[T1]]
+
+ %r = shl i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @shl_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: shl_i32:
+
+ ; ALL: sllv $2, $4, $5
+
+ %r = shl i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @shl_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: shl_i64:
+
+ ; M2: sllv $[[T0:[0-9]+]], $5, $7
+ ; M2: andi $[[T1:[0-9]+]], $7, 32
+ ; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]]
+ ; M2: move $2, $[[T0]]
+ ; M2: sllv $[[T2:[0-9]+]], $4, $7
+ ; M2: not $[[T3:[0-9]+]], $7
+ ; M2: srl $[[T4:[0-9]+]], $5, 1
+ ; M2: srlv $[[T5:[0-9]+]], $[[T4]], $[[T3]]
+ ; M2: or $2, $[[T2]], $[[T3]]
+ ; M2: $[[BB0]]:
+ ; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]]
+ ; M2: addiu $3, $zero, 0
+ ; M2: move $3, $[[T0]]
+ ; M2: $[[BB1]]:
+ ; M2: jr $ra
+ ; M2: nop
+
+ ; 32R1-R2: sllv $[[T0:[0-9]+]], $4, $7
+ ; 32R1-R2: not $[[T1:[0-9]+]], $7
+ ; 32R1-R2: srl $[[T2:[0-9]+]], $5, 1
+ ; 32R1-R2: srlv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
+ ; 32R1-R2: or $2, $[[T0]], $[[T3]]
+ ; 32R1-R2: sllv $[[T4:[0-9]+]], $5, $7
+ ; 32R1-R2: andi $[[T5:[0-9]+]], $7, 32
+ ; 32R1-R2: movn $2, $[[T4]], $[[T5]]
+ ; 32R1-R2: jr $ra
+ ; 32R1-R2: movn $3, $zero, $[[T5]]
+
+ ; 32R6: sllv $[[T0:[0-9]+]], $4, $7
+ ; 32R6: not $[[T1:[0-9]+]], $7
+ ; 32R6: srl $[[T2:[0-9]+]], $5, 1
+ ; 32R6: srlv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
+ ; 32R6: or $[[T4:[0-9]+]], $[[T0]], $[[T3]]
+ ; 32R6: andi $[[T5:[0-9]+]], $7, 32
+ ; 32R6: seleqz $[[T6:[0-9]+]], $[[T4]], $[[T2]]
+ ; 32R6: sllv $[[T7:[0-9]+]], $5, $7
+ ; 32R6: selnez $[[T8:[0-9]+]], $[[T7]], $[[T5]]
+ ; 32R6: or $2, $[[T8]], $[[T6]]
+ ; 32R6: jr $ra
+ ; 32R6: seleqz $3, $[[T7]], $[[T5]]
+
+ ; GP64: sll $[[T0:[0-9]+]], $5, 0
+ ; GP64: dsllv $2, $4, $1
+
+ %r = shl i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @shl_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: shl_i128:
+
+ ; GP32: lw $25, %call16(__ashlti3)($gp)
+
+ ; M3: sll $[[T0:[0-9]+]], $7, 0
+ ; M3: dsllv $[[T1:[0-9]+]], $5, $[[T0]]
+ ; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
+ ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]]
+ ; M3: move $2, $[[T1]]
+ ; M3: dsllv $[[T4:[0-9]+]], $4, $[[T0]]
+ ; M3: dsrl $[[T5:[0-9]+]], $5, 1
+ ; M3: not $[[T6:[0-9]+]], $[[T0]]
+ ; M3: dsrlv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
+ ; M3: or $2, $[[T4]], $[[T7]]
+ ; M3: $[[BB0]]:
+ ; M3: bnez $[[T3]], $[[BB1:BB[0-9_]+]]
+ ; M3: daddiu $3, $zero, 0
+ ; M3: move $3, $[[T1]]
+ ; M3: $[[BB1]]:
+ ; M3: jr $ra
+ ; M3: nop
+
+ ; GP64-NOT-R6: sll $[[T0:[0-9]+]], $7, 0
+ ; GP64-NOT-R6: dsllv $[[T1:[0-9]+]], $4, $[[T0]]
+ ; GP64-NOT-R6: dsrl $[[T2:[0-9]+]], $5, 1
+ ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T0]]
+ ; GP64-NOT-R6: dsrlv $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+ ; GP64-NOT-R6: or $2, $[[T1]], $[[T4]]
+ ; GP64-NOT-R6: dsllv $3, $5, $[[T0]]
+ ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T0]], 64
+ ; GP64-NOT-R6: movn $2, $3, $[[T5]]
+ ; GP64-NOT-R6: jr $ra
+ ; GP64-NOT-R6: movn $3, $zero, $1
+
+ ; 64R6: sll $[[T0:[0-9]+]], $7, 0
+ ; 64R6: dsllv $[[T1:[0-9]+]], $4, $[[T0]]
+ ; 64R6: dsrl $[[T2:[0-9]+]], $5, 1
+ ; 64R6: not $[[T3:[0-9]+]], $[[T0]]
+ ; 64R6: dsrlv $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+ ; 64R6: or $[[T5:[0-9]+]], $[[T1]], $[[T4]]
+ ; 64R6: andi $[[T6:[0-9]+]], $[[T0]], 64
+ ; 64R6: sll $[[T7:[0-9]+]], $[[T6]], 0
+ ; 64R6: seleqz $[[T8:[0-9]+]], $[[T5]], $[[T7]]
+ ; 64R6: dsllv $[[T9:[0-9]+]], $5, $[[T0]]
+ ; 64R6: selnez $[[T10:[0-9]+]], $[[T9]], $[[T7]]
+ ; 64R6: or $2, $[[T10]], $[[T8]]
+ ; 64R6: jr $ra
+ ; 64R6: seleqz $3, $[[T0]], $[[T7]]
+
+ %r = shl i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/srem.ll b/test/CodeGen/Mips/llvm-ir/srem.ll
new file mode 100644
index 000000000000..1e949d24678b
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/srem.ll
@@ -0,0 +1,129 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=GP32 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=GP32 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=GP32 \
+; RUN: -check-prefix=R2 -check-prefix=R2-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=GP32 -check-prefix=R6 -check-prefix=R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=R2 -check-prefix=R2-R6 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=64R6 -check-prefix=R6 -check-prefix=R2-R6
+
+define signext i1 @srem_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: srem_i1:
+
+ ; NOT-R6: div $zero, $4, $5
+ ; NOT-R6: teq $5, $zero, 7
+ ; NOT-R6: mfhi $[[T0:[0-9]+]]
+ ; NOT-R6: sll $[[T1:[0-9]+]], $[[T0]], 31
+ ; NOT-R6: sra $2, $[[T1]], 31
+
+ ; R6: mod $[[T0:[0-9]+]], $4, $5
+ ; R6: teq $5, $zero, 7
+ ; R6: sll $[[T3:[0-9]+]], $[[T0]], 31
+ ; R6: sra $2, $[[T3]], 31
+
+ %r = srem i1 %a, %b
+ ret i1 %r
+}
+
+define signext i8 @srem_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: srem_i8:
+
+ ; NOT-R2-R6: div $zero, $4, $5
+ ; NOT-R2-R6: teq $5, $zero, 7
+ ; NOT-R2-R6: mfhi $[[T0:[0-9]+]]
+ ; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 24
+ ; NOT-R2-R6: sra $2, $[[T1]], 24
+
+ ; R2: div $zero, $4, $5
+ ; R2: teq $5, $zero, 7
+ ; R2: mfhi $[[T0:[0-9]+]]
+ ; R2: seb $2, $[[T0]]
+
+ ; R6: mod $[[T0:[0-9]+]], $4, $5
+ ; R6: teq $5, $zero, 7
+ ; R6: seb $2, $[[T0]]
+
+ %r = srem i8 %a, %b
+ ret i8 %r
+}
+
+define signext i16 @srem_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: srem_i16:
+
+ ; NOT-R2-R6: div $zero, $4, $5
+ ; NOT-R2-R6: teq $5, $zero, 7
+ ; NOT-R2-R6: mfhi $[[T0:[0-9]+]]
+ ; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 16
+ ; NOT-R2-R6: sra $2, $[[T1]], 16
+
+ ; R2: div $zero, $4, $5
+ ; R2: teq $5, $zero, 7
+ ; R2: mfhi $[[T0:[0-9]+]]
+ ; R2: seh $2, $[[T1]]
+
+ ; R6: mod $[[T0:[0-9]+]], $4, $5
+ ; R6: teq $5, $zero, 7
+ ; R6: seh $2, $[[T0]]
+
+ %r = srem i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @srem_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: srem_i32:
+
+ ; NOT-R6: div $zero, $4, $5
+ ; NOT-R6: teq $5, $zero, 7
+ ; NOT-R6: mfhi $2
+
+ ; R6: mod $2, $4, $5
+ ; R6: teq $5, $zero, 7
+
+ %r = srem i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @srem_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: srem_i64:
+
+ ; GP32: lw $25, %call16(__moddi3)($gp)
+
+ ; GP64-NOT-R6: ddiv $zero, $4, $5
+ ; GP64-NOT-R6: teq $5, $zero, 7
+ ; GP64-NOT-R6: mfhi $2
+
+ ; 64R6: dmod $2, $4, $5
+ ; 64R6: teq $5, $zero, 7
+
+ %r = srem i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @srem_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: srem_i128:
+
+ ; GP32: lw $25, %call16(__modti3)($gp)
+
+ ; GP64-NOT-R6: ld $25, %call16(__modti3)($gp)
+ ; 64-R6: ld $25, %call16(__modti3)($gp)
+
+ %r = srem i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/sub.ll b/test/CodeGen/Mips/llvm-ir/sub.ll
new file mode 100644
index 000000000000..6d592be38211
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/sub.ll
@@ -0,0 +1,114 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
+
+define signext i1 @sub_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: sub_i1:
+
+ ; ALL: subu $[[T0:[0-9]+]], $4, $5
+ ; ALL: sll $[[T0]], $[[T0]], 31
+ ; ALL: sra $2, $[[T0]], 31
+
+ %r = sub i1 %a, %b
+ ret i1 %r
+}
+
+define signext i8 @sub_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: sub_i8:
+
+ ; NOT-R2-R6: subu $[[T0:[0-9]+]], $4, $5
+ ; NOT-R2-R6: sll $[[T0]], $[[T0]], 24
+ ; NOT-R2-R6: sra $2, $[[T0]], 24
+
+ ; R2-R6: subu $[[T0:[0-9]+]], $4, $5
+ ; R2-R6: seb $2, $[[T0:[0-9]+]]
+
+ %r = sub i8 %a, %b
+ ret i8 %r
+}
+
+define signext i16 @sub_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: sub_i16:
+
+ ; NOT-R2-R6: subu $[[T0:[0-9]+]], $4, $5
+ ; NOT-R2-R6: sll $[[T0]], $[[T0]], 16
+ ; NOT-R2-R6: sra $2, $[[T0]], 16
+
+ ; R2-R6: subu $[[T0:[0-9]+]], $4, $5
+ ; R2-R6: seh $2, $[[T0:[0-9]+]]
+
+ %r = sub i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @sub_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: sub_i32:
+
+ ; ALL: subu $2, $4, $5
+
+ %r = sub i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @sub_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: sub_i64:
+
+ ; GP32: subu $3, $5, $7
+ ; GP32: sltu $[[T0:[0-9]+]], $5, $7
+ ; GP32: addu $[[T1:[0-9]+]], $[[T0]], $6
+ ; GP32: subu $2, $4, $[[T1]]
+
+ ; GP64: dsubu $2, $4, $5
+
+ %r = sub i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @sub_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: sub_i128:
+
+ ; GP32: lw $[[T0:[0-9]+]], 20($sp)
+ ; GP32: sltu $[[T1:[0-9]+]], $5, $[[T0]]
+ ; GP32: lw $[[T2:[0-9]+]], 16($sp)
+ ; GP32: addu $[[T3:[0-9]+]], $[[T1]], $[[T2]]
+ ; GP32: lw $[[T4:[0-9]+]], 24($sp)
+ ; GP32: lw $[[T5:[0-9]+]], 28($sp)
+ ; GP32: subu $[[T6:[0-9]+]], $7, $[[T5]]
+ ; GP32: subu $2, $4, $[[T3]]
+ ; GP32: sltu $[[T8:[0-9]+]], $6, $[[T4]]
+ ; GP32: addu $[[T9:[0-9]+]], $[[T8]], $[[T0]]
+ ; GP32: subu $3, $5, $[[T9]]
+ ; GP32: sltu $[[T10:[0-9]+]], $7, $[[T5]]
+ ; GP32: addu $[[T11:[0-9]+]], $[[T10]], $[[T4]]
+ ; GP32: subu $4, $6, $[[T11]]
+ ; GP32: move $5, $[[T6]]
+
+ ; GP64: dsubu $3, $5, $7
+ ; GP64: sltu $[[T0:[0-9]+]], $5, $7
+ ; GP64: daddu $[[T1:[0-9]+]], $[[T0]], $6
+ ; GP64: dsubu $2, $4, $[[T1]]
+
+ %r = sub i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/udiv.ll b/test/CodeGen/Mips/llvm-ir/udiv.ll
new file mode 100644
index 000000000000..1f7aa0d5f4ce
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/udiv.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=R6 -check-prefix=64R6
+
+define zeroext i1 @udiv_i1(i1 zeroext %a, i1 zeroext %b) {
+entry:
+; ALL-LABEL: udiv_i1:
+
+ ; NOT-R6: divu $zero, $4, $5
+ ; NOT-R6: teq $5, $zero, 7
+ ; NOT-R6: mflo $2
+
+ ; R6: divu $2, $4, $5
+ ; R6: teq $5, $zero, 7
+
+ %r = udiv i1 %a, %b
+ ret i1 %r
+}
+
+define zeroext i8 @udiv_i8(i8 zeroext %a, i8 zeroext %b) {
+entry:
+; ALL-LABEL: udiv_i8:
+
+ ; NOT-R6: divu $zero, $4, $5
+ ; NOT-R6: teq $5, $zero, 7
+ ; NOT-R6: mflo $2
+
+ ; R6: divu $2, $4, $5
+ ; R6: teq $5, $zero, 7
+
+ %r = udiv i8 %a, %b
+ ret i8 %r
+}
+
+define zeroext i16 @udiv_i16(i16 zeroext %a, i16 zeroext %b) {
+entry:
+; ALL-LABEL: udiv_i16:
+
+ ; NOT-R6: divu $zero, $4, $5
+ ; NOT-R6: teq $5, $zero, 7
+ ; NOT-R6: mflo $2
+
+ ; R6: divu $2, $4, $5
+ ; R6: teq $5, $zero, 7
+
+ %r = udiv i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @udiv_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: udiv_i32:
+
+ ; NOT-R6: divu $zero, $4, $5
+ ; NOT-R6: teq $5, $zero, 7
+ ; NOT-R6: mflo $2
+
+ ; R6: divu $2, $4, $5
+ ; R6: teq $5, $zero, 7
+
+ %r = udiv i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @udiv_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: udiv_i64:
+
+ ; GP32: lw $25, %call16(__udivdi3)($gp)
+
+ ; GP64-NOT-R6: ddivu $zero, $4, $5
+ ; GP64-NOT-R6: teq $5, $zero, 7
+ ; GP64-NOT-R6: mflo $2
+
+ ; 64R6: ddivu $2, $4, $5
+ ; 64R6: teq $5, $zero, 7
+
+ %r = udiv i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @udiv_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: udiv_i128:
+
+ ; GP32: lw $25, %call16(__udivti3)($gp)
+
+ ; GP64-NOT-R6: ld $25, %call16(__udivti3)($gp)
+ ; 64-R6: ld $25, %call16(__udivti3)($gp)
+
+ %r = udiv i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/urem.ll b/test/CodeGen/Mips/llvm-ir/urem.ll
new file mode 100644
index 000000000000..73235341a42f
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/urem.ll
@@ -0,0 +1,145 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=GP32 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=GP32 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=GP32 \
+; RUN: -check-prefix=R2 -check-prefix=R2-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=GP32 -check-prefix=R6 -check-prefix=R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=R2 -check-prefix=R2-R6 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=64R6 -check-prefix=R6 -check-prefix=R2-R6
+
+define signext i1 @urem_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: urem_i1:
+
+ ; NOT-R6: andi $[[T0:[0-9]+]], $5, 1
+ ; NOT-R6: andi $[[T1:[0-9]+]], $4, 1
+ ; NOT-R6: divu $zero, $[[T1]], $[[T0]]
+ ; NOT-R6: teq $[[T0]], $zero, 7
+ ; NOT-R6: mfhi $[[T2:[0-9]+]]
+ ; NOT-R6: sll $[[T3:[0-9]+]], $[[T2]], 31
+ ; NOT-R6: sra $2, $[[T3]], 31
+
+ ; R6: andi $[[T0:[0-9]+]], $5, 1
+ ; R6: andi $[[T1:[0-9]+]], $4, 1
+ ; R6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+ ; R6: teq $[[T0]], $zero, 7
+ ; R6: sll $[[T3:[0-9]+]], $[[T2]], 31
+ ; R6: sra $2, $[[T3]], 31
+
+ %r = urem i1 %a, %b
+ ret i1 %r
+}
+
+define signext i8 @urem_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: urem_i8:
+
+ ; NOT-R2-R6: andi $[[T0:[0-9]+]], $5, 255
+ ; NOT-R2-R6: andi $[[T1:[0-9]+]], $4, 255
+ ; NOT-R2-R6: divu $zero, $[[T1]], $[[T0]]
+ ; NOT-R2-R6: teq $[[T0]], $zero, 7
+ ; NOT-R2-R6: mfhi $[[T2:[0-9]+]]
+ ; NOT-R2-R6: sll $[[T3:[0-9]+]], $[[T2]], 24
+ ; NOT-R2-R6: sra $2, $[[T3]], 24
+
+ ; R2: andi $[[T0:[0-9]+]], $5, 255
+ ; R2: andi $[[T1:[0-9]+]], $4, 255
+ ; R2: divu $zero, $[[T1]], $[[T0]]
+ ; R2: teq $[[T0]], $zero, 7
+ ; R2: mfhi $[[T2:[0-9]+]]
+ ; R2: seb $2, $[[T2]]
+
+ ; R6: andi $[[T0:[0-9]+]], $5, 255
+ ; R6: andi $[[T1:[0-9]+]], $4, 255
+ ; R6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+ ; R6: teq $[[T0]], $zero, 7
+ ; R6: seb $2, $[[T2]]
+
+ %r = urem i8 %a, %b
+ ret i8 %r
+}
+
+define signext i16 @urem_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: urem_i16:
+
+ ; NOT-R2-R6: andi $[[T0:[0-9]+]], $5, 65535
+ ; NOT-R2-R6: andi $[[T1:[0-9]+]], $4, 65535
+ ; NOT-R2-R6: divu $zero, $[[T1]], $[[T0]]
+ ; NOT-R2-R6: teq $[[T0]], $zero, 7
+ ; NOT-R2-R6: mfhi $[[T2:[0-9]+]]
+ ; NOT-R2-R6: sll $[[T3:[0-9]+]], $[[T2]], 16
+ ; NOT-R2-R6: sra $2, $[[T3]], 16
+
+ ; R2: andi $[[T0:[0-9]+]], $5, 65535
+ ; R2: andi $[[T1:[0-9]+]], $4, 65535
+ ; R2: divu $zero, $[[T1]], $[[T0]]
+ ; R2: teq $[[T0]], $zero, 7
+ ; R2: mfhi $[[T3:[0-9]+]]
+ ; R2: seh $2, $[[T2]]
+
+ ; R6: andi $[[T0:[0-9]+]], $5, 65535
+ ; R6: andi $[[T1:[0-9]+]], $4, 65535
+ ; R6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+ ; R6: teq $[[T0]], $zero, 7
+ ; R6: seh $2, $[[T2]]
+
+ %r = urem i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @urem_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: urem_i32:
+
+ ; NOT-R6: divu $zero, $4, $5
+ ; NOT-R6: teq $5, $zero, 7
+ ; NOT-R6: mfhi $2
+
+ ; R6: modu $2, $4, $5
+ ; R6: teq $5, $zero, 7
+
+ %r = urem i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @urem_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: urem_i64:
+
+ ; GP32: lw $25, %call16(__umoddi3)($gp)
+
+ ; GP64-NOT-R6: ddivu $zero, $4, $5
+ ; GP64-NOT-R6: teq $5, $zero, 7
+ ; GP64-NOT-R6: mfhi $2
+
+ ; 64R6: dmodu $2, $4, $5
+ ; 64R6: teq $5, $zero, 7
+
+ %r = urem i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @urem_i128(i128 signext %a, i128 signext %b) {
+entry:
+ ; ALL-LABEL: urem_i128:
+
+ ; GP32: lw $25, %call16(__umodti3)($gp)
+
+ ; GP64-NOT-R6: ld $25, %call16(__umodti3)($gp)
+ ; 64-R6: ld $25, %call16(__umodti3)($gp)
+
+ %r = urem i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/xor.ll b/test/CodeGen/Mips/llvm-ir/xor.ll
new file mode 100644
index 000000000000..94dead1eff41
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/xor.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+
+define signext i1 @xor_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: xor_i1:
+
+ ; ALL: xor $2, $4, $5
+
+ %r = xor i1 %a, %b
+ ret i1 %r
+}
+
+define signext i8 @xor_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: xor_i8:
+
+ ; ALL: xor $2, $4, $5
+
+ %r = xor i8 %a, %b
+ ret i8 %r
+}
+
+define signext i16 @xor_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: xor_i16:
+
+ ; ALL: xor $2, $4, $5
+
+ %r = xor i16 %a, %b
+ ret i16 %r
+}
+
+define signext i32 @xor_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: xor_i32:
+
+ ; GP32: xor $2, $4, $5
+
+ ; GP64: xor $[[T0:[0-9]+]], $4, $5
+ ; GP64: sll $2, $[[T0]], 0
+
+ %r = xor i32 %a, %b
+ ret i32 %r
+}
+
+define signext i64 @xor_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: xor_i64:
+
+ ; GP32: xor $2, $4, $6
+ ; GP32: xor $3, $5, $7
+
+ ; GP64: xor $2, $4, $5
+
+ %r = xor i64 %a, %b
+ ret i64 %r
+}
+
+define signext i128 @xor_i128(i128 signext %a, i128 signext %b) {
+entry:
+; ALL-LABEL: xor_i128:
+
+ ; GP32: lw $[[T0:[0-9]+]], 24($sp)
+ ; GP32: lw $[[T1:[0-9]+]], 20($sp)
+ ; GP32: lw $[[T2:[0-9]+]], 16($sp)
+ ; GP32: xor $2, $4, $[[T2]]
+ ; GP32: xor $3, $5, $[[T1]]
+ ; GP32: xor $4, $6, $[[T0]]
+ ; GP32: lw $[[T3:[0-9]+]], 28($sp)
+ ; GP32: xor $5, $7, $[[T3]]
+
+ ; GP64: xor $2, $4, $6
+ ; GP64: xor $3, $5, $7
+
+ %r = xor i128 %a, %b
+ ret i128 %r
+}
diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll
index 6987d4ab0734..f0cbbd08d79a 100644
--- a/test/CodeGen/Mips/mips64-f128.ll
+++ b/test/CodeGen/Mips/mips64-f128.ll
@@ -545,7 +545,7 @@ entry:
; ALL-LABEL: load_LD_float:
; ALL: ld $[[R0:[0-9]+]], %got_disp(gf1)
-; ALL: lwu $4, 0($[[R0]])
+; ALL: lw $4, 0($[[R0]])
; ALL: ld $25, %call16(__extendsftf2)
; ALL: jalr $25
diff --git a/test/CodeGen/Mips/mips64signextendsesf.ll b/test/CodeGen/Mips/mips64signextendsesf.ll
new file mode 100644
index 000000000000..dec83b80afea
--- /dev/null
+++ b/test/CodeGen/Mips/mips64signextendsesf.ll
@@ -0,0 +1,214 @@
+; RUN: llc -march=mips64 -mcpu=mips64r2 -soft-float -O2 < %s | FileCheck %s
+
+define void @foosf() #0 {
+entry:
+ %in = alloca float, align 4
+ %out = alloca float, align 4
+ store volatile float 0xBFD59E1380000000, float* %in, align 4
+ %in.0.in.0. = load volatile float* %in, align 4
+ %rintf = tail call float @rintf(float %in.0.in.0.) #1
+ store volatile float %rintf, float* %out, align 4
+ ret void
+
+; CHECK-LABEL: foosf
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+declare float @rintf(float)
+
+define float @foosf1(float* nocapture readonly %a) #0 {
+entry:
+ %0 = load float* %a, align 4
+ %call = tail call float @roundf(float %0) #2
+ ret float %call
+
+; CHECK-LABEL: foosf1
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+declare float @roundf(float) #1
+
+define float @foosf2(float* nocapture readonly %a) #0 {
+entry:
+ %0 = load float* %a, align 4
+ %call = tail call float @truncf(float %0) #2
+ ret float %call
+
+; CHECK-LABEL: foosf2
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+declare float @truncf(float) #1
+
+define float @foosf3(float* nocapture readonly %a) #0 {
+entry:
+ %0 = load float* %a, align 4
+ %call = tail call float @floorf(float %0) #2
+ ret float %call
+
+; CHECK-LABEL: foosf3
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+declare float @floorf(float) #1
+
+define float @foosf4(float* nocapture readonly %a) #0 {
+entry:
+ %0 = load float* %a, align 4
+ %call = tail call float @nearbyintf(float %0) #2
+ ret float %call
+
+; CHECK-LABEL: foosf4
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+declare float @nearbyintf(float) #1
+
+define float @foosf5(float* nocapture readonly %a) #0 {
+entry:
+ %0 = load float* %a, align 4
+ %mul = fmul float %0, undef
+ ret float %mul
+
+; CHECK-LABEL: foosf5
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+define float @foosf6(float* nocapture readonly %a) #0 {
+entry:
+ %0 = load float* %a, align 4
+ %sub = fsub float %0, undef
+ ret float %sub
+
+; CHECK-LABEL: foosf6
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+define float @foosf7(float* nocapture readonly %a) #0 {
+entry:
+ %0 = load float* %a, align 4
+ %add = fadd float %0, undef
+ ret float %add
+
+; CHECK-LABEL: foosf7
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+define float @foosf8(float* nocapture readonly %a) #0 {
+entry:
+ %b = alloca float, align 4
+ %b.0.b.0. = load volatile float* %b, align 4
+ %0 = load float* %a, align 4
+ %div = fdiv float %b.0.b.0., %0
+ ret float %div
+
+; CHECK-LABEL: foosf8
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+define float @foosf9() #0 {
+entry:
+ %b = alloca float, align 4
+ %b.0.b.0. = load volatile float* %b, align 4
+ %conv = fpext float %b.0.b.0. to double
+ %b.0.b.0.3 = load volatile float* %b, align 4
+ %conv1 = fpext float %b.0.b.0.3 to double
+ %call = tail call double @pow(double %conv, double %conv1) #1
+ %conv2 = fptrunc double %call to float
+ ret float %conv2
+
+; CHECK-LABEL: foosf9
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+declare double @pow(double, double) #0
+
+define float @foosf10() #0 {
+entry:
+ %a = alloca float, align 4
+ %a.0.a.0. = load volatile float* %a, align 4
+ %conv = fpext float %a.0.a.0. to double
+ %call = tail call double @sin(double %conv) #1
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+
+; CHECK-LABEL: foosf10
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+declare double @sin(double) #0
+
+define float @foosf11() #0 {
+entry:
+ %b = alloca float, align 4
+ %b.0.b.0. = load volatile float* %b, align 4
+ %call = tail call float @ceilf(float %b.0.b.0.) #2
+ ret float %call
+
+; CHECK-LABEL: foosf11
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+declare float @ceilf(float) #1
+
+define float @foosf12() #0 {
+entry:
+ %b = alloca float, align 4
+ %a = alloca float, align 4
+ %b.0.b.0. = load volatile float* %b, align 4
+ %a.0.a.0. = load volatile float* %a, align 4
+ %call = tail call float @fmaxf(float %b.0.b.0., float %a.0.a.0.) #2
+ ret float %call
+
+; CHECK-LABEL: foosf12
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+declare float @fmaxf(float, float) #1
+
+define float @foosf13() #0 {
+entry:
+ %b = alloca float, align 4
+ %a = alloca float, align 4
+ %b.0.b.0. = load volatile float* %b, align 4
+ %a.0.a.0. = load volatile float* %a, align 4
+ %call = tail call float @fminf(float %b.0.b.0., float %a.0.a.0.) #2
+ ret float %call
+
+; CHECK-LABEL: foosf13
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+; CHECK-NOT: lwu
+}
+
+declare float @fminf(float, float) #1
+
+
+attributes #0 = { nounwind "use-soft-float"="true" }
+attributes #1 = { nounwind readnone "use-soft-float"="true" }
diff --git a/test/CodeGen/Mips/mips64sinttofpsf.ll b/test/CodeGen/Mips/mips64sinttofpsf.ll
new file mode 100644
index 000000000000..d3d46036f7da
--- /dev/null
+++ b/test/CodeGen/Mips/mips64sinttofpsf.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mips64 -mcpu=mips64r2 -soft-float -O0 < %s | FileCheck %s
+
+
+define double @foo() #0 {
+entry:
+ %x = alloca i32, align 4
+ store volatile i32 -32, i32* %x, align 4
+ %0 = load volatile i32* %x, align 4
+ %conv = sitofp i32 %0 to double
+ ret double %conv
+
+; CHECK-NOT: dsll
+; CHECK-NOT: dsrl
+
+}
diff --git a/test/CodeGen/Mips/no-odd-spreg-msa.ll b/test/CodeGen/Mips/no-odd-spreg-msa.ll
new file mode 100644
index 000000000000..30dd1ff82d73
--- /dev/null
+++ b/test/CodeGen/Mips/no-odd-spreg-msa.ll
@@ -0,0 +1,131 @@
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,-nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,+nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOODDSPREG
+
+@v4f32 = global <4 x float> zeroinitializer
+
+define void @msa_insert_0(float %a) {
+entry:
+ ; Force the float into an odd-numbered register using named registers and
+ ; load the vector.
+ %b = call float asm sideeffect "mov.s $0, $1", "={$f13},{$f12}" (float %a)
+ %0 = load volatile <4 x float>* @v4f32
+
+ ; Clobber all except $f12/$w12 and $f13
+ ;
+ ; The intention is that if odd single precision registers are permitted, the
+ ; allocator will choose $f12/$w12 for the vector and $f13 for the float to
+ ; avoid the spill/reload.
+ ;
+ ; On the other hand, if odd single precision registers are not permitted, it
+ ; must copy $f13 to an even-numbered register before inserting into the
+ ; vector.
+ call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+ %1 = insertelement <4 x float> %0, float %b, i32 0
+ store <4 x float> %1, <4 x float>* @v4f32
+ ret void
+}
+
+; ALL-LABEL: msa_insert_0:
+; ALL: mov.s $f13, $f12
+; ALL: lw $[[R0:[0-9]+]], %got(v4f32)(
+; ALL: ld.w $w[[W0:[0-9]+]], 0($[[R0]])
+; NOODDSPREG: mov.s $f[[F0:[0-9]+]], $f13
+; NOODDSPREG: insve.w $w[[W0]][0], $w[[F0]][0]
+; ODDSPREG: insve.w $w[[W0]][0], $w13[0]
+; ALL: # Clobber
+; ALL-NOT: sdc1
+; ALL-NOT: ldc1
+; ALL: st.w $w[[W0]], 0($[[R0]])
+
+define void @msa_insert_1(float %a) {
+entry:
+ ; Force the float into an odd-numbered register using named registers and
+ ; load the vector.
+ %b = call float asm sideeffect "mov.s $0, $1", "={$f13},{$f12}" (float %a)
+ %0 = load volatile <4 x float>* @v4f32
+
+ ; Clobber all except $f12/$w12 and $f13
+ ;
+ ; The intention is that if odd single precision registers are permitted, the
+ ; allocator will choose $f12/$w12 for the vector and $f13 for the float to
+ ; avoid the spill/reload.
+ ;
+ ; On the other hand, if odd single precision registers are not permitted, it
+ ; must copy $f13 to an even-numbered register before inserting into the
+ ; vector.
+ call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+ %1 = insertelement <4 x float> %0, float %b, i32 1
+ store <4 x float> %1, <4 x float>* @v4f32
+ ret void
+}
+
+; ALL-LABEL: msa_insert_1:
+; ALL: mov.s $f13, $f12
+; ALL: lw $[[R0:[0-9]+]], %got(v4f32)(
+; ALL: ld.w $w[[W0:[0-9]+]], 0($[[R0]])
+; NOODDSPREG: mov.s $f[[F0:[0-9]+]], $f13
+; NOODDSPREG: insve.w $w[[W0]][1], $w[[F0]][0]
+; ODDSPREG: insve.w $w[[W0]][1], $w13[0]
+; ALL: # Clobber
+; ALL-NOT: sdc1
+; ALL-NOT: ldc1
+; ALL: st.w $w[[W0]], 0($[[R0]])
+
+define float @msa_extract_0() {
+entry:
+ %0 = load volatile <4 x float>* @v4f32
+ %1 = call <4 x float> asm sideeffect "move.v $0, $1", "={$w13},{$w12}" (<4 x float> %0)
+
+ ; Clobber all except $f12, and $f13
+ ;
+ ; The intention is that if odd single precision registers are permitted, the
+ ; allocator will choose $f13/$w13 for the vector since that saves on moves.
+ ;
+ ; On the other hand, if odd single precision registers are not permitted, it
+ ; must move it to $f12/$w12.
+ call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+
+ %2 = extractelement <4 x float> %1, i32 0
+ ret float %2
+}
+
+; ALL-LABEL: msa_extract_0:
+; ALL: lw $[[R0:[0-9]+]], %got(v4f32)(
+; ALL: ld.w $w12, 0($[[R0]])
+; ALL: move.v $w[[W0:13]], $w12
+; NOODDSPREG: move.v $w[[W0:12]], $w13
+; ALL: # Clobber
+; ALL-NOT: st.w
+; ALL-NOT: ld.w
+; ALL: mov.s $f0, $f[[W0]]
+
+define float @msa_extract_1() {
+entry:
+ %0 = load volatile <4 x float>* @v4f32
+ %1 = call <4 x float> asm sideeffect "move.v $0, $1", "={$w13},{$w12}" (<4 x float> %0)
+
+ ; Clobber all except $f13
+ ;
+ ; The intention is that if odd single precision registers are permitted, the
+ ; allocator will choose $f13/$w13 for the vector since that saves on moves.
+ ;
+ ; On the other hand, if odd single precision registers are not permitted, it
+ ; must be spilled.
+ call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+
+ %2 = extractelement <4 x float> %1, i32 1
+ ret float %2
+}
+
+; ALL-LABEL: msa_extract_1:
+; ALL: lw $[[R0:[0-9]+]], %got(v4f32)(
+; ALL: ld.w $w12, 0($[[R0]])
+; ALL: splati.w $w[[W0:[0-9]+]], $w13[1]
+; NOODDSPREG: st.w $w[[W0]], 0($sp)
+; ODDSPREG-NOT: st.w
+; ODDSPREG-NOT: ld.w
+; ALL: # Clobber
+; ODDSPREG-NOT: st.w
+; ODDSPREG-NOT: ld.w
+; NOODDSPREG: ld.w $w0, 0($sp)
+; ODDSPREG: mov.s $f0, $f[[W0]]
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll
index 3b3fee05af7f..557d86aa8376 100644
--- a/test/CodeGen/R600/128bit-kernel-args.ll
+++ b/test/CodeGen/R600/128bit-kernel-args.ll
@@ -1,26 +1,27 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI
-; R600-CHECK: {{^}}v4i32_kernel_arg:
-; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
-; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
-; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
-; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
-; SI-CHECK: {{^}}v4i32_kernel_arg:
-; SI-CHECK: buffer_store_dwordx4
+; R600: {{^}}v4i32_kernel_arg:
+; R600-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
+; R600-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
+; R600-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
+; R600-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
+; SI: {{^}}v4i32_kernel_arg:
+; SI: buffer_store_dwordx4
define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out
ret void
}
-; R600-CHECK: {{^}}v4f32_kernel_arg:
-; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
-; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
-; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
-; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
-; SI-CHECK: {{^}}v4f32_kernel_arg:
-; SI-CHECK: buffer_store_dwordx4
+; R600: {{^}}v4f32_kernel_arg:
+; R600-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
+; R600-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
+; R600-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
+; R600-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
+; SI: {{^}}v4f32_kernel_arg:
+; SI: buffer_store_dwordx4
define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
index 6ab0c08d505f..71940fd88f26 100644
--- a/test/CodeGen/R600/32-bit-local-address-space.ll
+++ b/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
; the global address space(1) uses 64-bit pointers. These tests check to make sure
diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll
index 02b6f34e9419..9f2738edb6eb 100644
--- a/test/CodeGen/R600/64bit-kernel-args.ll
+++ b/test/CodeGen/R600/64bit-kernel-args.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI
-; SI-CHECK: {{^}}f64_kernel_arg:
-; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
-; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
-; SI-CHECK: buffer_store_dwordx2
+; SI: {{^}}f64_kernel_arg:
+; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
+; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
+; SI: buffer_store_dwordx2
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
store double %in, double addrspace(1)* %out
diff --git a/test/CodeGen/R600/add-debug.ll b/test/CodeGen/R600/add-debug.ll
index 85e9451d4a9b..a83c689eb182 100644
--- a/test/CodeGen/R600/add-debug.ll
+++ b/test/CodeGen/R600/add-debug.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -debug
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -debug
; REQUIRES: asserts
; Check that SelectionDAGDumper does not crash on int_SI_if.
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll
index d95853a61048..3a8b97cd87e8 100644
--- a/test/CodeGen/R600/add.ll
+++ b/test/CodeGen/R600/add.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK --check-prefix=FUNC %s
-; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
;FUNC-LABEL: {{^}}test1:
-;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
-;SI-CHECK-NOT: [[REG]]
-;SI-CHECK: buffer_store_dword [[REG]],
+;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
+;SI-NOT: [[REG]]
+;SI: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
%a = load i32 addrspace(1)* %in
@@ -17,11 +18,11 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
}
;FUNC-LABEL: {{^}}test2:
-;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -33,15 +34,15 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
}
;FUNC-LABEL: {{^}}test4:
-;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -53,22 +54,22 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
}
; FUNC-LABEL: {{^}}test8:
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
define void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
%0 = add <8 x i32> %a, %b
@@ -77,38 +78,38 @@ entry:
}
; FUNC-LABEL: {{^}}test16:
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; EG-CHECK: ADD_INT
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
-; SI-CHECK: s_add_i32
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; EG: ADD_INT
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
define void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
%0 = add <16 x i32> %a, %b
@@ -117,8 +118,8 @@ entry:
}
; FUNC-LABEL: {{^}}add64:
-; SI-CHECK: s_add_u32
-; SI-CHECK: s_addc_u32
+; SI: s_add_u32
+; SI: s_addc_u32
define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = add i64 %a, %b
@@ -132,7 +133,7 @@ entry:
; to a VGPR before doing the add.
; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
-; SI-CHECK-NOT: v_addc_u32_e32 s
+; SI-NOT: v_addc_u32_e32 s
define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
%0 = load i64 addrspace(1)* %in
@@ -143,8 +144,8 @@ entry:
; Test i64 add inside a branch.
; FUNC-LABEL: {{^}}add64_in_branch:
-; SI-CHECK: s_add_u32
-; SI-CHECK: s_addc_u32
+; SI: s_add_u32
+; SI: s_addc_u32
define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
%0 = icmp eq i64 %a, 0
diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll
index 1106f4f99623..aaa0628ccdc9 100644
--- a/test/CodeGen/R600/address-space.ll
+++ b/test/CodeGen/R600/address-space.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; Test that codegenprepare understands address space sizes
diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll
index bfdf8734eabb..7a395ccb38d0 100644
--- a/test/CodeGen/R600/and.ll
+++ b/test/CodeGen/R600/and.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}test2:
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
diff --git a/test/CodeGen/R600/anyext.ll b/test/CodeGen/R600/anyext.ll
index 8336ebcaca3b..48d8f3122495 100644
--- a/test/CodeGen/R600/anyext.ll
+++ b/test/CodeGen/R600/anyext.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}anyext_i1_i32:
; CHECK: v_cndmask_b32_e64
diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll
index 6dd1c51afb4a..5fe05f2996af 100644
--- a/test/CodeGen/R600/atomic_load_add.ll
+++ b/test/CodeGen/R600/atomic_load_add.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}atomic_add_local:
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
index 5d47185421a0..40722833d265 100644
--- a/test/CodeGen/R600/atomic_load_sub.ll
+++ b/test/CodeGen/R600/atomic_load_sub.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}atomic_sub_local:
; R600: LDS_SUB *
diff --git a/test/CodeGen/R600/basic-branch.ll b/test/CodeGen/R600/basic-branch.ll
index 42ddddd2ed84..abdc4afef472 100644
--- a/test/CodeGen/R600/basic-branch.ll
+++ b/test/CodeGen/R600/basic-branch.ll
@@ -1,5 +1,6 @@
; XFAIL: *
; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: {{^}}test_branch(
define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind {
diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/R600/basic-loop.ll
index 9d0509b38d8a..f0263caf5d6b 100644
--- a/test/CodeGen/R600/basic-loop.ll
+++ b/test/CodeGen/R600/basic-loop.ll
@@ -1,4 +1,5 @@
; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
+; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s
; CHECK-LABEL: {{^}}test_loop:
define void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind {
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll
index 988a2f85e0ea..03349349735d 100644
--- a/test/CodeGen/R600/bfi_int.ll
+++ b/test/CodeGen/R600/bfi_int.ll
@@ -1,13 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
-; R600-CHECK: {{^}}bfi_def:
-; R600-CHECK: BFI_INT
-; SI-CHECK: @bfi_def
-; SI-CHECK: v_bfi_b32
+; R600: {{^}}bfi_def:
+; R600: BFI_INT
+; SI: @bfi_def
+; SI: v_bfi_b32
define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %x, -1
@@ -20,10 +21,10 @@ entry:
; SHA-256 Ch function
; z ^ (x & (y ^ z))
-; R600-CHECK: {{^}}bfi_sha256_ch:
-; R600-CHECK: BFI_INT
-; SI-CHECK: @bfi_sha256_ch
-; SI-CHECK: v_bfi_b32
+; R600: {{^}}bfi_sha256_ch:
+; R600: BFI_INT
+; SI: @bfi_sha256_ch
+; SI: v_bfi_b32
define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %y, %z
@@ -35,11 +36,11 @@ entry:
; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
-; R600-CHECK: {{^}}bfi_sha256_ma:
-; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
-; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-; SI-CHECK: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
-; SI-CHECK: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
+; R600: {{^}}bfi_sha256_ma:
+; R600: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
+; R600: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
+; SI: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
+; SI: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll
index 3607d519f013..1ba64af7dca3 100644
--- a/test/CodeGen/R600/bitcast.ll
+++ b/test/CodeGen/R600/bitcast.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; This test just checks that the compiler doesn't crash.
diff --git a/test/CodeGen/R600/bswap.ll b/test/CodeGen/R600/bswap.ll
index 65998f5f1151..e93543de49da 100644
--- a/test/CodeGen/R600/bswap.ll
+++ b/test/CodeGen/R600/bswap.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone
diff --git a/test/CodeGen/R600/build_vector.ll b/test/CodeGen/R600/build_vector.ll
index a0ebe089bd5a..65eacf5adc41 100644
--- a/test/CodeGen/R600/build_vector.ll
+++ b/test/CodeGen/R600/build_vector.ll
@@ -1,32 +1,33 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI
-; R600-CHECK: {{^}}build_vector2:
-; R600-CHECK: MOV
-; R600-CHECK: MOV
-; R600-CHECK-NOT: MOV
-; SI-CHECK: {{^}}build_vector2:
-; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
-; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
-; SI-CHECK: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
+; R600: {{^}}build_vector2:
+; R600: MOV
+; R600: MOV
+; R600-NOT: MOV
+; SI: {{^}}build_vector2:
+; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
+; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
+; SI: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
define void @build_vector2 (<2 x i32> addrspace(1)* %out) {
entry:
store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out
ret void
}
-; R600-CHECK: {{^}}build_vector4:
-; R600-CHECK: MOV
-; R600-CHECK: MOV
-; R600-CHECK: MOV
-; R600-CHECK: MOV
-; R600-CHECK-NOT: MOV
-; SI-CHECK: {{^}}build_vector4:
-; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
-; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
-; SI-CHECK-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
-; SI-CHECK-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
-; SI-CHECK: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}}
+; R600: {{^}}build_vector4:
+; R600: MOV
+; R600: MOV
+; R600: MOV
+; R600: MOV
+; R600-NOT: MOV
+; SI: {{^}}build_vector4:
+; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
+; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
+; SI-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
+; SI-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
+; SI: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}}
define void @build_vector4 (<4 x i32> addrspace(1)* %out) {
entry:
store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out
diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll
index 1afe98ba5951..9a0eb1cc3fa0 100644
--- a/test/CodeGen/R600/call.ll
+++ b/test/CodeGen/R600/call.ll
@@ -1,4 +1,5 @@
; RUN: not llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
+; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s 2>&1 | FileCheck %s
; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
; CHECK: error: unsupported call to function external_function in test_call_external
@@ -6,28 +7,27 @@
declare i32 @external_function(i32) nounwind
-define i32 @defined_function(i32 %x) nounwind noinline {
- %y = add i32 %x, 8
- ret i32 %y
-}
-
-define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
%a = load i32 addrspace(1)* %in
%b = load i32 addrspace(1)* %b_ptr
- %c = call i32 @defined_function(i32 %b) nounwind
+ %c = call i32 @external_function(i32 %b) nounwind
%result = add i32 %a, %c
store i32 %result, i32 addrspace(1)* %out
ret void
}
-define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define i32 @defined_function(i32 %x) nounwind noinline {
+ %y = add i32 %x, 8
+ ret i32 %y
+}
+
+define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
%a = load i32 addrspace(1)* %in
%b = load i32 addrspace(1)* %b_ptr
- %c = call i32 @external_function(i32 %b) nounwind
+ %c = call i32 @defined_function(i32 %b) nounwind
%result = add i32 %a, %c
store i32 %result, i32 addrspace(1)* %out
ret void
}
-
diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/R600/call_fs.ll
index 7df22402cff9..db2cb6e5011c 100644
--- a/test/CodeGen/R600/call_fs.ll
+++ b/test/CodeGen/R600/call_fs.ll
@@ -1,13 +1,13 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s
+; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s
-; EG-CHECK: {{^}}call_fs:
-; EG-CHECK: .long 257
-; EG-CHECK: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
-; R600-CHECK: {{^}}call_fs:
-; R600-CHECK: .long 257
-; R600-CHECK:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
+; EG: {{^}}call_fs:
+; EG: .long 257
+; EG: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
+; R600: {{^}}call_fs:
+; R600: .long 257
+; R600:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
define void @call_fs() #0 {
diff --git a/test/CodeGen/R600/cf_end.ll b/test/CodeGen/R600/cf_end.ll
index 138004df6df9..c74ee22868d5 100644
--- a/test/CodeGen/R600/cf_end.ll
+++ b/test/CodeGen/R600/cf_end.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG %s
+; RUN: llc < %s -march=r600 -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG %s
+; RUN: llc < %s -march=r600 -mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM %s
-; EG-CHECK: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80]
-; CM-CHECK: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88]
+; EG: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80]
+; CM: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88]
define void @eop() {
ret void
}
diff --git a/test/CodeGen/R600/concat_vectors.ll b/test/CodeGen/R600/concat_vectors.ll
index 4c5b9c959516..b27bed3d4265 100644
--- a/test/CodeGen/R600/concat_vectors.ll
+++ b/test/CodeGen/R600/concat_vectors.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}test_concat_v1i32:
; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF
@@ -282,3 +283,14 @@ define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <1
store <32 x i16> %concat, <32 x i16> addrspace(1)* %out, align 64
ret void
}
+
+; FUNC-LABEL: {{^}}concat_vector_crash:
+; SI: s_endpgm
+define void @concat_vector_crash(<8 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
+bb:
+ %tmp = load <2 x float> addrspace(1)* %in, align 4
+ %tmp1 = shufflevector <2 x float> %tmp, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %tmp2 = shufflevector <8 x float> undef, <8 x float> %tmp1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+ store <8 x float> %tmp2, <8 x float> addrspace(1)* %out, align 32
+ ret void
+}
diff --git a/test/CodeGen/R600/copy-illegal-type.ll b/test/CodeGen/R600/copy-illegal-type.ll
index 2dff24c432b1..56c43d23b4a1 100644
--- a/test/CodeGen/R600/copy-illegal-type.ll
+++ b/test/CodeGen/R600/copy-illegal-type.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}test_copy_v4i8:
; SI: buffer_load_dword [[REG:v[0-9]+]]
diff --git a/test/CodeGen/R600/copy-to-reg.ll b/test/CodeGen/R600/copy-to-reg.ll
index 4a4143567102..9c1de73b3b1b 100644
--- a/test/CodeGen/R600/copy-to-reg.ll
+++ b/test/CodeGen/R600/copy-to-reg.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s
; Test that CopyToReg instructions don't have non-register operands prior
; to being emitted.
diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll
index 090610d4aac2..1a4317b8095c 100644
--- a/test/CodeGen/R600/ctlz_zero_undef.ll
+++ b/test/CodeGen/R600/ctlz_zero_undef.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
diff --git a/test/CodeGen/R600/cttz-ctlz.ll b/test/CodeGen/R600/cttz-ctlz.ll
index 6be06d243eaa..c957a033c5d7 100644
--- a/test/CodeGen/R600/cttz-ctlz.ll
+++ b/test/CodeGen/R600/cttz-ctlz.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -codegenprepare -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
+; RUN: opt -S -codegenprepare -mtriple=r600-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
define i64 @test1(i64 %A) {
diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll
index ab59360694bf..d9d284c58865 100644
--- a/test/CodeGen/R600/cttz_zero_undef.ll
+++ b/test/CodeGen/R600/cttz_zero_undef.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll
index e26ee12f6f6d..69eea5919c05 100644
--- a/test/CodeGen/R600/cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/cvt_f32_ubyte.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}load_i8_to_f32:
; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]],
@@ -145,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in, align 1
+ %load = load <8 x i8> addrspace(1)* %in, align 8
%cvt = uitofp <8 x i8> %load to <8 x float>
store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/default-fp-mode.ll b/test/CodeGen/R600/default-fp-mode.ll
index 6b6d49996eb1..da8e91454b98 100644
--- a/test/CodeGen/R600/default-fp-mode.ll
+++ b/test/CodeGen/R600/default-fp-mode.ll
@@ -5,6 +5,13 @@
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}test_kernel:
diff --git a/test/CodeGen/R600/ds_read2_offset_order.ll b/test/CodeGen/R600/ds_read2_offset_order.ll
index bdbe22ff2348..44306bc9d38f 100644
--- a/test/CodeGen/R600/ds_read2_offset_order.ll
+++ b/test/CodeGen/R600/ds_read2_offset_order.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
; XFAIL: *
diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll
index ec28ed9c1dcd..f801b3f57357 100644
--- a/test/CodeGen/R600/elf.ll
+++ b/test/CodeGen/R600/elf.ll
@@ -1,19 +1,24 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF-CHECK %s
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s
+; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
-; ELF-CHECK: Format: ELF32
-; ELF-CHECK: Name: .AMDGPU.config
-; ELF-CHECK: Type: SHT_PROGBITS
+; ELF: Format: ELF32
+; ELF: Name: .AMDGPU.config
+; ELF: Type: SHT_PROGBITS
-; ELF-CHECK: Symbol {
-; ELF-CHECK: Name: test
-; ELF-CHECK: Binding: Global
+; ELF: Symbol {
+; ELF: Name: test
+; ELF: Binding: Global
-; CONFIG-CHECK: .align 256
-; CONFIG-CHECK: test:
-; CONFIG-CHECK: .section .AMDGPU.config
-; CONFIG-CHECK-NEXT: .long 45096
-; CONFIG-CHECK-NEXT: .long 0
+; CONFIG: .align 256
+; CONFIG: test:
+; CONFIG: .section .AMDGPU.config
+; CONFIG-NEXT: .long 45096
+; TYPICAL-NEXT: .long 0
+; TONGA-NEXT: .long 576
define void @test(i32 %p) #0 {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
diff --git a/test/CodeGen/R600/elf.r600.ll b/test/CodeGen/R600/elf.r600.ll
index 4436c07c5a77..51cd08500932 100644
--- a/test/CodeGen/R600/elf.r600.ll
+++ b/test/CodeGen/R600/elf.r600.ll
@@ -1,14 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG %s
-; ELF-CHECK: Format: ELF32
-; ELF-CHECK: Name: .AMDGPU.config
+; ELF: Format: ELF32
+; ELF: Name: .AMDGPU.config
-; CONFIG-CHECK: .section .AMDGPU.config
-; CONFIG-CHECK-NEXT: .long 166100
-; CONFIG-CHECK-NEXT: .long 2
-; CONFIG-CHECK-NEXT: .long 165900
-; CONFIG-CHECK-NEXT: .long 0
+; CONFIG: .section .AMDGPU.config
+; CONFIG-NEXT: .long 166100
+; CONFIG-NEXT: .long 2
+; CONFIG-NEXT: .long 165900
+; CONFIG-NEXT: .long 0
define void @test(float addrspace(1)* %out, i32 %p) {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
diff --git a/test/CodeGen/R600/empty-function.ll b/test/CodeGen/R600/empty-function.ll
index 4b81d971d06b..b5593eb87ae4 100644
--- a/test/CodeGen/R600/empty-function.ll
+++ b/test/CodeGen/R600/empty-function.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; Make sure we don't assert on empty functions
diff --git a/test/CodeGen/R600/extload-private.ll b/test/CodeGen/R600/extload-private.ll
new file mode 100644
index 000000000000..fec868232507
--- /dev/null
+++ b/test/CodeGen/R600/extload-private.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}load_i8_sext_private:
+; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
+define void @load_i8_sext_private(i32 addrspace(1)* %out) {
+entry:
+ %tmp0 = alloca i8
+ %tmp1 = load i8* %tmp0
+ %tmp2 = sext i8 %tmp1 to i32
+ store i32 %tmp2, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}load_i8_zext_private:
+; SI: buffer_load_ubyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
+define void @load_i8_zext_private(i32 addrspace(1)* %out) {
+entry:
+ %tmp0 = alloca i8
+ %tmp1 = load i8* %tmp0
+ %tmp2 = zext i8 %tmp1 to i32
+ store i32 %tmp2, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}load_i16_sext_private:
+; SI: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
+define void @load_i16_sext_private(i32 addrspace(1)* %out) {
+entry:
+ %tmp0 = alloca i16
+ %tmp1 = load i16* %tmp0
+ %tmp2 = sext i16 %tmp1 to i32
+ store i32 %tmp2, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}load_i16_zext_private:
+; SI: buffer_load_ushort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
+define void @load_i16_zext_private(i32 addrspace(1)* %out) {
+entry:
+ %tmp0 = alloca i16
+ %tmp1 = load i16* %tmp0
+ %tmp2 = zext i16 %tmp1 to i32
+ store i32 %tmp2, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll
index 4a94acaba0b5..73d6701bfb5b 100644
--- a/test/CodeGen/R600/extload.ll
+++ b/test/CodeGen/R600/extload.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}anyext_load_i8:
; EG: AND_INT
diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/R600/extract_vector_elt_i16.ll
index 04c375a89ae3..0774a9ae852b 100644
--- a/test/CodeGen/R600/extract_vector_elt_i16.ll
+++ b/test/CodeGen/R600/extract_vector_elt_i16.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}extract_vector_elt_v2i16:
; SI: buffer_load_ushort
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
index 9d29c0629628..365af9b73cc0 100644
--- a/test/CodeGen/R600/fadd.ll
+++ b/test/CodeGen/R600/fadd.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
; FUNC-LABEL: {{^}}fadd_f32:
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll
index 389c754c9e8d..f1f6fef54766 100644
--- a/test/CodeGen/R600/fadd64.ll
+++ b/test/CodeGen/R600/fadd64.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}fadd_f64:
; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll
index 7c7a7e36295c..f23e8919d733 100644
--- a/test/CodeGen/R600/fceil.ll
+++ b/test/CodeGen/R600/fceil.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.ceil.f32(float) nounwind readnone
diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll
index 032a4e416dc1..9dc8b50513f2 100644
--- a/test/CodeGen/R600/fcmp64.ll
+++ b/test/CodeGen/R600/fcmp64.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}flt_f64:
; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll
index f3bc399972d5..28e0c909747f 100644
--- a/test/CodeGen/R600/fconst64.ll
+++ b/test/CodeGen/R600/fconst64.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}fconst_f64:
; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000
diff --git a/test/CodeGen/R600/fdiv.f64.ll b/test/CodeGen/R600/fdiv.f64.ll
new file mode 100644
index 000000000000..276642f99014
--- /dev/null
+++ b/test/CodeGen/R600/fdiv.f64.ll
@@ -0,0 +1,96 @@
+; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s
+
+
+; COMMON-LABEL: {{^}}fdiv_f64:
+; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0
+; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; CI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]]
+; CI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], vcc, [[NUM]], [[DEN]], [[NUM]]
+
+; Check for div_scale bug workaround on SI
+; SI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]]
+; SI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[NUM]], [[DEN]], [[NUM]]
+
+; COMMON-DAG: v_rcp_f64_e32 [[RCP_SCALE0:v\[[0-9]+:[0-9]+\]]], [[SCALE0]]
+
+; SI-DAG: v_cmp_eq_i32_e32 vcc, {{v[0-9]+}}, {{v[0-9]+}}
+; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI-DAG: s_xor_b64 vcc, [[CMP0]], vcc
+
+; COMMON-DAG: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[RCP_SCALE0]], 1.0
+; COMMON-DAG: v_fma_f64 [[FMA1:v\[[0-9]+:[0-9]+\]]], [[RCP_SCALE0]], [[FMA0]], [[RCP_SCALE0]]
+; COMMON-DAG: v_fma_f64 [[FMA2:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[FMA1]], 1.0
+; COMMON-DAG: v_fma_f64 [[FMA3:v\[[0-9]+:[0-9]+\]]], [[FMA1]], [[FMA2]], [[FMA1]]
+; COMMON-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[SCALE1]], [[FMA3]]
+; COMMON-DAG: v_fma_f64 [[FMA4:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[MUL]], [[SCALE1]]
+; COMMON: v_div_fmas_f64 [[FMAS:v\[[0-9]+:[0-9]+\]]], [[FMA3]], [[FMA4]], [[MUL]]
+; COMMON: v_div_fixup_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[FMAS]], [[DEN]], [[NUM]]
+; COMMON: buffer_store_dwordx2 [[RESULT]]
+; COMMON: s_endpgm
+define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounwind {
+ %gep.1 = getelementptr double addrspace(1)* %in, i32 1
+ %num = load double addrspace(1)* %in
+ %den = load double addrspace(1)* %gep.1
+ %result = fdiv double %num, %den
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}fdiv_f64_s_v:
+define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, double %num) nounwind {
+ %den = load double addrspace(1)* %in
+ %result = fdiv double %num, %den
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}fdiv_f64_v_s:
+define void @fdiv_f64_v_s(double addrspace(1)* %out, double addrspace(1)* %in, double %den) nounwind {
+ %num = load double addrspace(1)* %in
+ %result = fdiv double %num, %den
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}fdiv_f64_s_s:
+define void @fdiv_f64_s_s(double addrspace(1)* %out, double %num, double %den) nounwind {
+ %result = fdiv double %num, %den
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}v_fdiv_v2f64:
+define void @v_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) nounwind {
+ %gep.1 = getelementptr <2 x double> addrspace(1)* %in, i32 1
+ %num = load <2 x double> addrspace(1)* %in
+ %den = load <2 x double> addrspace(1)* %gep.1
+ %result = fdiv <2 x double> %num, %den
+ store <2 x double> %result, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}s_fdiv_v2f64:
+define void @s_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %num, <2 x double> %den) {
+ %result = fdiv <2 x double> %num, %den
+ store <2 x double> %result, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}v_fdiv_v4f64:
+define void @v_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) nounwind {
+ %gep.1 = getelementptr <4 x double> addrspace(1)* %in, i32 1
+ %num = load <4 x double> addrspace(1)* %in
+ %den = load <4 x double> addrspace(1)* %gep.1
+ %result = fdiv <4 x double> %num, %den
+ store <4 x double> %result, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}s_fdiv_v4f64:
+define void @s_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %num, <4 x double> %den) {
+ %result = fdiv <4 x double> %num, %den
+ store <4 x double> %result, <4 x double> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll
index f83b88ee1c9d..603287fbdf4f 100644
--- a/test/CodeGen/R600/fdiv.ll
+++ b/test/CodeGen/R600/fdiv.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; These tests check that fdiv is expanded correctly and also test that the
; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
diff --git a/test/CodeGen/R600/fdiv64.ll b/test/CodeGen/R600/fdiv64.ll
deleted file mode 100644
index 0611b153f5cc..000000000000
--- a/test/CodeGen/R600/fdiv64.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-
-; CHECK: {{^}}fdiv_f64:
-; CHECK: v_rcp_f64_e32 {{v\[[0-9]+:[0-9]+\]}}
-; CHECK: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
-
-define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
- double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
- %r2 = fdiv double %r0, %r1
- store double %r2, double addrspace(1)* %out
- ret void
-}
diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll
index 194d0aaa1819..9038ff81b073 100644
--- a/test/CodeGen/R600/ffloor.ll
+++ b/test/CodeGen/R600/ffloor.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
declare double @llvm.floor.f64(double) nounwind readnone
declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
diff --git a/test/CodeGen/R600/flat-address-space.ll b/test/CodeGen/R600/flat-address-space.ll
index 99ef41b24e4f..2e98bf51b23b 100644
--- a/test/CodeGen/R600/flat-address-space.ll
+++ b/test/CodeGen/R600/flat-address-space.ll
@@ -1,5 +1,7 @@
; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
+; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
+; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.
diff --git a/test/CodeGen/R600/fma.f64.ll b/test/CodeGen/R600/fma.f64.ll
index 48b1093ecd0b..bca312bfa751 100644
--- a/test/CodeGen/R600/fma.f64.ll
+++ b/test/CodeGen/R600/fma.f64.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/R600/fmax3.f64.ll b/test/CodeGen/R600/fmax3.f64.ll
new file mode 100644
index 000000000000..5ca789de2a08
--- /dev/null
+++ b/test/CodeGen/R600/fmax3.f64.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare double @llvm.maxnum.f64(double, double) nounwind readnone
+
+; SI-LABEL: {{^}}test_fmax3_f64:
+; SI-DAG: buffer_load_dwordx2 [[REGA:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0{{$}}
+; SI-DAG: buffer_load_dwordx2 [[REGB:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:8
+; SI-DAG: buffer_load_dwordx2 [[REGC:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
+; SI: v_max_f64 [[REGA]], [[REGA]], [[REGB]]
+; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[REGA]], [[REGC]]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
+define void @test_fmax3_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
+ %bptr = getelementptr double addrspace(1)* %aptr, i32 1
+ %cptr = getelementptr double addrspace(1)* %aptr, i32 2
+ %a = load double addrspace(1)* %aptr, align 8
+ %b = load double addrspace(1)* %bptr, align 8
+ %c = load double addrspace(1)* %cptr, align 8
+ %f0 = call double @llvm.maxnum.f64(double %a, double %b) nounwind readnone
+ %f1 = call double @llvm.maxnum.f64(double %f0, double %c) nounwind readnone
+ store double %f1, double addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/fmax3.ll b/test/CodeGen/R600/fmax3.ll
index 6f95bf20f73b..e1b477c5921e 100644
--- a/test/CodeGen/R600/fmax3.ll
+++ b/test/CodeGen/R600/fmax3.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare float @llvm.maxnum.f32(float, float) nounwind readnone
diff --git a/test/CodeGen/R600/fmaxnum.f64.ll b/test/CodeGen/R600/fmaxnum.f64.ll
index e92996aa2b1f..de563cec3412 100644
--- a/test/CodeGen/R600/fmaxnum.f64.ll
+++ b/test/CodeGen/R600/fmaxnum.f64.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare double @llvm.maxnum.f64(double, double) #0
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0
diff --git a/test/CodeGen/R600/fmaxnum.ll b/test/CodeGen/R600/fmaxnum.ll
index 473184af214b..c105598ff811 100644
--- a/test/CodeGen/R600/fmaxnum.ll
+++ b/test/CodeGen/R600/fmaxnum.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare float @llvm.maxnum.f32(float, float) #0
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
diff --git a/test/CodeGen/R600/fmin3.ll b/test/CodeGen/R600/fmin3.ll
index aeeed1c7dd39..716beb16bb10 100644
--- a/test/CodeGen/R600/fmin3.ll
+++ b/test/CodeGen/R600/fmin3.ll
@@ -1,4 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare float @llvm.minnum.f32(float, float) nounwind readnone
diff --git a/test/CodeGen/R600/fminnum.f64.ll b/test/CodeGen/R600/fminnum.f64.ll
index b8476f98bab8..0f929d6a81f0 100644
--- a/test/CodeGen/R600/fminnum.f64.ll
+++ b/test/CodeGen/R600/fminnum.f64.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare double @llvm.minnum.f64(double, double) #0
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
diff --git a/test/CodeGen/R600/fminnum.ll b/test/CodeGen/R600/fminnum.ll
index cd1a948707e8..6b93b830033b 100644
--- a/test/CodeGen/R600/fminnum.ll
+++ b/test/CodeGen/R600/fminnum.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare float @llvm.minnum.f32(float, float) #0
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
index 7296a8760be2..6c09aa242677 100644
--- a/test/CodeGen/R600/fmul.ll
+++ b/test/CodeGen/R600/fmul.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll
index 882307ef458b..9d7787ccbe1f 100644
--- a/test/CodeGen/R600/fmul64.ll
+++ b/test/CodeGen/R600/fmul64.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; FUNC-LABEL: {{^}}fmul_f64:
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/R600/fnearbyint.ll
index 30bc67689e1c..4fa9adaabdae 100644
--- a/test/CodeGen/R600/fnearbyint.ll
+++ b/test/CodeGen/R600/fnearbyint.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s
; This should have the exactly the same output as the test for rint,
; so no need to check anything.
diff --git a/test/CodeGen/R600/fneg-fabs.f64.ll b/test/CodeGen/R600/fneg-fabs.f64.ll
index 04a87e377857..7430e7ffb33d 100644
--- a/test/CodeGen/R600/fneg-fabs.f64.ll
+++ b/test/CodeGen/R600/fneg-fabs.f64.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FIXME: Check something here. Currently it seems fabs + fneg aren't
; into 2 modifiers, although theoretically that should work.
diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/R600/fneg-fabs.ll
index 94e8256cd261..4fde0484567c 100644
--- a/test/CodeGen/R600/fneg-fabs.ll
+++ b/test/CodeGen/R600/fneg-fabs.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
diff --git a/test/CodeGen/R600/fp-classify.ll b/test/CodeGen/R600/fp-classify.ll
index a1b2f08eddeb..c1de85203104 100644
--- a/test/CodeGen/R600/fp-classify.ll
+++ b/test/CodeGen/R600/fp-classify.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
diff --git a/test/CodeGen/R600/fp16_to_fp.ll b/test/CodeGen/R600/fp16_to_fp.ll
index be84582a73a6..da78f6155c85 100644
--- a/test/CodeGen/R600/fp16_to_fp.ll
+++ b/test/CodeGen/R600/fp16_to_fp.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/R600/fp32_to_fp16.ll
index 43dd09b5ec05..c3c65aece082 100644
--- a/test/CodeGen/R600/fp32_to_fp16.ll
+++ b/test/CodeGen/R600/fp32_to_fp16.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
index d76e8a341c6f..16549c392b00 100644
--- a/test/CodeGen/R600/fp_to_sint.ll
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
declare float @llvm.fabs.f32(float) #0
diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll
index 5970adf999c9..804d90f476da 100644
--- a/test/CodeGen/R600/fp_to_uint.ll
+++ b/test/CodeGen/R600/fp_to_uint.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
diff --git a/test/CodeGen/R600/fpext.ll b/test/CodeGen/R600/fpext.ll
index 320545edf56b..21c7bfd48df8 100644
--- a/test/CodeGen/R600/fpext.ll
+++ b/test/CodeGen/R600/fpext.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; CHECK: {{^}}fpext:
; CHECK: v_cvt_f64_f32_e32
diff --git a/test/CodeGen/R600/fptrunc.ll b/test/CodeGen/R600/fptrunc.ll
index 15ae4e18ff38..94fcdab9c52f 100644
--- a/test/CodeGen/R600/fptrunc.ll
+++ b/test/CodeGen/R600/fptrunc.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; CHECK: {{^}}fptrunc:
; CHECK: v_cvt_f32_f64_e32
diff --git a/test/CodeGen/R600/frem.ll b/test/CodeGen/R600/frem.ll
index 50d6687abeec..564634178656 100644
--- a/test/CodeGen/R600/frem.ll
+++ b/test/CodeGen/R600/frem.ll
@@ -40,10 +40,14 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
ret void
}
-; TODO: This should check something when f64 fdiv is implemented
-; correctly
-
; FUNC-LABEL: {{^}}frem_f64:
+; SI: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
+; SI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
+; SI-DAG: v_div_fmas_f64
+; SI-DAG: v_div_scale_f64
+; SI-DAG: v_mul_f64
+; SI: v_add_f64
+; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll
index 0d1095cf2683..1fdf3e453bf3 100644
--- a/test/CodeGen/R600/fsqrt.ll
+++ b/test/CodeGen/R600/fsqrt.ll
@@ -1,5 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s
; Run with unsafe-fp-math to make sure nothing tries to turn this into 1 / rsqrt(x)
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
index 4fe47e7badf3..ef90fea67900 100644
--- a/test/CodeGen/R600/fsub.ll
+++ b/test/CodeGen/R600/fsub.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}v_fsub_f32:
diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll
index d0f894607a61..62f46142fe0d 100644
--- a/test/CodeGen/R600/fsub64.ll
+++ b/test/CodeGen/R600/fsub64.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}fsub_f64:
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll
index 39eb2b5accbf..edc08609a8aa 100644
--- a/test/CodeGen/R600/ftrunc.ll
+++ b/test/CodeGen/R600/ftrunc.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s
declare float @llvm.trunc.f32(float) nounwind readnone
declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone
diff --git a/test/CodeGen/R600/global-directive.ll b/test/CodeGen/R600/global-directive.ll
index 189510ad08fe..3ba12c206ad3 100644
--- a/test/CodeGen/R600/global-directive.ll
+++ b/test/CodeGen/R600/global-directive.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; Make sure the GlobalDirective isn't merged with the function name
diff --git a/test/CodeGen/R600/global-extload-i1.ll b/test/CodeGen/R600/global-extload-i1.ll
index 940911e73453..5dc494900ce8 100644
--- a/test/CodeGen/R600/global-extload-i1.ll
+++ b/test/CodeGen/R600/global-extload-i1.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FIXME: Evergreen broken
diff --git a/test/CodeGen/R600/global-extload-i16.ll b/test/CodeGen/R600/global-extload-i16.ll
index 838068470ff2..a1740ec8236a 100644
--- a/test/CodeGen/R600/global-extload-i16.ll
+++ b/test/CodeGen/R600/global-extload-i16.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FIXME: cypress is broken because the bigger testcases spill and it's not implemented
diff --git a/test/CodeGen/R600/global-extload-i32.ll b/test/CodeGen/R600/global-extload-i32.ll
index ce78c446c3ba..f56b6ac8dc38 100644
--- a/test/CodeGen/R600/global-extload-i32.ll
+++ b/test/CodeGen/R600/global-extload-i32.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
diff --git a/test/CodeGen/R600/global-extload-i8.ll b/test/CodeGen/R600/global-extload-i8.ll
index 8d6042f1de02..86245232d3e4 100644
--- a/test/CodeGen/R600/global-extload-i8.ll
+++ b/test/CodeGen/R600/global-extload-i8.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}zextload_global_i8_to_i32:
diff --git a/test/CodeGen/R600/global-zero-initializer.ll b/test/CodeGen/R600/global-zero-initializer.ll
index 031df59cd1e1..6909c58354c5 100644
--- a/test/CodeGen/R600/global-zero-initializer.ll
+++ b/test/CodeGen/R600/global-zero-initializer.ll
@@ -1,4 +1,5 @@
; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s
; CHECK: error: unsupported initializer for address space in load_init_global_global
diff --git a/test/CodeGen/R600/half.ll b/test/CodeGen/R600/half.ll
index cb7a94a0b859..35a41c5cd0b0 100644
--- a/test/CodeGen/R600/half.ll
+++ b/test/CodeGen/R600/half.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s
define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_load_store:
diff --git a/test/CodeGen/R600/i1-copy-implicit-def.ll b/test/CodeGen/R600/i1-copy-implicit-def.ll
index 51e230196bf6..b11a21137642 100644
--- a/test/CodeGen/R600/i1-copy-implicit-def.ll
+++ b/test/CodeGen/R600/i1-copy-implicit-def.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SILowerI1Copies was not handling IMPLICIT_DEF
; SI-LABEL: {{^}}br_implicit_def:
diff --git a/test/CodeGen/R600/i1-copy-phi.ll b/test/CodeGen/R600/i1-copy-phi.ll
index 8b761710f9d2..430466e9f80e 100644
--- a/test/CodeGen/R600/i1-copy-phi.ll
+++ b/test/CodeGen/R600/i1-copy-phi.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}br_i1_phi:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
diff --git a/test/CodeGen/R600/icmp64.ll b/test/CodeGen/R600/icmp64.ll
index ed0f221b87b1..0eaa33ebafed 100644
--- a/test/CodeGen/R600/icmp64.ll
+++ b/test/CodeGen/R600/icmp64.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}test_i64_eq:
; SI: v_cmp_eq_i64
diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll
index db597a363775..f551606d63a7 100644
--- a/test/CodeGen/R600/indirect-addressing-si.ll
+++ b/test/CodeGen/R600/indirect-addressing-si.ll
@@ -1,9 +1,10 @@
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
; Tests for indirect addressing on SI, which is implemented using dynamic
; indexing of vectors.
-; CHECK: extract_w_offset
+; CHECK-LABEL: {{^}}extract_w_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
@@ -14,7 +15,7 @@ entry:
ret void
}
-; CHECK: extract_wo_offset
+; CHECK-LABEL: {{^}}extract_wo_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
@@ -24,7 +25,34 @@ entry:
ret void
}
-; CHECK: insert_w_offset
+; CHECK-LABEL: {{^}}extract_neg_offset_sgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0
+define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
+entry:
+ %index = add i32 %offset, -512
+ %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
+; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]}}, v0
+; CHECK: s_cbranch_execnz
+define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
+entry:
+ %id = call i32 @llvm.r600.read.tidig.x() #1
+ %index = add i32 %id, -512
+ %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}insert_w_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
@@ -36,7 +64,7 @@ entry:
ret void
}
-; CHECK: insert_wo_offset
+; CHECK-LABEL: {{^}}insert_wo_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
@@ -46,3 +74,48 @@ entry:
store float %1, float addrspace(1)* %out
ret void
}
+
+; CHECK-LABEL: {{^}}insert_neg_offset_sgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
+define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
+entry:
+ %index = add i32 %offset, -512
+ %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
+; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
+; CHECK: s_cbranch_execnz
+define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
+entry:
+ %id = call i32 @llvm.r600.read.tidig.x() #1
+ %index = add i32 %id, -512
+ %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, -{{[0-9]+}}
+; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
+; CHECK: s_cbranch_execnz
+define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
+entry:
+ %id = call i32 @llvm.r600.read.tidig.x() #1
+ %index = add i32 %id, -16
+ %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll
index 24006f8799b2..cb06d609da49 100644
--- a/test/CodeGen/R600/indirect-private-64.ll
+++ b/test/CodeGen/R600/indirect-private-64.ll
@@ -1,5 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
diff --git a/test/CodeGen/R600/infinite-loop.ll b/test/CodeGen/R600/infinite-loop.ll
index 0f82a7df6098..7233aa57fd78 100644
--- a/test/CodeGen/R600/infinite-loop.ll
+++ b/test/CodeGen/R600/infinite-loop.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}infinite_loop:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
diff --git a/test/CodeGen/R600/inline-asm.ll b/test/CodeGen/R600/inline-asm.ll
index 6f1f977de2a4..37e4486db380 100644
--- a/test/CodeGen/R600/inline-asm.ll
+++ b/test/CodeGen/R600/inline-asm.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=tonga -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}inline_asm:
; CHECK: s_endpgm
diff --git a/test/CodeGen/R600/inline-calls.ll b/test/CodeGen/R600/inline-calls.ll
index b8700d55e155..33a4c832e75e 100644
--- a/test/CodeGen/R600/inline-calls.ll
+++ b/test/CodeGen/R600/inline-calls.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
; CHECK-NOT: {{^}}func:
diff --git a/test/CodeGen/R600/input-mods.ll b/test/CodeGen/R600/input-mods.ll
index e3e94995fc95..1c4d285cbcb1 100644
--- a/test/CodeGen/R600/input-mods.ll
+++ b/test/CodeGen/R600/input-mods.ll
@@ -1,13 +1,13 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG
+;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM
-;EG-CHECK-LABEL: {{^}}test:
-;EG-CHECK: EXP_IEEE *
-;CM-CHECK-LABEL: {{^}}test:
-;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X|
-;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X|
-;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
-;CM-CHECK: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X|
+;EG-LABEL: {{^}}test:
+;EG: EXP_IEEE *
+;CM-LABEL: {{^}}test:
+;CM: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X|
+;CM: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X|
+;CM: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
+;CM: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X|
define void @test(<4 x float> inreg %reg0) #0 {
%r0 = extractelement <4 x float> %reg0, i32 0
diff --git a/test/CodeGen/R600/insert_subreg.ll b/test/CodeGen/R600/insert_subreg.ll
index dfb58d54796c..4a5e8869c2df 100644
--- a/test/CodeGen/R600/insert_subreg.ll
+++ b/test/CodeGen/R600/insert_subreg.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s
; Test that INSERT_SUBREG instructions don't have non-register operands after
; instruction selection.
diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll
index 2442c868444f..64afddcca21d 100644
--- a/test/CodeGen/R600/insert_vector_elt.ll
+++ b/test/CodeGen/R600/insert_vector_elt.ll
@@ -1,4 +1,5 @@
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
; FIXME: Broken on evergreen
; FIXME: For some reason the 8 and 16 vectors are being stored as
diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll
index 1984d333d4d5..42d289d4ef23 100644
--- a/test/CodeGen/R600/kernel-args.ll
+++ b/test/CodeGen/R600/kernel-args.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
-; EG-CHECK-LABEL: {{^}}i8_arg:
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: {{^}}i8_arg:
-; SI-CHECK: buffer_load_ubyte
+; EG-LABEL: {{^}}i8_arg:
+; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-LABEL: {{^}}i8_arg:
+; SI: buffer_load_ubyte
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
@@ -14,10 +14,10 @@ entry:
ret void
}
-; EG-CHECK-LABEL: {{^}}i8_zext_arg:
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: {{^}}i8_zext_arg:
-; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; EG-LABEL: {{^}}i8_zext_arg:
+; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-LABEL: {{^}}i8_zext_arg:
+; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
@@ -26,10 +26,10 @@ entry:
ret void
}
-; EG-CHECK-LABEL: {{^}}i8_sext_arg:
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: {{^}}i8_sext_arg:
-; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; EG-LABEL: {{^}}i8_sext_arg:
+; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-LABEL: {{^}}i8_sext_arg:
+; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
@@ -38,10 +38,10 @@ entry:
ret void
}
-; EG-CHECK-LABEL: {{^}}i16_arg:
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: {{^}}i16_arg:
-; SI-CHECK: buffer_load_ushort
+; EG-LABEL: {{^}}i16_arg:
+; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-LABEL: {{^}}i16_arg:
+; SI: buffer_load_ushort
define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
@@ -50,10 +50,10 @@ entry:
ret void
}
-; EG-CHECK-LABEL: {{^}}i16_zext_arg:
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: {{^}}i16_zext_arg:
-; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; EG-LABEL: {{^}}i16_zext_arg:
+; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-LABEL: {{^}}i16_zext_arg:
+; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
@@ -62,10 +62,10 @@ entry:
ret void
}
-; EG-CHECK-LABEL: {{^}}i16_sext_arg:
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: {{^}}i16_sext_arg:
-; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; EG-LABEL: {{^}}i16_sext_arg:
+; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-LABEL: {{^}}i16_sext_arg:
+; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
@@ -74,9 +74,9 @@ entry:
ret void
}
-; EG-CHECK-LABEL: {{^}}i32_arg:
-; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: {{^}}i32_arg:
+; EG-LABEL: {{^}}i32_arg:
+; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; SI-LABEL: {{^}}i32_arg:
; s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
@@ -84,9 +84,9 @@ entry:
ret void
}
-; EG-CHECK-LABEL: {{^}}f32_arg:
-; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: {{^}}f32_arg:
+; EG-LABEL: {{^}}f32_arg:
+; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; SI-LABEL: {{^}}f32_arg:
; s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
@@ -94,360 +94,360 @@ entry:
ret void
}
-; EG-CHECK-LABEL: {{^}}v2i8_arg:
-; EG-CHECK: VTX_READ_8
-; EG-CHECK: VTX_READ_8
-; SI-CHECK-LABEL: {{^}}v2i8_arg:
-; SI-CHECK: buffer_load_ubyte
-; SI-CHECK: buffer_load_ubyte
+; EG-LABEL: {{^}}v2i8_arg:
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; SI-LABEL: {{^}}v2i8_arg:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
store <2 x i8> %in, <2 x i8> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: {{^}}v2i16_arg:
-; EG-CHECK: VTX_READ_16
-; EG-CHECK: VTX_READ_16
-; SI-CHECK-LABEL: {{^}}v2i16_arg:
-; SI-CHECK-DAG: buffer_load_ushort
-; SI-CHECK-DAG: buffer_load_ushort
+; EG-LABEL: {{^}}v2i16_arg:
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; SI-LABEL: {{^}}v2i16_arg:
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: {{^}}v2i32_arg:
-; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
-; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
-; SI-CHECK-LABEL: {{^}}v2i32_arg:
-; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; EG-LABEL: {{^}}v2i32_arg:
+; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
+; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
+; SI-LABEL: {{^}}v2i32_arg:
+; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: {{^}}v2f32_arg:
-; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
-; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
-; SI-CHECK-LABEL: {{^}}v2f32_arg:
-; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; EG-LABEL: {{^}}v2f32_arg:
+; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
+; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
+; SI-LABEL: {{^}}v2f32_arg:
+; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: {{^}}v3i8_arg:
+; EG-LABEL: {{^}}v3i8_arg:
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
-; SI-CHECK-LABEL: {{^}}v3i8_arg:
+; SI-LABEL: {{^}}v3i8_arg:
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: {{^}}v3i16_arg:
+; EG-LABEL: {{^}}v3i16_arg:
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
-; SI-CHECK-LABEL: {{^}}v3i16_arg:
+; SI-LABEL: {{^}}v3i16_arg:
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: {{^}}v3i32_arg:
-; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
-; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
-; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; SI-CHECK-LABEL: {{^}}v3i32_arg:
-; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; EG-LABEL: {{^}}v3i32_arg:
+; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; SI-LABEL: {{^}}v3i32_arg:
+; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: {{^}}v3f32_arg:
-; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
-; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
-; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; SI-CHECK-LABEL: {{^}}v3f32_arg:
-; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; EG-LABEL: {{^}}v3f32_arg:
+; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; SI-LABEL: {{^}}v3f32_arg:
+; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: {{^}}v4i8_arg:
-; EG-CHECK: VTX_READ_8
-; EG-CHECK: VTX_READ_8
-; EG-CHECK: VTX_READ_8
-; EG-CHECK: VTX_READ_8
-; SI-CHECK-LABEL: {{^}}v4i8_arg:
-; SI-CHECK: buffer_load_ubyte
-; SI-CHECK: buffer_load_ubyte
-; SI-CHECK: buffer_load_ubyte
-; SI-CHECK: buffer_load_ubyte
+; EG-LABEL: {{^}}v4i8_arg:
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; SI-LABEL: {{^}}v4i8_arg:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: {{^}}v4i16_arg:
-; EG-CHECK: VTX_READ_16
-; EG-CHECK: VTX_READ_16
-; EG-CHECK: VTX_READ_16
-; EG-CHECK: VTX_READ_16
-; SI-CHECK-LABEL: {{^}}v4i16_arg:
-; SI-CHECK: buffer_load_ushort
-; SI-CHECK: buffer_load_ushort
-; SI-CHECK: buffer_load_ushort
-; SI-CHECK: buffer_load_ushort
+; EG-LABEL: {{^}}v4i16_arg:
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; SI-LABEL: {{^}}v4i16_arg:
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
store <4 x i16> %in, <4 x i16> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: {{^}}v4i32_arg:
-; EG-CHECK-DAG: T{{[0-9]\.[XY