diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-05-21 06:57:07 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-05-21 06:57:07 +0000 |
commit | f03b5bed27d0d2eafd68562ce14f8b5e3f1f0801 (patch) | |
tree | 311f96478e9fceea407d1f187f9c5cef712f796e | |
parent | b6bcb9a905dec7821221e8ceaf1504c1f329815e (diff) | |
download | src-f03b5bed27d0d2eafd68562ce14f8b5e3f1f0801.tar.gz src-f03b5bed27d0d2eafd68562ce14f8b5e3f1f0801.zip |
Vendor import of llvm RELEASE_361/final tag r237755 (effectively, 3.6.1 release): vendor/llvm/llvm-release_361-r237755, vendor/llvm/llvm-3.6.x
Notes:
svn path=/vendor/llvm/dist/; revision=283176
svn path=/vendor/llvm/llvm-3.6.x/; revision=283619; tag=vendor/llvm/llvm-3.6.x
412 files changed, 12692 insertions, 5960 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index cfa32cf4bc5a..bb21cc5d7516 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,7 +48,7 @@ set(CMAKE_MODULE_PATH set(LLVM_VERSION_MAJOR 3) set(LLVM_VERSION_MINOR 6) -set(LLVM_VERSION_PATCH 0) +set(LLVM_VERSION_PATCH 1) if (NOT PACKAGE_VERSION) set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}") diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 3bd8aa1806c2..577a7d521ea9 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -32,11 +32,11 @@ dnl===-----------------------------------------------------------------------=== dnl Initialize autoconf and define the package name, version number and dnl address for reporting bugs. -AC_INIT([LLVM],[3.6.0],[http://llvm.org/bugs/]) +AC_INIT([LLVM],[3.6.1],[http://llvm.org/bugs/]) LLVM_VERSION_MAJOR=3 LLVM_VERSION_MINOR=6 -LLVM_VERSION_PATCH=0 +LLVM_VERSION_PATCH=1 LLVM_VERSION_SUFFIX= AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API]) @@ -1714,7 +1714,9 @@ if test "$llvm_cv_os_type" = "MingW" ; then AC_CHECK_LIB(gcc,_alloca,AC_DEFINE([HAVE__ALLOCA],[1],[Have host's _alloca])) AC_CHECK_LIB(gcc,__alloca,AC_DEFINE([HAVE___ALLOCA],[1],[Have host's __alloca])) AC_CHECK_LIB(gcc,__chkstk,AC_DEFINE([HAVE___CHKSTK],[1],[Have host's __chkstk])) + AC_CHECK_LIB(gcc,__chkstk_ms,AC_DEFINE([HAVE___CHKSTK_MS],[1],[Have host's __chkstk_ms])) AC_CHECK_LIB(gcc,___chkstk,AC_DEFINE([HAVE____CHKSTK],[1],[Have host's ___chkstk])) + AC_CHECK_LIB(gcc,___chkstk_ms,AC_DEFINE([HAVE____CHKSTK_MS],[1],[Have host's ___chkstk_ms])) AC_CHECK_LIB(gcc,__ashldi3,AC_DEFINE([HAVE___ASHLDI3],[1],[Have host's __ashldi3])) AC_CHECK_LIB(gcc,__ashrdi3,AC_DEFINE([HAVE___ASHRDI3],[1],[Have host's __ashrdi3])) diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index f806d9c54ed9..6a7538895c41 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -198,7 +198,9 @@ if( PURE_WINDOWS ) 
check_function_exists(_alloca HAVE__ALLOCA) check_function_exists(__alloca HAVE___ALLOCA) check_function_exists(__chkstk HAVE___CHKSTK) + check_function_exists(__chkstk_ms HAVE___CHKSTK_MS) check_function_exists(___chkstk HAVE____CHKSTK) + check_function_exists(___chkstk_ms HAVE____CHKSTK_MS) check_function_exists(__ashldi3 HAVE___ASHLDI3) check_function_exists(__ashrdi3 HAVE___ASHRDI3) diff --git a/configure b/configure index ba5ecbef334f..ea5bf52101b5 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.60 for LLVM 3.6.0. +# Generated by GNU Autoconf 2.60 for LLVM 3.6.1. # # Report bugs to <http://llvm.org/bugs/>. # @@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='LLVM' PACKAGE_TARNAME='llvm' -PACKAGE_VERSION='3.6.0' -PACKAGE_STRING='LLVM 3.6.0' +PACKAGE_VERSION='3.6.1' +PACKAGE_STRING='LLVM 3.6.1' PACKAGE_BUGREPORT='http://llvm.org/bugs/' ac_unique_file="lib/IR/Module.cpp" @@ -1314,7 +1314,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures LLVM 3.6.0 to adapt to many kinds of systems. +\`configure' configures LLVM 3.6.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1380,7 +1380,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of LLVM 3.6.0:";; + short | recursive ) echo "Configuration of LLVM 3.6.1:";; esac cat <<\_ACEOF @@ -1550,7 +1550,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -LLVM configure 3.6.0 +LLVM configure 3.6.1 generated by GNU Autoconf 2.60 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1566,7 +1566,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by LLVM $as_me 3.6.0, which was +It was created by LLVM $as_me 3.6.1, which was generated by GNU Autoconf 2.60. Invocation command line was $ $0 $@ @@ -1922,7 +1922,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu LLVM_VERSION_MAJOR=3 LLVM_VERSION_MINOR=6 -LLVM_VERSION_PATCH=0 +LLVM_VERSION_PATCH=1 LLVM_VERSION_SUFFIX= @@ -15438,6 +15438,91 @@ _ACEOF fi + { echo "$as_me:$LINENO: checking for __chkstk_ms in -lgcc" >&5 +echo $ECHO_N "checking for __chkstk_ms in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___chkstk_ms+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char __chkstk_ms (); +int +main () +{ +return __chkstk_ms (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___chkstk_ms=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___chkstk_ms=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___chkstk_ms" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___chkstk_ms" >&6; } +if test $ac_cv_lib_gcc___chkstk_ms = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___CHKSTK_MS 1 +_ACEOF + +fi + { echo "$as_me:$LINENO: checking for ___chkstk in -lgcc" >&5 echo $ECHO_N "checking for ___chkstk in -lgcc... 
$ECHO_C" >&6; } if test "${ac_cv_lib_gcc____chkstk+set}" = set; then @@ -15523,6 +15608,91 @@ _ACEOF fi + { echo "$as_me:$LINENO: checking for ___chkstk_ms in -lgcc" >&5 +echo $ECHO_N "checking for ___chkstk_ms in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc____chkstk_ms+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char ___chkstk_ms (); +int +main () +{ +return ___chkstk_ms (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc____chkstk_ms=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc____chkstk_ms=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc____chkstk_ms" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc____chkstk_ms" >&6; } +if test $ac_cv_lib_gcc____chkstk_ms = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE____CHKSTK_MS 1 +_ACEOF + +fi + { echo "$as_me:$LINENO: checking for __ashldi3 in -lgcc" >&5 echo $ECHO_N "checking for __ashldi3 in -lgcc... $ECHO_C" >&6; } @@ -18901,7 +19071,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by LLVM $as_me 3.6.0, which was +This file was extended by LLVM $as_me 3.6.1, which was generated by GNU Autoconf 2.60. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -18954,7 +19124,7 @@ Report bugs to <bug-autoconf@gnu.org>." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -LLVM config.status 3.6.0 +LLVM config.status 3.6.1 configured by $0, generated by GNU Autoconf 2.60, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 04d7f5266510..a10f7e004abc 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -25,6 +25,31 @@ them. Non-comprehensive list of changes in this release ================================================= +Changes to the MIPS Target +-------------------------- + +* Added support for 128-bit integers on 64-bit targets. + +* Fixed some remaining N32/N64 calling convention bugs when using small + structures on big-endian targets. 
+ +* Fixed missing sign-extensions that are required by the N32/N64 calling + convention when generating calls to library functions with 32-bit parameters. + +* ``-mno-odd-spreg`` is now honoured for vector insertion/extraction operations + when using ``-mmsa``. + +* Corrected the representation of member function pointers. This makes them + usable on microMIPS targets. + +* Fixed multiple segfaults and assertions in the disassembler when + disassembling instructions that have memory operands. + +* Fixed multiple cases of suboptimal code generation involving ``$zero``. + +Non-comprehensive list of changes in 3.6.0 +========================================== + .. NOTE For small 1-3 sentence descriptions, just add an entry at the end of this list. If your description won't fit comfortably in one bullet diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index 27f07d4e39d1..5c512f6bca6c 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -423,6 +423,9 @@ /* Have host's __chkstk */ #cmakedefine HAVE___CHKSTK ${HAVE___CHKSTK} +/* Have host's __chkstk_ms */ +#cmakedefine HAVE___CHKSTK_MS ${HAVE___CHKSTK_MS} + /* Have host's __cmpdi2 */ #cmakedefine HAVE___CMPDI2 ${HAVE___CMPDI2} @@ -459,6 +462,9 @@ /* Have host's ___chkstk */ #cmakedefine HAVE____CHKSTK ${HAVE____CHKSTK} +/* Have host's ___chkstk_ms */ +#cmakedefine HAVE____CHKSTK_MS ${HAVE____CHKSTK_MS} + /* Define if we link Polly to the tools */ #cmakedefine LINK_POLLY_INTO_TOOLS diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in index ec09c84c5b71..c68c77aa6076 100644 --- a/include/llvm/Config/config.h.in +++ b/include/llvm/Config/config.h.in @@ -420,6 +420,9 @@ /* Have host's __chkstk */ #undef HAVE___CHKSTK +/* Have host's __chkstk_ms */ +#undef HAVE___CHKSTK_MS + /* Have host's __cmpdi2 */ #undef HAVE___CMPDI2 @@ -456,6 +459,9 @@ /* Have host's ___chkstk */ #undef HAVE____CHKSTK +/* Have host's ___chkstk_ms */ 
+#undef HAVE____CHKSTK_MS + /* Linker version detected at compile time. */ #undef HOST_LINK_VERSION diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h index a0f26741a8f0..9071bfeec7ed 100644 --- a/include/llvm/Target/TargetCallingConv.h +++ b/include/llvm/Target/TargetCallingConv.h @@ -134,6 +134,8 @@ namespace ISD { /// Index original Function's argument. unsigned OrigArgIndex; + /// Sentinel value for implicit machine-level input arguments. + static const unsigned NoArgIndex = UINT_MAX; /// Offset in bytes of current input value relative to the beginning of /// original argument. E.g. if argument was splitted into four 32 bit @@ -147,6 +149,15 @@ namespace ISD { VT = vt.getSimpleVT(); ArgVT = argvt; } + + bool isOrigArg() const { + return OrigArgIndex != NoArgIndex; + } + + unsigned getOrigArgIndex() const { + assert(OrigArgIndex != NoArgIndex && "Implicit machine-level argument"); + return OrigArgIndex; + } }; /// OutputArg - This struct carries flags and a value for a diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index c5fed02e17b8..43d6f2772c5e 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -2806,6 +2806,11 @@ public: virtual bool useLoadStackGuardNode() const { return false; } + + /// Returns true if arguments should be sign-extended in lib calls. 
+ virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { + return IsSigned; + } }; /// Given an LLVM IR type and return type attributes, compute the return value diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 59f19a002ecc..7e9e35127a49 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1776,9 +1776,12 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, << *IsomorphicInc << '\n'); Value *NewInc = OrigInc; if (OrigInc->getType() != IsomorphicInc->getType()) { - Instruction *IP = isa<PHINode>(OrigInc) - ? (Instruction*)L->getHeader()->getFirstInsertionPt() - : OrigInc->getNextNode(); + Instruction *IP = nullptr; + if (PHINode *PN = dyn_cast<PHINode>(OrigInc)) + IP = PN->getParent()->getFirstInsertionPt(); + else + IP = OrigInc->getNextNode(); + IRBuilder<> Builder(IP); Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); NewInc = Builder. diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index cbd62728ace9..96111225db5f 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -75,10 +75,9 @@ MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, I != E; ++I) { unsigned MappedDef = *I; // Source of copy is no longer available for propagation. 
- if (AvailCopyMap.erase(MappedDef)) { - for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR) - AvailCopyMap.erase(*SR); - } + AvailCopyMap.erase(MappedDef); + for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR) + AvailCopyMap.erase(*SR); } } } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index afb986f1d7c6..1df61e4fb1fe 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1160,13 +1160,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; - // Early exit if this basic block is in an optnone function. - AttributeSet FnAttrs = - DAG.getMachineFunction().getFunction()->getAttributes(); - if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeNone)) - return; - // Add all the dag nodes to the worklist. for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) @@ -2788,9 +2781,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SplatBitSize = SplatBitSize * 2) SplatValue |= SplatValue.shl(SplatBitSize); - Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) - Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a + // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. + if (SplatBitSize % BitWidth == 0) { + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } } } @@ -11043,7 +11040,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { // If the input vector is too large, try to split it. // We don't support having two input vectors that are too large. 
- if (VecIn2.getNode()) + // If the zero vector was used, we can not split the vector, + // since we'd need 3 inputs. + if (UsesZeroVector || VecIn2.getNode()) return SDValue(); if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) @@ -11055,7 +11054,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, DAG.getConstant(0, TLI.getVectorIdxTy())); - UsesZeroVector = false; } else return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 97fed230c536..c46539b71dbe 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -497,7 +497,7 @@ bool FastISel::selectGetElementPtr(const User *I) { OI != E; ++OI) { const Value *Idx = *OI; if (auto *StTy = dyn_cast<StructType>(Ty)) { - unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); @@ -518,8 +518,8 @@ bool FastISel::selectGetElementPtr(const User *I) { if (CI->isZero()) continue; // N = N + Offset - TotalOffs += - DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); + uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue(); + TotalOffs += DL.getTypeAllocSize(Ty) * IdxN; if (TotalOffs >= MaxOffs) { N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (!N) // Unhandled operand. Halt "fast" selection and bail. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 4591e79316d8..b59671554348 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -658,7 +658,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, false, dl).first; + &Op, 1, Signed, dl).first; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8b54e6568b9e..5222de1063bb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1423,9 +1423,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // If one or more successors has been unscheduled, then the current // node is no longer available. - if (!TrySU->isAvailable) + if (!TrySU->isAvailable || !TrySU->NodeQueueId) CurSU = AvailableQueue->pop(); else { + // Available and in AvailableQueue AvailableQueue->remove(TrySU); CurSU = TrySU; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d1929107fcb7..66095ee015e6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3399,30 +3399,21 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = StTy->getElementType(Field); } else { Ty = cast<SequentialType>(Ty)->getElementType(); + MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS); + unsigned PtrSize = PtrTy.getSizeInBits(); + APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); // If this is a constant subscript, handle it quickly. 
- const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { - if (CI->isZero()) continue; - uint64_t Offs = - DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); - SDValue OffsVal; - EVT PTy = TLI.getPointerTy(AS); - unsigned PtrBits = PTy.getSizeInBits(); - if (PtrBits < 64) - OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, - DAG.getConstant(Offs, MVT::i64)); - else - OffsVal = DAG.getConstant(Offs, PTy); - - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, - OffsVal); + if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->isZero()) + continue; + APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + SDValue OffsVal = DAG.getConstant(Offs, PtrTy); + N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); continue; } // N = N + Idx * ElementSize; - APInt ElementSize = - APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -5727,6 +5718,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Skip the first return-type Attribute to get to params. Entry.setAttributes(&CS, i - CS.arg_begin() + 1); Args.push_back(Entry); + + // If we have an explicit sret argument that is an Instruction, (i.e., it + // might point to function-local memory), we can't meaningfully tail-call. + if (Entry.isSRet && isa<Instruction>(V)) + isTailCall = false; } // Check if target-independent constraints permit a tail call here. @@ -7353,6 +7349,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + + // sret demotion isn't compatible with tail-calls, since the sret argument + // points into the callers stack frame. 
+ CLI.IsTailCall = false; } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; @@ -7638,7 +7638,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ISD::ArgFlagsTy Flags; Flags.setSRet(); MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0); + ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, + ISD::InputArg::NoArgIndex, 0); Ins.push_back(RetArg); } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 72e0aca84080..f12c035e7858 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -96,18 +96,19 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; + Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); + Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) - .setSExtResult(isSigned).setZExtResult(!isSigned); + .setSExtResult(signExtend).setZExtResult(!signExtend); return LowerCallTo(CLI); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 21893d2909ed..d75be2807862 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ 
b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -177,25 +177,30 @@ bool RuntimeDyldMachO::isCompatibleFile(const object::ObjectFile &Obj) const { } template <typename Impl> -void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &ObjImg, +void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID; unsigned TextSID = RTDYLD_INVALID_SECTION_ID; unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID; - ObjSectionToIDMap::iterator i, e; - for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) { - const SectionRef &Section = i->first; + for (const auto &Section : Obj.sections()) { StringRef Name; Section.getName(Name); - if (Name == "__eh_frame") - EHFrameSID = i->second; - else if (Name == "__text") - TextSID = i->second; + + // Force emission of the __text, __eh_frame, and __gcc_except_tab sections + // if they're present. Otherwise call down to the impl to handle other + // sections that have already been emitted. 
+ if (Name == "__text") + TextSID = findOrEmitSection(Obj, Section, true, SectionMap); + else if (Name == "__eh_frame") + EHFrameSID = findOrEmitSection(Obj, Section, false, SectionMap); else if (Name == "__gcc_except_tab") - ExceptTabSID = i->second; - else - impl().finalizeSection(ObjImg, i->second, Section); + ExceptTabSID = findOrEmitSection(Obj, Section, true, SectionMap); + else { + auto I = SectionMap.find(Section); + if (I != SectionMap.end()) + impl().finalizeSection(Obj, I->second, Section); + } } UnregisteredEHFrameSections.push_back( EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID)); @@ -238,7 +243,8 @@ unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P, } static int64_t computeDelta(SectionEntry *A, SectionEntry *B) { - int64_t ObjDistance = A->ObjAddress - B->ObjAddress; + int64_t ObjDistance = + static_cast<int64_t>(A->ObjAddress) - static_cast<int64_t>(B->ObjAddress); int64_t MemDistance = A->LoadAddress - B->LoadAddress; return ObjDistance - MemDistance; } diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 9176bf2aeeea..39d9a1d0a282 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -1120,27 +1120,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, return ConstantInt::get(CI1->getContext(), C1V | C2V); case Instruction::Xor: return ConstantInt::get(CI1->getContext(), C1V ^ C2V); - case Instruction::Shl: { - uint32_t shiftAmt = C2V.getZExtValue(); - if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt)); - else - return UndefValue::get(C1->getType()); // too big shift is undef - } - case Instruction::LShr: { - uint32_t shiftAmt = C2V.getZExtValue(); - if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt)); - else - return UndefValue::get(C1->getType()); // too big shift is undef - } - case Instruction::AShr: { - uint32_t shiftAmt = C2V.getZExtValue(); - if (shiftAmt < 
C1V.getBitWidth()) - return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt)); - else - return UndefValue::get(C1->getType()); // too big shift is undef - } + case Instruction::Shl: + if (C2V.ult(C1V.getBitWidth())) + return ConstantInt::get(CI1->getContext(), C1V.shl(C2V)); + return UndefValue::get(C1->getType()); // too big shift is undef + case Instruction::LShr: + if (C2V.ult(C1V.getBitWidth())) + return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V)); + return UndefValue::get(C1->getType()); // too big shift is undef + case Instruction::AShr: + if (C2V.ult(C1V.getBitWidth())) + return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V)); + return UndefValue::get(C1->getType()); // too big shift is undef } } diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp index 245c500cf621..88e0cd094ec2 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/IR/GCOV.cpp @@ -263,10 +263,12 @@ bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { // required to combine the edge counts that are contained in the GCDA file. 
for (uint32_t BlockNo = 0; Count > 0; ++BlockNo) { // The last block is always reserved for exit block - if (BlockNo >= Blocks.size()-1) { + if (BlockNo >= Blocks.size()) { errs() << "Unexpected number of edges (in " << Name << ").\n"; return false; } + if (BlockNo == Blocks.size() - 1) + errs() << "(" << Name << ") has arcs from exit block.\n"; GCOVBlock &Block = *Blocks[BlockNo]; for (size_t EdgeNo = 0, End = Block.getNumDstEdges(); EdgeNo < End; ++EdgeNo) { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 8eff90a9ef7f..e2a4fc1bef7b 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -3636,21 +3636,27 @@ bool AsmParser::parseDirectiveSpace(StringRef IDVal) { } /// parseDirectiveLEB128 -/// ::= (.sleb128 | .uleb128) expression +/// ::= (.sleb128 | .uleb128) [ expression (, expression)* ] bool AsmParser::parseDirectiveLEB128(bool Signed) { checkForValidSection(); const MCExpr *Value; - if (parseExpression(Value)) - return true; + for (;;) { + if (parseExpression(Value)) + return true; - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + if (Signed) + getStreamer().EmitSLEB128Value(Value); + else + getStreamer().EmitULEB128Value(Value); - if (Signed) - getStreamer().EmitSLEB128Value(Value); - else - getStreamer().EmitULEB128Value(Value); + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } return false; } diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 7ccde463459d..c421ee84c2b7 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -333,23 +333,12 @@ void Memory::InvalidateInstructionCache(const void *Addr, for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) asm volatile("icbi 0, %0" : : "r"(Line)); asm volatile("isync"); -# elif (defined(__arm__) || defined(__aarch64__)) && 
defined(__GNUC__) +# elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \ + defined(__GNUC__) // FIXME: Can we safely always call this for __GNUC__ everywhere? const char *Start = static_cast<const char *>(Addr); const char *End = Start + Len; __clear_cache(const_cast<char *>(Start), const_cast<char *>(End)); -# elif defined(__mips__) - const char *Start = static_cast<const char *>(Addr); -# if defined(ANDROID) - // The declaration of "cacheflush" in Android bionic: - // extern int cacheflush(long start, long end, long flags); - const char *End = Start + Len; - long LStart = reinterpret_cast<long>(const_cast<char *>(Start)); - long LEnd = reinterpret_cast<long>(const_cast<char *>(End)); - cacheflush(LStart, LEnd, BCACHE); -# else - cacheflush(const_cast<char *>(Start), Len, BCACHE); -# endif # endif #endif // end apple diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc index cd56b13c14c3..bbbf7ea6a777 100644 --- a/lib/Support/Windows/explicit_symbols.inc +++ b/lib/Support/Windows/explicit_symbols.inc @@ -10,9 +10,15 @@ #ifdef HAVE___CHKSTK EXPLICIT_SYMBOL(__chkstk) #endif +#ifdef HAVE___CHKSTK_MS + EXPLICIT_SYMBOL(__chkstk_ms) +#endif #ifdef HAVE____CHKSTK EXPLICIT_SYMBOL(___chkstk) #endif +#ifdef HAVE____CHKSTK_MS + EXPLICIT_SYMBOL(___chkstk_ms) +#endif #ifdef HAVE___MAIN EXPLICIT_SYMBOL(__main) // FIXME: Don't call it. 
#endif diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 08ee687d84a2..5159dbf0529d 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "MCTargetDesc/AArch64MCExpr.h" #include "AArch64.h" #include "AArch64MCInstLower.h" #include "AArch64MachineFunctionInfo.h" @@ -494,24 +496,57 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(OutStreamer, TmpInst); return; } - case AArch64::TLSDESC_BLR: { - MCOperand Callee, Sym; - MCInstLowering.lowerOperand(MI->getOperand(0), Callee); - MCInstLowering.lowerOperand(MI->getOperand(1), Sym); - - // First emit a relocation-annotation. This expands to no code, but requests + case AArch64::TLSDESC_CALLSEQ: { + /// lower this to: + /// adrp x0, :tlsdesc:var + /// ldr x1, [x0, #:tlsdesc_lo12:var] + /// add x0, x0, #:tlsdesc_lo12:var + /// .tlsdesccall var + /// blr x1 + /// (TPIDR_EL0 offset now in x0) + const MachineOperand &MO_Sym = MI->getOperand(0); + MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym); + MCOperand Sym, SymTLSDescLo12, SymTLSDesc; + MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC); + MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE); + MCInstLowering.lowerOperand(MO_Sym, Sym); + MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12); + MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc); + + MCInst Adrp; + Adrp.setOpcode(AArch64::ADRP); + Adrp.addOperand(MCOperand::CreateReg(AArch64::X0)); + Adrp.addOperand(SymTLSDesc); + EmitToStreamer(OutStreamer, Adrp); + + MCInst Ldr; + Ldr.setOpcode(AArch64::LDRXui); + Ldr.addOperand(MCOperand::CreateReg(AArch64::X1)); + Ldr.addOperand(MCOperand::CreateReg(AArch64::X0)); + Ldr.addOperand(SymTLSDescLo12); + 
Ldr.addOperand(MCOperand::CreateImm(0)); + EmitToStreamer(OutStreamer, Ldr); + + MCInst Add; + Add.setOpcode(AArch64::ADDXri); + Add.addOperand(MCOperand::CreateReg(AArch64::X0)); + Add.addOperand(MCOperand::CreateReg(AArch64::X0)); + Add.addOperand(SymTLSDescLo12); + Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0))); + EmitToStreamer(OutStreamer, Add); + + // Emit a relocation-annotation. This expands to no code, but requests // the following instruction gets an R_AARCH64_TLSDESC_CALL. MCInst TLSDescCall; TLSDescCall.setOpcode(AArch64::TLSDESCCALL); TLSDescCall.addOperand(Sym); EmitToStreamer(OutStreamer, TLSDescCall); - // Other than that it's just a normal indirect call to the function loaded - // from the descriptor. - MCInst BLR; - BLR.setOpcode(AArch64::BLR); - BLR.addOperand(Callee); - EmitToStreamer(OutStreamer, BLR); + MCInst Blr; + Blr.setOpcode(AArch64::BLR); + Blr.addOperand(MCOperand::CreateReg(AArch64::X1)); + EmitToStreamer(OutStreamer, Blr); return; } diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index aab8e384b8d0..ba4fc3b25e0e 100644 --- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass { for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { switch (I->getOpcode()) { - case AArch64::TLSDESC_BLR: + case AArch64::TLSDESC_CALLSEQ: // Make sure it's a local dynamic access. 
- if (!I->getOperand(1).isSymbol() || - strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) + if (!I->getOperand(0).isSymbol() || + strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_")) break; if (TLSBaseAddrReg) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 399b5eeaf5f5..6458d56c751f 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -64,8 +64,16 @@ EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden, static cl::opt<bool> EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, - cl::desc("Allow AArch64 SLI/SRI formation"), - cl::init(false)); + cl::desc("Allow AArch64 SLI/SRI formation"), + cl::init(false)); + +// FIXME: The necessary dtprel relocations don't seem to be supported +// well in the GNU bfd and gold linkers at the moment. Therefore, by +// default, for now, fall back to GeneralDynamic code generation. +cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration( + "aarch64-elf-ldtls-generation", cl::Hidden, + cl::desc("Allow AArch64 Local Dynamic TLS code generation"), + cl::init(false)); AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM) @@ -760,7 +768,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG"; case AArch64ISD::CSINC: return "AArch64ISD::CSINC"; case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; - case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL"; + case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ"; case AArch64ISD::ADC: return "AArch64ISD::ADC"; case AArch64ISD::SBC: return "AArch64ISD::SBC"; case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; @@ -2023,18 +2031,19 @@ SDValue AArch64TargetLowering::LowerFormalArguments( unsigned CurArgIdx = 0; for (unsigned i = 0; i != NumArgs; ++i) { MVT ValVT = Ins[i].VT; - 
std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx); - CurArgIdx = Ins[i].OrigArgIndex; - - // Get type of the original argument. - EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true); - MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other; - // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. - if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) - ValVT = MVT::i8; - else if (ActualMVT == MVT::i16) - ValVT = MVT::i16; + if (Ins[i].isOrigArg()) { + std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx); + CurArgIdx = Ins[i].getOrigArgIndex(); + // Get type of the original argument. + EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true); + MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other; + // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. + if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) + ValVT = MVT::i8; + else if (ActualMVT == MVT::i16) + ValVT = MVT::i16; + } CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo); @@ -3049,61 +3058,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, /// When accessing thread-local variables under either the general-dynamic or /// local-dynamic system, we make a "TLS-descriptor" call. The variable will /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry -/// is a function pointer to carry out the resolution. This function takes the -/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All -/// other registers (except LR, NZCV) are preserved. -/// -/// Thus, the ideal call sequence on AArch64 is: -/// -/// adrp x0, :tlsdesc:thread_var -/// ldr x8, [x0, :tlsdesc_lo12:thread_var] -/// add x0, x0, :tlsdesc_lo12:thread_var -/// .tlsdesccall thread_var -/// blr x8 -/// (TPIDR_EL0 offset now in x0). 
+/// is a function pointer to carry out the resolution. /// -/// The ".tlsdesccall" directive instructs the assembler to insert a particular -/// relocation to help the linker relax this sequence if it turns out to be too -/// conservative. +/// The sequence is: +/// adrp x0, :tlsdesc:var +/// ldr x1, [x0, #:tlsdesc_lo12:var] +/// add x0, x0, #:tlsdesc_lo12:var +/// .tlsdesccall var +/// blr x1 +/// (TPIDR_EL0 offset now in x0) /// -/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this -/// is harmless. -SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, - SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const { +/// The above sequence must be produced unscheduled, to enable the linker to +/// optimize/relax this sequence. +/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the +/// above sequence, and expanded really late in the compilation flow, to ensure +/// the sequence is produced as per above. +SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL, + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); - // The function we need to call is simply the first entry in the GOT for this - // descriptor, load it in preparation. - SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr); - - // TLS calls preserve all registers except those that absolutely must be - // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be - // silly). - const TargetRegisterInfo *TRI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); - const AArch64RegisterInfo *ARI = - static_cast<const AArch64RegisterInfo *>(TRI); - const uint32_t *Mask = ARI->getTLSCallPreservedMask(); - - // The function takes only one argument: the address of the descriptor itself - // in X0. 
- SDValue Glue, Chain; - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); - Glue = Chain.getValue(1); + SDValue Chain = DAG.getEntryNode(); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - // We're now ready to populate the argument list, as with a normal call: - SmallVector<SDValue, 6> Ops; + SmallVector<SDValue, 2> Ops; Ops.push_back(Chain); - Ops.push_back(Func); Ops.push_back(SymAddr); - Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); - Ops.push_back(DAG.getRegisterMask(Mask)); - Ops.push_back(Glue); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops); - Glue = Chain.getValue(1); + Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops); + SDValue Glue = Chain.getValue(1); return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); } @@ -3114,9 +3096,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, assert(Subtarget->isTargetELF() && "This function expects an ELF target"); assert(getTargetMachine().getCodeModel() == CodeModel::Small && "ELF TLS only supported in small memory model"); + // Different choices can be made for the maximum size of the TLS area for a + // module. For the small address model, the default TLS size is 16MiB and the + // maximum TLS size is 4GiB. + // FIXME: add -mtls-size command line option and make it control the 16MiB + // vs. 4GiB code sequence generation. 
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + if (!EnableAArch64ELFLocalDynamicTLSGeneration) { + if (Model == TLSModel::LocalDynamic) + Model = TLSModel::GeneralDynamic; + } SDValue TPOff; EVT PtrVT = getPointerTy(); @@ -3127,17 +3118,20 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, if (Model == TLSModel::LocalExec) { SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1); + GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12); SDValue LoVar = DAG.getTargetGlobalAddress( GV, DL, PtrVT, 0, - AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); + AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); + SDValue TPWithOff_lo = + SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase, + HiVar, DAG.getTargetConstant(0, MVT::i32)), + 0); + SDValue TPWithOff = + SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo, + LoVar, DAG.getTargetConstant(0, MVT::i32)), + 0); + return TPWithOff; } else if (Model == TLSModel::InitialExec) { TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff); @@ -3152,19 +3146,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, DAG.getMachineFunction().getInfo<AArch64FunctionInfo>(); MFI->incNumLocalDynamicTLSAccesses(); - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. 
- SDValue HiDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, - AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. @@ -3173,40 +3154,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, // Now we can calculate the offset from TPIDR_EL0 to this module's // thread-local area. - TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); + TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG); // Now use :dtprel_whatever: operations to calculate this variable's offset // in its thread-storage area. SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1); + GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12); SDValue LoVar = DAG.getTargetGlobalAddress( GV, DL, MVT::i64, 0, - AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); - - SDValue DTPOff = - SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - DTPOff = - SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); - - TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff); - } else if (Model == TLSModel::GeneralDynamic) { - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. 
- SDValue HiDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - + TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar, + DAG.getTargetConstant(0, MVT::i32)), + 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), + 0); + } else if (Model == TLSModel::GeneralDynamic) { // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. @@ -3214,7 +3178,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); // Finally we can make a call to calculate the offset from tpidr_el0. - TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); + TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG); } else llvm_unreachable("Unsupported ELF TLS access model"); diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index cc25bede8d62..5a193228f189 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -29,9 +29,9 @@ enum { WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. CALL, // Function call. - // Almost the same as a normal call node, except that a TLSDesc relocation is - // needed so the linker can relax it correctly if possible. - TLSDESC_CALL, + // Produces the full sequence of instructions for getting the thread pointer + // offset of a variable into X0, using the TLSDesc model. 
+ TLSDESC_CALLSEQ, ADRP, // Page address of a TargetGlobalAddress operand. ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. LOADgot, // Load from automatically generated descriptor (e.g. Global @@ -399,8 +399,8 @@ private: SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const; + SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL, + SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index e0fb90a9f621..a6f09e944af4 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -96,6 +96,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; + +// Generates the general dynamic sequences, i.e. 
+// adrp x0, :tlsdesc:var +// ldr x1, [x0, #:tlsdesc_lo12:var] +// add x0, x0, #:tlsdesc_lo12:var +// .tlsdesccall var +// blr x1 + +// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here) +// number of operands (the variable) +def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1, + [SDTCisPtrTy<0>]>; + def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, @@ -229,10 +242,11 @@ def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; -def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL", - SDT_AArch64TLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; +def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", + SDT_AArch64TLSDescCallSeq, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; + def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", SDT_AArch64WrapperLarge>; @@ -1049,15 +1063,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { let AsmString = ".tlsdesccall $sym"; } -// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It -// gets expanded to two MCInsts during lowering. -let isCall = 1, Defs = [LR] in -def TLSDESC_BLR - : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), - [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; +// FIXME: maybe the scratch register used shouldn't be fixed to X1? +// FIXME: can "hasSideEffects be dropped? 
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, + isCodeGenOnly = 1 in +def TLSDESC_CALLSEQ + : Pseudo<(outs), (ins i64imm:$sym), + [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>; +def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), + (TLSDESC_CALLSEQ texternalsym:$sym)>; -def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym), - (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index e57b0f4dbb09..b82934134d91 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -22,9 +22,12 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; +extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration; + AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer) : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {} @@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); Model = Printer.TM.getTLSModel(GV); + if (!EnableAArch64ELFLocalDynamicTLSGeneration && + Model == TLSModel::LocalDynamic) + Model = TLSModel::GeneralDynamic; + } else { assert(MO.isSymbol() && StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" && "unexpected external TLS symbol"); + // The general dynamic access sequence is used to get the + // address of _TLS_MODULE_BASE_. 
Model = TLSModel::GeneralDynamic; } switch (Model) { @@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, RefFlags |= AArch64MCExpr::VK_G1; else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0) RefFlags |= AArch64MCExpr::VK_G0; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12) + RefFlags |= AArch64MCExpr::VK_HI12; if (MO.getTargetFlags() & AArch64II::MO_NC) RefFlags |= AArch64MCExpr::VK_NC; diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index c60b09a5f119..6d0337ce15ed 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -1229,7 +1229,7 @@ namespace AArch64II { MO_NO_FLAG, - MO_FRAGMENT = 0x7, + MO_FRAGMENT = 0xf, /// MO_PAGE - A symbol operand with this flag represents the pc-relative /// offset of the 4K page containing the symbol. This is used with the @@ -1257,26 +1257,31 @@ namespace AArch64II { /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction MO_G0 = 6, + /// MO_HI12 - This flag indicates that a symbol operand represents the bits + /// 13-24 of a 64-bit address, used in a arithmetic immediate-shifted-left- + /// by-12-bits instruction. + MO_HI12 = 7, + /// MO_GOT - This flag indicates that a symbol operand represents the /// address of the GOT entry for the symbol, rather than the address of /// the symbol itself. - MO_GOT = 8, + MO_GOT = 0x10, /// MO_NC - Indicates whether the linker is expected to check the symbol /// reference for overflow. For example in an ADRP/ADD pair of relocations /// the ADRP usually does check, but not the ADD. - MO_NC = 0x10, + MO_NC = 0x20, /// MO_TLS - Indicates that the operand being accessed is some kind of /// thread-local symbol. On Darwin, only one type of thread-local access /// exists (pre linker-relaxation), but on ELF the TLSModel used for the /// referee will affect interpretation. 
- MO_TLS = 0x20, + MO_TLS = 0x40, /// MO_CONSTPOOL - This flag indicates that a symbol operand represents /// the address of a constant pool entry for the symbol, rather than the /// address of the symbol itself. - MO_CONSTPOOL = 0x40 + MO_CONSTPOOL = 0x80 }; } // end namespace AArch64II diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a1de5efb4507..2dc4707bb0a1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3092,8 +3092,11 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); - CurArgIdx = Ins[VA.getValNo()].OrigArgIndex; + if (Ins[VA.getValNo()].isOrigArg()) { + std::advance(CurOrigArg, + Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx); + CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex(); + } // Arguments stored in registers. if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); @@ -3173,7 +3176,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.isMemLoc()); assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); - int index = ArgLocs[i].getValNo(); + int index = VA.getValNo(); // Some Ins[] entries become multiple ArgLoc[] entries. // Process them only once. @@ -3186,6 +3189,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. 
if (Flags.isByVal()) { + assert(Ins[index].isOrigArg() && + "Byval arguments cannot be implicit"); unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 7db5b34204c3..699dfae02fc9 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -3667,43 +3667,44 @@ bool MipsAsmParser::parseDirectiveModule() { return false; } - if (Lexer.is(AsmToken::Identifier)) { - StringRef Option = Parser.getTok().getString(); - Parser.Lex(); - - if (Option == "oddspreg") { - getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32()); - clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + StringRef Option; + if (Parser.parseIdentifier(Option)) { + reportParseError("expected .module option identifier"); + return false; + } - if (getLexer().isNot(AsmToken::EndOfStatement)) { - reportParseError("unexpected token, expected end of statement"); - return false; - } + if (Option == "oddspreg") { + getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32()); + clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); return false; - } else if (Option == "nooddspreg") { - if (!isABI_O32()) { - Error(L, "'.module nooddspreg' requires the O32 ABI"); - return false; - } + } - getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32()); - setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + return false; // parseDirectiveModule has finished successfully. 
+ } else if (Option == "nooddspreg") { + if (!isABI_O32()) { + Error(L, "'.module nooddspreg' requires the O32 ABI"); + return false; + } - if (getLexer().isNot(AsmToken::EndOfStatement)) { - reportParseError("unexpected token, expected end of statement"); - return false; - } + getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32()); + setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); return false; - } else if (Option == "fp") { - return parseDirectiveModuleFP(); } + return false; // parseDirectiveModule has finished successfully. + } else if (Option == "fp") { + return parseDirectiveModuleFP(); + } else { return Error(L, "'" + Twine(Option) + "' is not a valid .module option."); } - - return false; } /// parseDirectiveModuleFP diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index da33f3b913cd..d42b948cc615 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -259,6 +259,11 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCacheOpR6(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeCacheOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -304,6 +309,10 @@ static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -1118,6 +1127,23 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeCacheOpR6(MCInst &Inst, + unsigned 
Insn, + uint64_t Address, + const void *Decoder) { + int Offset = fieldFromInstruction(Insn, 7, 9); + unsigned Hint = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 21, 5); + + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::CreateReg(Base)); + Inst.addOperand(MCOperand::CreateImm(Offset)); + Inst.addOperand(MCOperand::CreateImm(Hint)); + + return MCDisassembler::Success; +} + static DecodeStatus DecodeSyncI(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -1354,6 +1380,23 @@ static DecodeStatus DecodeFMem3(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<11>(Insn & 0x07ff); + unsigned Reg = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 11, 5); + + Reg = getReg(Decoder, Mips::COP2RegClassID, Reg); + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::CreateReg(Reg)); + Inst.addOperand(MCOperand::CreateReg(Base)); + Inst.addOperand(MCOperand::CreateImm(Offset)); + + return MCDisassembler::Success; +} static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, unsigned Insn, uint64_t Address, diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 3e1d047091a9..5ad56834607e 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -58,15 +58,15 @@ def MipsInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// def FeatureNoABICalls : SubtargetFeature<"noabicalls", "NoABICalls", "true", - "Disable SVR4-style position-independent code.">; + "Disable SVR4-style position-independent code">; def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true", - "General Purpose Registers are 64-bit wide.">; + "General Purpose Registers are 64-bit wide">; def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true", - "Support 64-bit 
FP registers.">; + "Support 64-bit FP registers">; def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true", - "Support for FPXX.">; + "Support for FPXX">; def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true", - "IEEE 754-2008 NaN encoding.">; + "IEEE 754-2008 NaN encoding">; def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat", "true", "Only supports single precision float">; def FeatureO32 : SubtargetFeature<"o32", "ABI", "MipsABIInfo::O32()", @@ -81,7 +81,7 @@ def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false", "Disable odd numbered single-precision " "registers">; def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", - "true", "Enable vector FPU instructions.">; + "true", "Enable vector FPU instructions">; def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", "Mips I ISA Support [highly experimental]">; def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2", diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 976beccfed9d..7e1fbfdcac46 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -293,6 +293,9 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted( void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { + if (Amount == 0) + return; + if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16> BuildAddiuSpImm(MBB, I, Amount); else diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 185d12ec93fd..49c63226dc06 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -379,7 +379,6 @@ class JMP_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd, list<dag> Pattern = []; bit isTerminator = 1; bit hasDelaySlot = 0; - string DecoderMethod = "DecodeSimm16"; } class JIALC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jialc", 
calloffset16, @@ -550,6 +549,7 @@ class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd, dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint); string AsmString = !strconcat(instr_asm, "\t$hint, $addr"); list<dag> Pattern = []; + string DecoderMethod = "DecodeCacheOpR6"; } class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>; @@ -561,6 +561,7 @@ class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> { string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); list<dag> Pattern = []; bit mayLoad = 1; + string DecoderMethod = "DecodeFMemCop2R6"; } class LDC2_R6_DESC : COP2LD_DESC_BASE<"ldc2", COP2Opnd>; @@ -572,6 +573,7 @@ class COP2ST_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> { string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); list<dag> Pattern = []; bit mayStore = 1; + string DecoderMethod = "DecodeFMemCop2R6"; } class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index bfe2ba011957..d6628d408480 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -440,6 +440,16 @@ def : MipsPat<(i64 (sext_inreg GPR64:$src, i32)), // bswap MipsPattern def : MipsPat<(bswap GPR64:$rt), (DSHD (DSBH GPR64:$rt))>; +// Carry pattern +def : MipsPat<(subc GPR64:$lhs, GPR64:$rhs), + (DSUBu GPR64:$lhs, GPR64:$rhs)>; +let AdditionalPredicates = [NotDSP] in { + def : MipsPat<(addc GPR64:$lhs, GPR64:$rhs), + (DADDu GPR64:$lhs, GPR64:$rhs)>; + def : MipsPat<(addc GPR64:$lhs, immSExt16:$imm), + (DADDiu GPR64:$lhs, imm:$imm)>; +} + //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp index e18cc8b1f7b3..b8081295ca64 100644 --- a/lib/Target/Mips/MipsCCState.cpp +++ b/lib/Target/Mips/MipsCCState.cpp @@ -132,8 +132,8 @@ 
void MipsCCState::PreAnalyzeFormalArgumentsForF128( continue; } - assert(Ins[i].OrigArgIndex < MF.getFunction()->arg_size()); - std::advance(FuncArg, Ins[i].OrigArgIndex); + assert(Ins[i].getOrigArgIndex() < MF.getFunction()->arg_size()); + std::advance(FuncArg, Ins[i].getOrigArgIndex()); OriginalArgWasF128.push_back( originalTypeIsF128(FuncArg->getType(), nullptr)); diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 90bf9f3f5835..dcd88f25d25c 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -123,7 +123,7 @@ def CC_MipsN_SoftFloat : CallingConv<[ ]>; def CC_MipsN : CallingConv<[ - CCIfType<[i8, i16, i32], + CCIfType<[i8, i16, i32, i64], CCIfSubtargetNot<"isLittle()", CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>, @@ -159,6 +159,10 @@ def CC_MipsN : CallingConv<[ // N32/64 variable arguments. // All arguments are passed in integer registers. def CC_MipsN_VarArg : CallingConv<[ + CCIfType<[i8, i16, i32, i64], + CCIfSubtargetNot<"isLittle()", + CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>, + // All integers are promoted to 64-bit. 
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index d25f5637f57c..37fc7849310b 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -261,6 +261,9 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::LOAD, MVT::i64, Custom); setOperationAction(ISD::STORE, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); } if (!Subtarget.isGP64bit()) { @@ -616,6 +619,33 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue ValueIfTrue = N->getOperand(0), ValueIfFalse = N->getOperand(2); + + ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(ValueIfFalse); + if (!FalseC || FalseC->getZExtValue()) + return SDValue(); + + // Since RHS (False) is 0, we swap the order of the True/False operands + // (obviously also inverting the condition) so that we can + // take advantage of conditional moves using the $0 register. + // Example: + // return (a != 0) ? x : 0; + // load $reg, x + // movz $reg, $0, a + unsigned Opc = (N->getOpcode() == MipsISD::CMovFP_T) ? 
MipsISD::CMovFP_F : + MipsISD::CMovFP_T; + + SDValue FCC = N->getOperand(1), Glue = N->getOperand(3); + return DAG.getNode(Opc, SDLoc(N), ValueIfFalse.getValueType(), + ValueIfFalse, FCC, ValueIfTrue, Glue); +} + static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { @@ -749,6 +779,9 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return performDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SELECT: return performSELECTCombine(N, DAG, DCI, Subtarget); + case MipsISD::CMovFP_F: + case MipsISD::CMovFP_T: + return performCMovFPCombine(N, DAG, DCI, Subtarget); case ISD::AND: return performANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: @@ -2017,10 +2050,11 @@ SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op, SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); + MVT VT = Subtarget.isGP64bit() ? MVT::i64 : MVT::i32; + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); SDValue Shamt = Op.getOperand(2); - - // if shamt < 32: + // if shamt < (VT.bits): // lo = (shl lo, shamt) // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt)) // else: @@ -2028,18 +2062,17 @@ SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op, // hi = (shl lo, shamt[4:0]) SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, DAG.getConstant(-1, MVT::i32)); - SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo, - DAG.getConstant(1, MVT::i32)); - SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, ShiftRight1Lo, - Not); - SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi, Shamt); - SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo); - SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, Shamt); + SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, + DAG.getConstant(1, VT)); + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); + 
SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, - DAG.getConstant(0x20, MVT::i32)); - Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, - DAG.getConstant(0, MVT::i32), ShiftLeftLo); - Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or); + DAG.getConstant(VT.getSizeInBits(), MVT::i32)); + Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, + DAG.getConstant(0, VT), ShiftLeftLo); + Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); SDValue Ops[2] = {Lo, Hi}; return DAG.getMergeValues(Ops, DL); @@ -2050,8 +2083,9 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); SDValue Shamt = Op.getOperand(2); + MVT VT = Subtarget.isGP64bit() ? MVT::i64 : MVT::i32; - // if shamt < 32: + // if shamt < (VT.bits): // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt)) // if isSRA: // hi = (sra hi, shamt) @@ -2066,21 +2100,20 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, // hi = 0 SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, DAG.getConstant(-1, MVT::i32)); - SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi, - DAG.getConstant(1, MVT::i32)); - SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, ShiftLeft1Hi, Not); - SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo, Shamt); - SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo); - SDValue ShiftRightHi = DAG.getNode(IsSRA ? 
ISD::SRA : ISD::SRL, DL, MVT::i32, - Hi, Shamt); + SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, + DAG.getConstant(1, VT)); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not); + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, + DL, VT, Hi, Shamt); SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, - DAG.getConstant(0x20, MVT::i32)); - SDValue Shift31 = DAG.getNode(ISD::SRA, DL, MVT::i32, Hi, - DAG.getConstant(31, MVT::i32)); - Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftRightHi, Or); - Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, - IsSRA ? Shift31 : DAG.getConstant(0, MVT::i32), - ShiftRightHi); + DAG.getConstant(VT.getSizeInBits(), MVT::i32)); + SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi, + DAG.getConstant(VT.getSizeInBits() - 1, VT)); + Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or); + Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, + IsSRA ? 
Ext : DAG.getConstant(0, VT), ShiftRightHi); SDValue Ops[2] = {Lo, Hi}; return DAG.getMergeValues(Ops, DL); @@ -2900,13 +2933,16 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - std::advance(FuncArg, Ins[i].OrigArgIndex - CurArgIdx); - CurArgIdx = Ins[i].OrigArgIndex; + if (Ins[i].isOrigArg()) { + std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx); + CurArgIdx = Ins[i].getOrigArgIndex(); + } EVT ValVT = VA.getValVT(); ISD::ArgFlagsTy Flags = Ins[i].Flags; bool IsRegLoc = VA.isRegLoc(); if (Flags.isByVal()) { + assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit"); unsigned FirstByValReg, LastByValReg; unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); @@ -3027,6 +3063,15 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv, return CCInfo.CheckReturn(Outs, RetCC_Mips); } +bool +MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { + if (Subtarget.hasMips3() && Subtarget.abiUsesSoftFloat()) { + if (Type == MVT::i32) + return true; + } + return IsSigned; +} + SDValue MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 4132de6dbcad..4da337a61dad 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -472,6 +472,8 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, SelectionDAG &DAG) const override; + bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; + // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const override; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 2aa83289a106..ed97cb461923 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ 
b/lib/Target/Mips/MipsInstrFPU.td @@ -458,42 +458,42 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>, defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>; def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>, - MADDS_FM<4, 0>, ISA_MIPS32R2_NOT_32R6_64R6; + MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>, - MADDS_FM<5, 0>, ISA_MIPS32R2_NOT_32R6_64R6; + MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; let AdditionalPredicates = [NoNaNsFPMath] in { def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>, - MADDS_FM<6, 0>, ISA_MIPS32R2_NOT_32R6_64R6; + MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>, - MADDS_FM<7, 0>, ISA_MIPS32R2_NOT_32R6_64R6; + MADDS_FM<7, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; } def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; let AdditionalPredicates = [NoNaNsFPMath] in { def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, - MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, - MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; } -let isCodeGenOnly=1 in { +let DecoderNamespace = "Mips64" in { def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 
1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; } let AdditionalPredicates = [NoNaNsFPMath], - isCodeGenOnly=1 in { + DecoderNamespace = "Mips64" in { def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>, - MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>, - MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 2b3b6c1e5242..1eb8d9ad821d 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -388,6 +388,8 @@ def MSA128W: RegisterClass<"Mips", [v4i32, v4f32], 128, (sequence "W%u", 0, 31)>; def MSA128D: RegisterClass<"Mips", [v2i64, v2f64], 128, (sequence "W%u", 0, 31)>; +def MSA128WEvens: RegisterClass<"Mips", [v4i32, v4f32], 128, + (decimate (sequence "W%u", 0, 31), 2)>; def MSACtrl: RegisterClass<"Mips", [i32], 32, (add MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 70963821690c..8c2620ca7f02 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -236,13 +236,35 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, (Opc == ISD::SUBC || Opc == ISD::SUBE)) && "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu; + if (Subtarget->isGP64bit()) { + SLTuOp = Mips::SLTu64; + ADDuOp = Mips::DADDu; + } + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); - SDNode *Carry = 
CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops); - SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT, - SDValue(Carry, 0), RHS); + SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops); + + if (Subtarget->isGP64bit()) { + // On 64-bit targets, sltu produces an i64 but our backend currently says + // that SLTu64 produces an i32. We need to fix this in the long run but for + // now, just make the DAG type-correct by asserting the upper bits are zero. + Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT, + CurDAG->getTargetConstant(0, VT), + SDValue(Carry, 0), + CurDAG->getTargetConstant(Mips::sub_32, VT)); + } + + // Generate a second addition only if we know that RHS is not a + // constant-zero node. + SDNode *AddCarry = Carry; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); + if (!C || C->getZExtValue()) + AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS); + return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0)); } @@ -641,7 +663,8 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { case ISD::SUBE: { SDValue InFlag = Node->getOperand(2); - Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node); + unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu; + Result = selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node); return std::make_pair(true, Result); } @@ -649,7 +672,8 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC. break; SDValue InFlag = Node->getOperand(2); - Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); + unsigned Opc = Subtarget->isGP64bit() ? 
Mips::DADDu : Mips::ADDu; + Result = selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node); return std::make_pair(true, Result); } diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 29aac2e276b0..2c033ce61c16 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -122,6 +122,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::MUL, MVT::i64, Custom); if (Subtarget.isGP64bit()) { + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); setOperationAction(ISD::MULHS, MVT::i64, Custom); setOperationAction(ISD::MULHU, MVT::i64, Custom); setOperationAction(ISD::SDIVREM, MVT::i64, Custom); @@ -200,6 +202,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, if (Subtarget.hasMips64r6()) { // MIPS64r6 replaces the accumulator-based multiplies with a three register // instruction + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::MUL, MVT::i64, Legal); setOperationAction(ISD::MULHS, MVT::i64, Legal); setOperationAction(ISD::MULHU, MVT::i64, Legal); @@ -2879,10 +2883,21 @@ emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ unsigned Ws = MI->getOperand(1).getReg(); unsigned Lane = MI->getOperand(2).getImm(); - if (Lane == 0) - BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo); - else { - unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + if (Lane == 0) { + unsigned Wt = Ws; + if (!Subtarget.useOddSPReg()) { + // We must copy to an even-numbered MSA register so that the + // single-precision sub-register is also guaranteed to be even-numbered. 
+ Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass); + + BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws); + } + + BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); + } else { + unsigned Wt = RegInfo.createVirtualRegister( + Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : + &Mips::MSA128WEvensRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); @@ -2944,7 +2959,9 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI, unsigned Wd_in = MI->getOperand(1).getReg(); unsigned Lane = MI->getOperand(2).getImm(); unsigned Fs = MI->getOperand(3).getReg(); - unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + unsigned Wt = RegInfo.createVirtualRegister( + Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : + &Mips::MSA128WEvensRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) .addImm(0) diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 74f291f609fd..180b04327fc9 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -364,6 +364,9 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + if (Amount == 0) + return; + if (isInt<16>(Amount))// addi sp, sp, amount BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); else { // Expand immediate that doesn't fit in 16-bit. 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 5c52bb19b0d1..e580c811d71f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2688,9 +2688,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; - std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); - CurArgIdx = Ins[ArgNo].OrigArgIndex; - + if (Ins[ArgNo].isOrigArg()) { + std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); + CurArgIdx = Ins[ArgNo].getOrigArgIndex(); + } /* Respect alignment of argument on the stack. */ unsigned Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); @@ -2704,6 +2705,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { + assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); + // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; @@ -3064,9 +3067,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; - std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); - CurArgIdx = Ins[ArgNo].OrigArgIndex; - + if (Ins[ArgNo].isOrigArg()) { + std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); + CurArgIdx = Ins[ArgNo].getOrigArgIndex(); + } unsigned CurArgOffset = ArgOffset; // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. @@ -3087,6 +3091,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. 
if (Flags.isByVal()) { + assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); + // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index 1df4448abf05..03f2bbe4820e 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -97,6 +97,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", "true", "Enable spilling of VGPRs to scratch memory">; +def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", + "SGPRInitBug", + "true", + "VI SGPR initilization bug requiring a fixed SGPR allocation size">; + class SubtargetFeatureFetchLimit <string Value> : SubtargetFeature <"fetch"#Value, "TexVTXClauseSize", diff --git a/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp index b545b456161f..0b426bc63dd5 100644 --- a/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp +++ b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp @@ -40,7 +40,8 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) { std::vector<Function*> FuncsToClone; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Function &F = *I; - if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty()) + if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() && + !F.hasFnAttribute(Attribute::NoInline)) FuncsToClone.push_back(&F); } @@ -54,7 +55,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) { for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Function &F = *I; - if (F.hasLocalLinkage()) { + if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) { F.addFnAttr(Attribute::AlwaysInline); } } diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 6185e367ff50..1fae26e18a44 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -343,6 
+343,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.NumVGPR = MaxVGPR + 1; ProgInfo.NumSGPR = MaxSGPR + 1; + if (STM.hasSGPRInitBug()) { + if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) + llvm_unreachable("Too many SGPRs used with the SGPR init bug"); + + ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + } + ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4; ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8; // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 15112c7e54d4..68d557a1cf70 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -439,6 +439,31 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { break; } + case ISD::STORE: { + // Handle i64 stores here for the same reason mentioned above for loads. + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Value = ST->getValue(); + if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore()) + break; + + SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N), + MVT::v2i32, Value); + SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue, + ST->getBasePtr(), ST->getMemOperand()); + + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore); + + if (NewValue.getOpcode() == ISD::BITCAST) { + Select(NewStore.getNode()); + return SelectCode(NewValue.getNode()); + } + + // getNode() may fold the bitcast if its input was another bitcast. If that + // happens we should only select the new store. 
+ N = NewStore.getNode(); + break; + } + case AMDGPUISD::REGISTER_LOAD: { if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) break; @@ -761,6 +786,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); } +// We need to handle this here because tablegen doesn't support matching +// instructions with multiple outputs. SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { SDLoc SL(N); EVT VT = N->getValueType(0); @@ -770,19 +797,12 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { unsigned Opc = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; - const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32); - const SDValue False = CurDAG->getTargetConstant(0, MVT::i1); - SDValue Ops[] = { - Zero, // src0_modifiers - N->getOperand(0), // src0 - Zero, // src1_modifiers - N->getOperand(1), // src1 - Zero, // src2_modifiers - N->getOperand(2), // src2 - False, // clamp - Zero // omod - }; + // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod + SDValue Ops[8]; + SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); + SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); + SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 2adcdf1c299e..b137053fbbc2 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -141,9 +141,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::v2f32, Promote); AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); - setOperationAction(ISD::STORE, MVT::i64, Promote); - AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); - setOperationAction(ISD::STORE, MVT::v4f32, Promote); AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); @@ -162,9 +159,6 @@ 
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : // Custom lowering of vector stores is required for local address space // stores. setOperationAction(ISD::STORE, MVT::v4i32, Custom); - // XXX: Native v2i32 local address space stores are possible, but not - // currently implemented. - setOperationAction(ISD::STORE, MVT::v2i32, Custom); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); @@ -832,11 +826,9 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SmallVector<SDValue, 8> Args; - SDValue A = Op.getOperand(0); - SDValue B = Op.getOperand(1); - DAG.ExtractVectorElements(A, Args); - DAG.ExtractVectorElements(B, Args); + for (const SDUse &U : Op->ops()) + DAG.ExtractVectorElements(U.get(), Args); return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args); } @@ -881,9 +873,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return LowerIntrinsicIABS(Op, DAG); case AMDGPUIntrinsic::AMDGPU_lrp: return LowerIntrinsicLRP(Op, DAG); - case AMDGPUIntrinsic::AMDGPU_fract: - case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name. - return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); case AMDGPUIntrinsic::AMDGPU_clamp: case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name. @@ -913,10 +902,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::AMDGPU_div_fmas: - // FIXME: Dropping bool parameter. Work is needed to support the implicit - // read from VCC. 
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT, - Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(4)); case Intrinsic::AMDGPU_div_fixup: return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT, diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index e28ce0f03acc..202183c18a8d 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -140,6 +140,12 @@ public: /// not exist. If Opcode is not a pseudo instruction, this is identity. int pseudoToMCOpcode(int Opcode) const; + /// \brief Return the descriptor of the target-specific machine instruction + /// that corresponds to the specified pseudo or native opcode. + const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const { + return get(pseudoToMCOpcode(Opcode)); + } + //===---------------------------------------------------------------------===// // Pure virtual funtions to be implemented by sub-classes. //===---------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 0e34392bd50d..d657ad05c8c4 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -35,6 +35,11 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3, [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>] >; +// float, float, float, vcc +def AMDGPUFmasOp : SDTypeProfile<1, 4, + [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>] +>; + //===----------------------------------------------------------------------===// // AMDGPU DAG Nodes // @@ -153,7 +158,7 @@ def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>; // Special case divide FMA with scale and flags (src0 = Quotient, // src1 = Denominator, src2 = Numerator). 
-def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>; +def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>; // Single or double precision division fixup. // Special case divide fixup and flags(src0 = Quotient, src1 = diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 4e536c37b0bd..34b1fc8b5f47 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -164,10 +164,6 @@ class PrivateStore <SDPatternOperator op> : PrivateMemOp < (ops node:$value, node:$ptr), (op node:$value, node:$ptr) >; -def extloadi8_private : PrivateLoad <extloadi8>; -def sextloadi8_private : PrivateLoad <sextloadi8>; -def extloadi16_private : PrivateLoad <extloadi16>; -def sextloadi16_private : PrivateLoad <sextloadi16>; def load_private : PrivateLoad <load>; def truncstorei8_private : PrivateStore <truncstorei8>; @@ -231,6 +227,9 @@ def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ return isLocalLoad(dyn_cast<LoadSDNode>(N)); }]>; +def extloadi8_private : PrivateLoad <az_extloadi8>; +def sextloadi8_private : PrivateLoad <sextloadi8>; + def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; }]>; @@ -267,6 +266,9 @@ def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ return isLocalLoad(dyn_cast<LoadSDNode>(N)); }]>; +def extloadi16_private : PrivateLoad <az_extloadi16>; +def sextloadi16_private : PrivateLoad <sextloadi16>; + def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; }]>; @@ -649,17 +651,10 @@ class RcpPat<Instruction RcpInst, ValueType vt> : Pat < (RcpInst $src) >; -multiclass RsqPat<Instruction RsqInst, ValueType vt> { - def : Pat < - (fdiv FP_ONE, (fsqrt vt:$src)), - (RsqInst $src) - >; - - def : Pat < - (AMDGPUrcp (fsqrt vt:$src)), - (RsqInst $src) - >; -} +class 
RsqPat<Instruction RsqInst, ValueType vt> : Pat < + (AMDGPUrcp (fsqrt vt:$src)), + (RsqInst $src) +>; include "R600Instructions.td" include "R700Instructions.td" diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index eee9c29038d0..ab489cd2a4ab 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -68,6 +68,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; } diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index b1c7498fc142..87cdb5f8db83 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -80,7 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, FlatAddressSpace(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), - EnableVGPRSpilling(false), + EnableVGPRSpilling(false),SGPRInitBug(false), DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16, // Maximum stack alignment (long16) diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 566b45c1dccc..eeb41d3ec7f4 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -45,6 +45,10 @@ public: VOLCANIC_ISLANDS, }; + enum { + FIXED_SGPR_COUNT_FOR_INIT_BUG = 80 + }; + private: std::string DevName; bool Is64bit; @@ -66,6 +70,7 @@ private: 
bool CFALUBug; int LocalMemorySize; bool EnableVGPRSpilling; + bool SGPRInitBug; const DataLayout DL; AMDGPUFrameLowering FrameLowering; @@ -203,6 +208,10 @@ public: return LocalMemorySize; } + bool hasSGPRInitBug() const { + return SGPRInitBug; + } + unsigned getAmdKernelCodeChipID() const; bool enableMachineScheduler() const override { diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td index 58b5ce24b4a3..433c3fc01855 100644 --- a/lib/Target/R600/CaymanInstructions.td +++ b/lib/Target/R600/CaymanInstructions.td @@ -46,7 +46,7 @@ def SIN_cm : SIN_Common<0x8D>; def COS_cm : COS_Common<0x8E>; } // End isVector = 1 -defm : RsqPat<RECIPSQRT_IEEE_cm, f32>; +def : RsqPat<RECIPSQRT_IEEE_cm, f32>; def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index f24f76b7fe16..299d1fa14896 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -69,7 +69,7 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; -defm : RsqPat<RECIPSQRT_IEEE_eg, f32>; +def : RsqPat<RECIPSQRT_IEEE_eg, f32>; def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 8271c6f45fb9..b66ed1000251 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -291,6 +291,8 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, printImmediate64(Op.getImm(), O); else llvm_unreachable("Invalid register class size"); + } else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) { + printImmediate32(Op.getImm(), O); } else { // We hit this for the immediate instruction bits that don't yet have 
a // custom printer. diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index cff97cdb3beb..e5fef0c1d033 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -113,8 +113,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>; // Volcanic Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"tonga", SIFullSpeedModel, [FeatureVolcanicIslands]>; +def : ProcessorModel<"tonga", SIQuarterSpeedModel, + [FeatureVolcanicIslands, FeatureSGPRInitBug] +>; -def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; +def : ProcessorModel<"iceland", SIQuarterSpeedModel, + [FeatureVolcanicIslands, FeatureSGPRInitBug] +>; def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 595f69884544..2e1b0943252d 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -838,6 +838,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case Intrinsic::AMDGPU_rsq: // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior. return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1)); + + case AMDGPUIntrinsic::AMDGPU_fract: + case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name. + return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); } // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) break; @@ -1694,7 +1698,7 @@ SDValue R600TargetLowering::LowerFormalArguments( // XXX - I think PartOffset should give you this, but it seems to give the // size of the register which isn't useful. 
- unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset(); + unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset(); unsigned PartOffset = VA.getLocMemOffset(); unsigned Offset = 36 + VA.getLocMemOffset(); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index b1d3ce276eee..05957d2cf2a1 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1193,7 +1193,7 @@ let Predicates = [isR600] in { def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; - defm : RsqPat<RECIPSQRT_IEEE_r600, f32>; + def : RsqPat<RECIPSQRT_IEEE_r600, f32>; def : FROUNDPat <CNDGE_r600, CNDGT_r600>; diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index c99219dd9074..b8165fb4ab2c 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -83,7 +83,7 @@ class SIAnnotateControlFlow : public FunctionPass { void insertElse(BranchInst *Term); - Value *handleLoopCondition(Value *Cond, PHINode *Broken); + Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L); void handleLoop(BranchInst *Term); @@ -207,8 +207,17 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) { } /// \brief Recursively handle the condition leading to a loop -Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) { - if (PHINode *Phi = dyn_cast<PHINode>(Cond)) { +Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken, + llvm::Loop *L) { + + // Only search through PHI nodes which are inside the loop. If we try this + // with PHI nodes that are outside of the loop, we end up inserting new PHI + // nodes outside of the loop which depend on values defined inside the loop. + // This will break the module with + // 'Instruction does not dominate all users!' errors. 
+ PHINode *Phi = nullptr; + if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) { + BasicBlock *Parent = Phi->getParent(); PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front()); Value *Ret = NewPhi; @@ -223,7 +232,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) } Phi->setIncomingValue(i, BoolFalse); - Value *PhiArg = handleLoopCondition(Incoming, Broken); + Value *PhiArg = handleLoopCondition(Incoming, Broken, L); NewPhi->addIncoming(PhiArg, From); } @@ -253,7 +262,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) { BasicBlock *Parent = Inst->getParent(); - TerminatorInst *Insert = Parent->getTerminator(); + Instruction *Insert; + if (L->contains(Inst)) { + Insert = Parent->getTerminator(); + } else { + Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime(); + } Value *Args[] = { Cond, Broken }; return CallInst::Create(IfBreak, Args, "", Insert); @@ -265,14 +279,15 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) /// \brief Handle a back edge (loop) void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { + BasicBlock *BB = Term->getParent(); + llvm::Loop *L = LI->getLoopFor(BB); BasicBlock *Target = Term->getSuccessor(1); PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front()); Value *Cond = Term->getCondition(); Term->setCondition(BoolTrue); - Value *Arg = handleLoopCondition(Cond, Broken); + Value *Arg = handleLoopCondition(Cond, Broken, L); - BasicBlock *BB = Term->getParent(); for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target); PI != PE; ++PI) { diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 7601794beab8..b54014072bf9 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -35,7 +35,8 @@ enum { SMRD = 1 << 16, DS = 1 << 17, MIMG = 1 << 18, - FLAT = 1 << 19 + FLAT = 1 << 19, + WQM = 1 << 20 }; } diff 
--git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp index d8ffa4f75505..cb24bba24a2f 100644 --- a/lib/Target/R600/SIFoldOperands.cpp +++ b/lib/Target/R600/SIFoldOperands.cpp @@ -209,7 +209,12 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { APInt Imm; if (FoldingImm) { - const TargetRegisterClass *UseRC = MRI.getRegClass(UseOp.getReg()); + unsigned UseReg = UseOp.getReg(); + const TargetRegisterClass *UseRC + = TargetRegisterInfo::isVirtualRegister(UseReg) ? + MRI.getRegClass(UseReg) : + TRI.getRegClass(UseReg); + Imm = APInt(64, OpToFold.getImm()); // Split 64-bit constants into 32-bits for folding. @@ -228,8 +233,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { // In order to fold immediates into copies, we need to change the // copy to a MOV. if (UseMI->getOpcode() == AMDGPU::COPY) { - unsigned MovOp = TII->getMovOpcode( - MRI.getRegClass(UseMI->getOperand(0).getReg())); + unsigned DestReg = UseMI->getOperand(0).getReg(); + const TargetRegisterClass *DestRC + = TargetRegisterInfo::isVirtualRegister(DestReg) ? 
+ MRI.getRegClass(DestReg) : + TRI.getRegClass(DestReg); + + unsigned MovOp = TII->getMovOpcode(DestRC); if (MovOp == AMDGPU::COPY) continue; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 6b2ea0682a43..32ae605e3f63 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -89,8 +89,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::v16i32, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); - setOperationAction(ISD::STORE, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom); @@ -158,8 +156,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); - setTruncStoreAction(MVT::i32, MVT::i8, Custom); - setTruncStoreAction(MVT::i32, MVT::i16, Custom); setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); @@ -214,6 +210,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : } setOperationAction(ISD::FDIV, MVT::f32, Custom); + setOperationAction(ISD::FDIV, MVT::f64, Custom); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); @@ -314,9 +311,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, if (!VT.isSimple() || VT == MVT::Other) return false; - // XXX - CI changes say "Support for unaligned memory accesses" but I don't - // see what for specifically. The wording everywhere else seems to be the - // same. + // TODO - CI+ supports unaligned memory accesses, but this requires driver + // support. // XXX - The only mention I see of this in the ISA manual is for LDS direct // reads the "byte address and must be dword aligned". 
Is it also true for the @@ -328,12 +324,18 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return Align % 4 == 0; } + // Smaller than dword value must be aligned. + // FIXME: This should be allowed on CI+ + if (VT.bitsLT(MVT::i32)) + return false; + // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the // byte-address are ignored, thus forcing Dword alignment. // This applies to private, global, and constant memory. if (IsFast) *IsFast = true; - return VT.bitsGT(MVT::i32); + + return VT.bitsGT(MVT::i32) && Align % 4 == 0; } EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, @@ -448,7 +450,7 @@ SDValue SITargetLowering::LowerFormalArguments( // We REALLY want the ORIGINAL number of vertex elements here, e.g. a // three or five element vertex only needs three or five registers, // NOT four or eigth. - Type *ParamType = FType->getParamType(Arg.OrigArgIndex); + Type *ParamType = FType->getParamType(Arg.getOrigArgIndex()); unsigned NumElements = ParamType->getVectorNumElements(); for (unsigned j = 0; j != NumElements; ++j) { @@ -531,7 +533,7 @@ SDValue SITargetLowering::LowerFormalArguments( Offset, Ins[i].Flags.isSExt()); const PointerType *ParamTy = - dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex)); + dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex())); if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS && ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { // On SI local pointers are just offsets into LDS, so they are always @@ -566,7 +568,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (Arg.VT.isVector()) { // Build a vector from the registers - Type *ParamType = FType->getParamType(Arg.OrigArgIndex); + Type *ParamType = FType->getParamType(Arg.getOrigArgIndex()); unsigned NumElements = ParamType->getVectorNumElements(); SmallVector<SDValue, 4> Regs; @@ -919,6 +921,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case AMDGPUIntrinsic::AMDGPU_fract: + case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name. + return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1), + DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1))); + default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); } @@ -1104,7 +1112,70 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { } SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const { - return SDValue(); + if (DAG.getTarget().Options.UnsafeFPMath) + return LowerFastFDIV(Op, DAG); + + SDLoc SL(Op); + SDValue X = Op.getOperand(0); + SDValue Y = Op.getOperand(1); + + const SDValue One = DAG.getConstantFP(1.0, MVT::f64); + + SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1); + + SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X); + + SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0); + + SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0); + + SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One); + + SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp); + + SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One); + + SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X); + + SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1); + SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3); + + SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64, + NegDivScale0, Mul, DivScale1); + + SDValue Scale; + + if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) { + // Workaround a hardware bug on SI where the condition output from div_scale + // is not usable. + + const SDValue Hi = DAG.getConstant(1, MVT::i32); + + // Figure out if the scale to use for div_fmas. 
+ SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X); + SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y); + SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0); + SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1); + + SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi); + SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi); + + SDValue Scale0Hi + = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi); + SDValue Scale1Hi + = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi); + + SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ); + SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ); + Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen); + } else { + Scale = DivScale1.getValue(1); + } + + SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64, + Fma4, Fma3, Mul, Scale); + + return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X); } SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const { @@ -1125,11 +1196,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { EVT VT = Store->getMemoryVT(); // These stores are legal. 
- if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && - VT.isVector() && VT.getVectorNumElements() == 2 && - VT.getVectorElementType() == MVT::i32) - return SDValue(); - if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) { if (VT.isVector() && VT.getVectorNumElements() > 4) return ScalarizeVectorStore(Op, DAG); @@ -1524,6 +1590,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, case AMDGPUISD::UMAX: case AMDGPUISD::UMIN: { if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG && + N->getValueType(0) != MVT::f64 && getTargetMachine().getOptLevel() > CodeGenOpt::None) return performMin3Max3Combine(N, DCI); break; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 181b11643bf3..50f20ac3619e 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -82,6 +82,8 @@ private: /// \brief Type of the last opcode. InstType LastOpcodeType; + bool LastInstWritesM0; + /// \brief Get increment/decrement amount for this instruction. Counters getHwCounts(MachineInstr &MI); @@ -106,6 +108,9 @@ private: /// \brief Resolve all operand dependencies to counter requirements Counters handleOperands(MachineInstr &MI); + /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG. + void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); + public: SIInsertWaits(TargetMachine &tm) : MachineFunctionPass(ID), @@ -269,6 +274,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, // Insert a NOP to break the clause. 
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)) .addImm(0); + LastInstWritesM0 = false; } if (TII->isSMRD(I->getOpcode())) @@ -362,6 +368,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, ((Counts.Named.LGKM & 0x7) << 8)); LastOpcodeType = OTHER; + LastInstWritesM0 = false; return true; } @@ -403,6 +410,30 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { return Result; } +void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) + return; + + // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG. + if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) { + BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0); + LastInstWritesM0 = false; + return; + } + + // Set whether this instruction sets M0 + LastInstWritesM0 = false; + + unsigned NumOperands = I->getNumOperands(); + for (unsigned i = 0; i < NumOperands; i++) { + const MachineOperand &Op = I->getOperand(i); + + if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0) + LastInstWritesM0 = true; + } +} + // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States" // around other non-memory instructions. 
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { @@ -417,6 +448,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { WaitedOn = ZeroCounts; LastIssued = ZeroCounts; LastOpcodeType = OTHER; + LastInstWritesM0 = false; memset(&UsedRegs, 0, sizeof(UsedRegs)); memset(&DefinedRegs, 0, sizeof(DefinedRegs)); @@ -433,7 +465,9 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { Changes |= insertWait(MBB, I, LastIssued); else Changes |= insertWait(MBB, I, handleOperands(*I)); + pushInstruction(MBB, I); + handleSendMsg(MBB, I); } // Wait for everything at the end of the MBB diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 09c0cbe8f5c3..b825208c92be 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -38,6 +38,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : field bits<1> DS = 0; field bits<1> MIMG = 0; field bits<1> FLAT = 0; + field bits<1> WQM = 0; // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = VM_CNT; @@ -64,6 +65,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : let TSFlags{17} = DS; let TSFlags{18} = MIMG; let TSFlags{19} = FLAT; + let TSFlags{20} = WQM; // Most instructions require adjustments after selection to satisfy // operand requirements. 
@@ -295,18 +297,32 @@ class VOP1e <bits<8> op> : Enc32 { } class VOP2e <bits<6> op> : Enc32 { + bits<8> vdst; + bits<9> src0; + bits<8> src1; - bits<8> VDST; - bits<9> SRC0; - bits<8> VSRC1; - - let Inst{8-0} = SRC0; - let Inst{16-9} = VSRC1; - let Inst{24-17} = VDST; + let Inst{8-0} = src0; + let Inst{16-9} = src1; + let Inst{24-17} = vdst; let Inst{30-25} = op; let Inst{31} = 0x0; //encoding } +class VOP2_MADKe <bits<6> op> : Enc64 { + + bits<8> vdst; + bits<9> src0; + bits<8> vsrc1; + bits<32> src2; + + let Inst{8-0} = src0; + let Inst{16-9} = vsrc1; + let Inst{24-17} = vdst; + let Inst{30-25} = op; + let Inst{31} = 0x0; // encoding + let Inst{63-32} = src2; +} + class VOP3e <bits<9> op> : Enc64 { bits<8> dst; @@ -554,9 +570,6 @@ class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : VOP2Common <outs, ins, asm, pattern>, VOP2e<op>; -class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : - VOP3Common <outs, ins, asm, pattern>, VOP3be<op>; - class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> : VOPCCommon <ins, asm, pattern>, VOPCe <op>; @@ -585,9 +598,6 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> : let SchedRW = [WriteLDS]; } -class DS_si <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : - DS <outs, ins, asm, pattern>, DSe<op>; - class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> : InstSI<outs, ins, asm, pattern> { diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 80b560eb65ae..5ab33b491ce3 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -121,12 +121,20 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, if (Load0->getOperand(0) != Load1->getOperand(0)) return false; + const ConstantSDNode *Load0Offset = + dyn_cast<ConstantSDNode>(Load0->getOperand(1)); + const ConstantSDNode *Load1Offset = + 
dyn_cast<ConstantSDNode>(Load1->getOperand(1)); + + if (!Load0Offset || !Load1Offset) + return false; + // Check chain. if (findChainOperand(Load0) != findChainOperand(Load1)) return false; - Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue(); - Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue(); + Offset0 = Load0Offset->getZExtValue(); + Offset1 = Load1Offset->getZExtValue(); return true; } @@ -333,6 +341,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { + if (DestReg == AMDGPU::VCC) { + if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + // FIXME: Hack until VReg_1 removed. + assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); + BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC) + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + } + + return; + } + assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); @@ -408,11 +431,15 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { int NewOpc; // Try to map original to commuted opcode - if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1) + NewOpc = AMDGPU::getCommuteRev(Opcode); + // Check if the commuted (REV) opcode exists on the target. + if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) return NewOpc; // Try to map commuted to original opcode - if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1) + NewOpc = AMDGPU::getCommuteOrig(Opcode); + // Check if the original (non-REV) opcode exists on the target. 
+ if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) return NewOpc; return Opcode; @@ -1121,6 +1148,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, return false; } + int RegClass = Desc.OpInfo[i].RegClass; + switch (Desc.OpInfo[i].OperandType) { case MCOI::OPERAND_REGISTER: if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) { @@ -1131,7 +1160,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, case AMDGPU::OPERAND_REG_IMM32: break; case AMDGPU::OPERAND_REG_INLINE_C: - if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) { + if (isLiteralConstant(MI->getOperand(i))) { ErrInfo = "Illegal immediate value for operand."; return false; } @@ -1152,7 +1181,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, if (!MI->getOperand(i).isReg()) continue; - int RegClass = Desc.OpInfo[i].RegClass; if (RegClass != -1) { unsigned Reg = MI->getOperand(i).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) @@ -1197,31 +1225,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, } } - // Verify SRC1 for VOP2 and VOPC - if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) { - const MachineOperand &Src1 = MI->getOperand(Src1Idx); - if (Src1.isImm()) { - ErrInfo = "VOP[2C] src1 cannot be an immediate."; - return false; - } - } - - // Verify VOP3 - if (isVOP3(Opcode)) { - if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) { - ErrInfo = "VOP3 src0 cannot be a literal constant."; - return false; - } - if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) { - ErrInfo = "VOP3 src1 cannot be a literal constant."; - return false; - } - if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) { - ErrInfo = "VOP3 src2 cannot be a literal constant."; - return false; - } - } - // Verify misc. restrictions on specific instructions. 
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { @@ -1292,6 +1295,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; + case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; } } @@ -2043,6 +2047,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { swapOperands(Inst); } break; + case AMDGPU::S_LSHL_B64: + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + NewOpcode = AMDGPU::V_LSHLREV_B64; + swapOperands(Inst); + } + break; + case AMDGPU::S_ASHR_I64: + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + NewOpcode = AMDGPU::V_ASHRREV_I64; + swapOperands(Inst); + } + break; + case AMDGPU::S_LSHR_B64: + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + NewOpcode = AMDGPU::V_LSHRREV_B64; + swapOperands(Inst); + } + break; case AMDGPU::S_BFE_U64: case AMDGPU::S_BFM_B64: diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 28cd27dd8962..129803072672 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -204,6 +204,10 @@ public: return get(Opcode).TSFlags & SIInstrFlags::FLAT; } + bool isWQM(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::WQM; + } + bool isInlineConstant(const APInt &Imm) const; bool isInlineConstant(const MachineOperand &MO) const; bool isLiteralConstant(const MachineOperand &MO) const; @@ -243,7 +247,27 @@ public: /// the register class of its machine operand. /// to infer the correct register class base on the other operands. const TargetRegisterClass *getOpRegClass(const MachineInstr &MI, - unsigned OpNo) const;\ + unsigned OpNo) const; + + /// \brief Return the size in bytes of the operand OpNo on the given + // instruction opcode. 
+ unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const { + const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo]; + + if (OpInfo.RegClass == -1) { + // If this is an immediate operand, this must be a 32-bit literal. + assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE); + return 4; + } + + return RI.getRegClass(OpInfo.RegClass)->getSize(); + } + + /// \brief This form should usually be preferred since it handles operands + /// with unknown register classes. + unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { + return getOpRegClass(MI, OpNo)->getSize(); + } /// \returns true if it is legal for the operand at index \p OpNo /// to read a VGPR. diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 175e11d709cf..a749e7f861bb 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -383,15 +383,13 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : let isPseudo = 1; } -class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm, - list<dag> pattern> : - SOP1 <outs, ins, asm, pattern>, +class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> : + SOP1 <outs, ins, asm, []>, SOP1e <op.SI>, SIMCInstr<opName, SISubtarget.SI>; -class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm, - list<dag> pattern> : - SOP1 <outs, ins, asm, pattern>, +class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> : + SOP1 <outs, ins, asm, []>, SOP1e <op.VI>, SIMCInstr<opName, SISubtarget.VI>; @@ -400,10 +398,10 @@ multiclass SOP1_32 <sop1 op, string opName, list<dag> pattern> { pattern>; def _si : SOP1_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0), - opName#" $dst, $src0", pattern>; + opName#" $dst, $src0">; def _vi : SOP1_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0), - opName#" $dst, $src0", pattern>; + opName#" $dst, $src0">; } multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> 
{ @@ -411,10 +409,10 @@ multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> { pattern>; def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0), - opName#" $dst, $src0", pattern>; + opName#" $dst, $src0">; def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0), - opName#" $dst, $src0", pattern>; + opName#" $dst, $src0">; } // no input, 64-bit output. @@ -422,12 +420,12 @@ multiclass SOP1_64_0 <sop1 op, string opName, list<dag> pattern> { def "" : SOP1_Pseudo <opName, (outs SReg_64:$dst), (ins), pattern>; def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins), - opName#" $dst", pattern> { + opName#" $dst"> { let SSRC0 = 0; } def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins), - opName#" $dst", pattern> { + opName#" $dst"> { let SSRC0 = 0; } } @@ -438,10 +436,10 @@ multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> { pattern>; def _si : SOP1_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0), - opName#" $dst, $src0", pattern>; + opName#" $dst, $src0">; def _vi : SOP1_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0), - opName#" $dst, $src0", pattern>; + opName#" $dst, $src0">; } class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> : @@ -451,15 +449,13 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> : let Size = 4; } -class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm, - list<dag> pattern> : - SOP2<outs, ins, asm, pattern>, +class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> : + SOP2<outs, ins, asm, []>, SOP2e<op.SI>, SIMCInstr<opName, SISubtarget.SI>; -class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm, - list<dag> pattern> : - SOP2<outs, ins, asm, pattern>, +class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> : + SOP2<outs, ins, asm, []>, SOP2e<op.VI>, SIMCInstr<opName, SISubtarget.VI>; @@ -469,11 +465,11 @@ multiclass 
SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> { def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc), - opName#" $dst, $src0, $src1 [$scc]", pattern>; + opName#" $dst, $src0, $src1 [$scc]">; def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc), - opName#" $dst, $src0, $src1 [$scc]", pattern>; + opName#" $dst, $src0, $src1 [$scc]">; } multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> { @@ -481,10 +477,10 @@ multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> { (ins SSrc_32:$src0, SSrc_32:$src1), pattern>; def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst), - (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>; + (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">; def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst), - (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>; + (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">; } multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> { @@ -492,10 +488,10 @@ multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> { (ins SSrc_64:$src0, SSrc_64:$src1), pattern>; def _si : SOP2_Real_si <op, opName, (outs SReg_64:$dst), - (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1", pattern>; + (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1">; def _vi : SOP2_Real_vi <op, opName, (outs SReg_64:$dst), - (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1", pattern>; + (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1">; } multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> { @@ -503,10 +499,10 @@ multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> { (ins SSrc_64:$src0, SSrc_32:$src1), pattern>; def _si : SOP2_Real_si <op, opName, (outs SReg_64:$dst), - (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", 
pattern>; + (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">; def _vi : SOP2_Real_vi <op, opName, (outs SReg_64:$dst), - (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern>; + (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">; } @@ -527,15 +523,13 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : let isPseudo = 1; } -class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm, - list<dag> pattern> : - SOPK <outs, ins, asm, pattern>, +class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> : + SOPK <outs, ins, asm, []>, SOPKe <op.SI>, SIMCInstr<opName, SISubtarget.SI>; -class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm, - list<dag> pattern> : - SOPK <outs, ins, asm, pattern>, +class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> : + SOPK <outs, ins, asm, []>, SOPKe <op.VI>, SIMCInstr<opName, SISubtarget.VI>; @@ -544,10 +538,10 @@ multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> { pattern>; def _si : SOPK_Real_si <op, opName, (outs SReg_32:$dst), (ins u16imm:$src0), - opName#" $dst, $src0", pattern>; + opName#" $dst, $src0">; def _vi : SOPK_Real_vi <op, opName, (outs SReg_32:$dst), (ins u16imm:$src0), - opName#" $dst, $src0", pattern>; + opName#" $dst, $src0">; } multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> { @@ -555,10 +549,10 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> { (ins SReg_32:$src0, u16imm:$src1), pattern>; def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst), - (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0", pattern>; + (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">; def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst), - (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0", pattern>; + (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">; } 
//===----------------------------------------------------------------------===// @@ -792,6 +786,7 @@ def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>; def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>; def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>; def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>; +def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> { let Src0RC32 = VCSrc_32; @@ -808,9 +803,14 @@ def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> { } def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; +def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; +def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> { + field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2); + field string Asm = " $dst, $src0, $vsrc1, $src2"; +} def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; @@ -847,6 +847,15 @@ multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern, SIMCInstr <opName#"_e32", SISubtarget.VI>; } +multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern, + string opName> { + def "" : VOP1_Pseudo <outs, ins, pattern, opName>; + + def _si : VOP1<op.SI, outs, ins, asm, []>, + SIMCInstr <opName#"_e32", SISubtarget.SI>; + // No VI instruction. This class is for SI only. 
+} + class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP2Common <outs, ins, "", pattern>, VOP <opName>, @@ -855,25 +864,22 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : } multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, - string opName, string revOpSI> { + string opName, string revOp> { def "" : VOP2_Pseudo <outs, ins, pattern, opName>, - VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>; + VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; def _si : VOP2 <op.SI, outs, ins, opName#asm, []>, - VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>, SIMCInstr <opName#"_e32", SISubtarget.SI>; } multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, - string opName, string revOpSI, string revOpVI> { + string opName, string revOp> { def "" : VOP2_Pseudo <outs, ins, pattern, opName>, - VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>; + VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; def _si : VOP2 <op.SI, outs, ins, opName#asm, []>, - VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>, SIMCInstr <opName#"_e32", SISubtarget.SI>; def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>, - VOP2_REV<revOpVI#"_e32_vi", !eq(revOpVI, opName)>, SIMCInstr <opName#"_e32", SISubtarget.VI>; } @@ -905,6 +911,16 @@ class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> : VOP3e_vi <op>, SIMCInstr <opName#"_e64", SISubtarget.VI>; +class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> : + VOP3Common <outs, ins, asm, []>, + VOP3be <op>, + SIMCInstr<opName#"_e64", SISubtarget.SI>; + +class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> : + VOP3Common <outs, ins, asm, []>, + VOP3be_vi <op>, + SIMCInstr <opName#"_e64", SISubtarget.VI>; + multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern, string opName, int NumSrcArgs, bit HasMods = 1> { @@ -946,24 +962,45 @@ multiclass VOP3_1_m <vop op, dag outs, dag ins, 
string asm, VOP3DisableFields<0, 0, HasMods>; } +multiclass VOP3SI_1_m <vop op, dag outs, dag ins, string asm, + list<dag> pattern, string opName, bit HasMods = 1> { + + def "" : VOP3_Pseudo <outs, ins, pattern, opName>; + + def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>, + VOP3DisableFields<0, 0, HasMods>; + // No VI instruction. This class is for SI only. +} + multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm, - list<dag> pattern, string opName, string revOpSI, string revOpVI, + list<dag> pattern, string opName, string revOp, bit HasMods = 1, bit UseFullOp = 0> { def "" : VOP3_Pseudo <outs, ins, pattern, opName>, - VOP2_REV<revOpSI#"_e64", !eq(revOpSI, opName)>; + VOP2_REV<revOp#"_e64", !eq(revOp, opName)>; - def _si : VOP3_Real_si <op.SI3, - outs, ins, asm, opName>, - VOP2_REV<revOpSI#"_e64_si", !eq(revOpSI, opName)>, + def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>, + VOP3DisableFields<1, 0, HasMods>; + + def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>, VOP3DisableFields<1, 0, HasMods>; +} + +multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm, + list<dag> pattern, string opName, string revOp, + bit HasMods = 1, bit UseFullOp = 0> { + + def "" : VOP3_Pseudo <outs, ins, pattern, opName>, + VOP2_REV<revOp#"_e64", !eq(revOp, opName)>; - def _vi : VOP3_Real_vi <op.VI3, - outs, ins, asm, opName>, - VOP2_REV<revOpVI#"_e64_vi", !eq(revOpVI, opName)>, + def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>, VOP3DisableFields<1, 0, HasMods>; + + // No VI instruction. This class is for SI only. } +// XXX - Is v_div_scale_{f32|f64} only available in vop3b without +// option of implicit vcc use? multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm, list<dag> pattern, string opName, string revOp, bit HasMods = 1, bit UseFullOp = 0> { @@ -974,19 +1011,27 @@ multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm, // can write it into any SGPR. 
We currently don't use the carry out, // so for now hardcode it to VCC as well. let sdst = SIOperand.VCC, Defs = [VCC] in { - def _si : VOP3b <op.SI3, outs, ins, asm, pattern>, - VOP3DisableFields<1, 0, HasMods>, - SIMCInstr<opName#"_e64", SISubtarget.SI>, - VOP2_REV<revOp#"_e64_si", !eq(revOp, opName)>; - - // TODO: Do we need this VI variant here? - /*def _vi : VOP3b_vi <op.VI3, outs, ins, asm, pattern>, - VOP3DisableFields<1, 0, HasMods>, - SIMCInstr<opName#"_e64", SISubtarget.VI>, - VOP2_REV<revOp#"_e64_vi", !eq(revOp, opName)>;*/ + def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>, + VOP3DisableFields<1, 0, HasMods>; + + def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>, + VOP3DisableFields<1, 0, HasMods>; } // End sdst = SIOperand.VCC, Defs = [VCC] } +multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm, + list<dag> pattern, string opName, string revOp, + bit HasMods = 1, bit UseFullOp = 0> { + def "" : VOP3_Pseudo <outs, ins, pattern, opName>; + + + def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>, + VOP3DisableFields<1, 1, HasMods>; + + def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>, + VOP3DisableFields<1, 1, HasMods>; +} + multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm, list<dag> pattern, string opName, bit HasMods, bit defExec> { @@ -1046,33 +1091,30 @@ multiclass VOP1Inst <vop1 op, string opName, VOPProfile P, multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P, SDPatternOperator node = null_frag> { - def _e32 : VOP1 <op.SI, P.Outs, P.Ins32, opName#P.Asm32, []>, - VOP <opName>; + defm _e32 : VOP1SI_m <op, P.Outs, P.Ins32, opName#P.Asm32, [], opName>; - def _e64 : VOP3Common <P.Outs, P.Ins64, opName#P.Asm64, + defm _e64 : VOP3SI_1_m <op, P.Outs, P.Ins64, opName#P.Asm64, !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod))))], - [(set P.DstVT:$dst, (node P.Src0VT:$src0))])>, - VOP <opName>, - VOP3e <op.SI3>, - 
VOP3DisableFields<0, 0, P.HasModifiers>; + [(set P.DstVT:$dst, (node P.Src0VT:$src0))]), + opName, P.HasModifiers>; } multiclass VOP2_Helper <vop2 op, string opName, dag outs, dag ins32, string asm32, list<dag> pat32, dag ins64, string asm64, list<dag> pat64, - string revOpSI, string revOpVI, bit HasMods> { - defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOpSI, revOpVI>; + string revOp, bit HasMods> { + defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>; defm _e64 : VOP3_2_m <op, - outs, ins64, opName#"_e64"#asm64, pat64, opName, revOpSI, revOpVI, HasMods + outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods >; } multiclass VOP2Inst <vop2 op, string opName, VOPProfile P, SDPatternOperator node = null_frag, - string revOpSI = opName, string revOpVI = revOpSI> : VOP2_Helper < + string revOp = opName> : VOP2_Helper < op, opName, P.Outs, P.Ins32, P.Asm32, [], P.Ins64, P.Asm64, @@ -1082,15 +1124,30 @@ multiclass VOP2Inst <vop2 op, string opName, VOPProfile P, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOpSI, revOpVI, P.HasModifiers + revOp, P.HasModifiers >; +multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P, + SDPatternOperator node = null_frag, + string revOp = opName> { + defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>; + + defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64, + !if(P.HasModifiers, + [(set P.DstVT:$dst, + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, + i1:$clamp, i32:$omod)), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], + [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), + opName, revOp, P.HasModifiers>; +} + multiclass VOP2b_Helper <vop2 op, string opName, dag outs, dag ins32, string asm32, list<dag> pat32, dag ins64, string asm64, list<dag> pat64, string revOp, bit HasMods> { - defm _e32 : VOP2_m <op, 
outs, ins32, asm32, pat32, opName, revOp, revOp>; + defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>; defm _e64 : VOP3b_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods @@ -1116,16 +1173,16 @@ multiclass VOP2bInst <vop2 op, string opName, VOPProfile P, multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs, dag ins32, string asm32, list<dag> pat32, dag ins64, string asm64, list<dag> pat64, - string revOpSI, string revOpVI, bit HasMods> { - defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOpSI>; + string revOp, bit HasMods> { + defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>; defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, - revOpSI, revOpVI, HasMods>; + revOp, HasMods>; } multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P, SDPatternOperator node = null_frag, - string revOpSI = opName, string revOpVI = revOpSI> + string revOp = opName> : VOP2_VI3_Helper < op, opName, P.Outs, P.Ins32, P.Asm32, [], @@ -1136,9 +1193,26 @@ multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOpSI, revOpVI, P.HasModifiers + revOp, P.HasModifiers >; +multiclass VOP2MADK <vop2 op, string opName, list<dag> pattern = []> { + + def "" : VOP2_Pseudo <VOP_MADK.Outs, VOP_MADK.Ins, pattern, opName>; + +let isCodeGenOnly = 0 in { + def _si : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins, + !strconcat(opName, VOP_MADK.Asm), []>, + SIMCInstr <opName#"_e32", SISubtarget.SI>, + VOP2_MADKe <op.SI>; + + def _vi : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins, + !strconcat(opName, VOP_MADK.Asm), []>, + SIMCInstr <opName#"_e32", SISubtarget.VI>, + VOP2_MADKe <op.VI>; +} // End isCodeGenOnly = 0 +} + class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOPCCommon <ins, "", pattern>, VOP <opName>, @@ -1274,9 
+1348,31 @@ multiclass VOP3Inst <vop3 op, string opName, VOPProfile P, P.NumSrcArgs, P.HasModifiers >; +// Special case for v_div_fmas_{f32|f64}, since it seems to be the +// only VOP instruction that implicitly reads VCC. +multiclass VOP3_VCC_Inst <vop3 op, string opName, + VOPProfile P, + SDPatternOperator node = null_frag> : VOP3_Helper < + op, opName, + P.Outs, + (ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0, + InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1, + InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2, + ClampMod:$clamp, + omod:$omod), + " $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", + [(set P.DstVT:$dst, + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, + i1:$clamp, i32:$omod)), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)), + (i1 VCC)))], + 3, 1 +>; + multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc, string opName, list<dag> pattern> : - VOP3b_2_m < + VOP3b_3_m < op, (outs vrc:$vdst, SReg_64:$sdst), (ins InputModsNoDefault:$src0_modifiers, arc:$src0, InputModsNoDefault:$src1_modifiers, arc:$src1, @@ -1307,22 +1403,21 @@ class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat< // Interpolation opcodes //===----------------------------------------------------------------------===// -class VINTRP_Pseudo <string opName, dag outs, dag ins, string asm, - list<dag> pattern> : - VINTRPCommon <outs, ins, asm, pattern>, +class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : + VINTRPCommon <outs, ins, "", pattern>, SIMCInstr<opName, SISubtarget.NONE> { let isPseudo = 1; } class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins, - string asm, list<dag> pattern> : - VINTRPCommon <outs, ins, asm, pattern>, + string asm> : + VINTRPCommon <outs, ins, asm, []>, VINTRPe <op>, SIMCInstr<opName, SISubtarget.SI>; class VINTRP_Real_vi <bits <2> op, string 
opName, dag outs, dag ins, - string asm, list<dag> pattern> : - VINTRPCommon <outs, ins, asm, pattern>, + string asm> : + VINTRPCommon <outs, ins, asm, []>, VINTRPe_vi <op>, SIMCInstr<opName, SISubtarget.VI>; @@ -1331,11 +1426,11 @@ multiclass VINTRP_m <bits <2> op, string opName, dag outs, dag ins, string asm, list<dag> pattern = []> { let DisableEncoding = disableEncoding, Constraints = constraints in { - def "" : VINTRP_Pseudo <opName, outs, ins, asm, pattern>; + def "" : VINTRP_Pseudo <opName, outs, ins, pattern>; - def _si : VINTRP_Real_si <op, opName, outs, ins, asm, pattern>; + def _si : VINTRP_Real_si <op, opName, outs, ins, asm>; - def _vi : VINTRP_Real_vi <op, opName, outs, ins, asm, pattern>; + def _vi : VINTRP_Real_vi <op, opName, outs, ins, asm>; } } @@ -1467,70 +1562,92 @@ multiclass DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]", []>; -class DS_1A_si <bits<8> op, dag outs, dag ins, string asm, list<dag> pat> : - DS_si <op, outs, ins, asm, pat> { - bits<16> offset; - - // Single load interpret the 2 i8imm operands as a single i16 offset. - let offset0 = offset{7-0}; - let offset1 = offset{15-8}; - - let hasSideEffects = 0; +// 1 address, 1 data. +multiclass DS_1A1D_RET_m <bits<8> op, string opName, dag outs, dag ins, + string asm, list<dag> pat, string noRetOp> { + let mayLoad = 1, mayStore = 1, + hasPostISelHook = 1 // Adjusted to no return version. + in { + def "" : DS_Pseudo <opName, outs, ins, pat>, + AtomicNoRet<noRetOp, 1>; + + let data1 = 0 in { + def _si : DS_1A_Real_si <op, opName, outs, ins, asm>; + def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>; + } + } } -// 1 address, 1 data. 
-class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A_si < - op, +multiclass DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, + string noRetOp = ""> : DS_1A1D_RET_m < + op, asm, (outs rc:$vdst), (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0), - asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", []>, - AtomicNoRet<noRetOp, 1> { - - let data1 = 0; - let mayStore = 1; - let mayLoad = 1; + asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", [], noRetOp>; - let hasPostISelHook = 1; // Adjusted to no return version. +// 1 address, 2 data. +multiclass DS_1A2D_RET_m <bits<8> op, string opName, dag outs, dag ins, + string asm, list<dag> pat, string noRetOp> { + let mayLoad = 1, mayStore = 1, + hasPostISelHook = 1 // Adjusted to no return version. + in { + def "" : DS_Pseudo <opName, outs, ins, pat>, + AtomicNoRet<noRetOp, 1>; + + def _si : DS_1A_Real_si <op, opName, outs, ins, asm>; + def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>; + } } -// 1 address, 2 data. -class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A_si < - op, +multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, + string noRetOp = ""> : DS_1A2D_RET_m < + op, asm, (outs rc:$vdst), (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0), asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]", - []>, - AtomicNoRet<noRetOp, 1> { - let mayStore = 1; - let mayLoad = 1; - let hasPostISelHook = 1; // Adjusted to no return version. -} + [], noRetOp>; // 1 address, 2 data. 
-class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A_si < - op, +multiclass DS_1A2D_NORET_m <bits<8> op, string opName, dag outs, dag ins, + string asm, list<dag> pat, string noRetOp> { + let mayLoad = 1, mayStore = 1 in { + def "" : DS_Pseudo <opName, outs, ins, pat>, + AtomicNoRet<noRetOp, 0>; + + def _si : DS_1A_Real_si <op, opName, outs, ins, asm>; + def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>; + } +} + +multiclass DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, + string noRetOp = asm> : DS_1A2D_NORET_m < + op, asm, (outs), (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0), asm#" $addr, $data0, $data1"#"$offset"#" [M0]", - []>, - AtomicNoRet<noRetOp, 0> { - let mayStore = 1; - let mayLoad = 1; -} + [], noRetOp>; // 1 address, 1 data. -class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A_si < - op, +multiclass DS_1A1D_NORET_m <bits<8> op, string opName, dag outs, dag ins, + string asm, list<dag> pat, string noRetOp> { + let mayLoad = 1, mayStore = 1 in { + def "" : DS_Pseudo <opName, outs, ins, pat>, + AtomicNoRet<noRetOp, 0>; + + let data1 = 0 in { + def _si : DS_1A_Real_si <op, opName, outs, ins, asm>; + def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>; + } + } +} + +multiclass DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, + string noRetOp = asm> : DS_1A1D_NORET_m < + op, asm, (outs), (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0), asm#" $addr, $data0"#"$offset"#" [M0]", - []>, - AtomicNoRet<noRetOp, 0> { - - let data1 = 0; - let mayStore = 1; - let mayLoad = 1; -} + [], noRetOp>; //===----------------------------------------------------------------------===// // MTBUF classes @@ -1596,45 +1713,111 @@ multiclass MTBUF_Load_Helper <bits<3> op, string opName, // MUBUF classes //===----------------------------------------------------------------------===// -class MUBUF_si <bits<7> op, dag 
outs, dag ins, string asm, list<dag> pattern> : - MUBUF <outs, ins, asm, pattern>, MUBUFe <op> { - let lds = 0; -} - -class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> { - let lds = 0; +class mubuf <bits<7> si, bits<7> vi = si> { + field bits<7> SI = si; + field bits<7> VI = vi; } class MUBUFAddr64Table <bit is_addr64, string suffix = ""> { - bit IsAddr64 = is_addr64; string OpName = NAME # suffix; } -class MUBUFAtomicAddr64 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> - : MUBUF_si <op, outs, ins, asm, pattern> { +class MUBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : + MUBUF <outs, ins, "", pattern>, + SIMCInstr<opName, SISubtarget.NONE> { + let isPseudo = 1; + + // dummy fields, so that we can use let statements around multiclasses + bits<1> offen; + bits<1> idxen; + bits<8> vaddr; + bits<1> glc; + bits<1> slc; + bits<1> tfe; + bits<8> soffset; +} + +class MUBUF_Real_si <mubuf op, string opName, dag outs, dag ins, + string asm> : + MUBUF <outs, ins, asm, []>, + MUBUFe <op.SI>, + SIMCInstr<opName, SISubtarget.SI> { + let lds = 0; +} - let offen = 0; - let idxen = 0; - let addr64 = 1; - let tfe = 0; +class MUBUF_Real_vi <mubuf op, string opName, dag outs, dag ins, + string asm> : + MUBUF <outs, ins, asm, []>, + MUBUFe_vi <op.VI>, + SIMCInstr<opName, SISubtarget.VI> { let lds = 0; - let soffset = 128; } -class MUBUFAtomicOffset <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> - : MUBUF_si <op, outs, ins, asm, pattern> { +multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm, + list<dag> pattern> { + + def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, + MUBUFAddr64Table <0>; - let offen = 0; - let idxen = 0; - let addr64 = 0; - let tfe = 0; + let addr64 = 0 in { + def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; + } + + def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>; +} + +multiclass MUBUFAddr64_m <mubuf 
op, string opName, dag outs, + dag ins, string asm, list<dag> pattern> { + + def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, + MUBUFAddr64Table <1>; + + let addr64 = 1 in { + def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; + } + + // There is no VI version. If the pseudo is selected, it should be lowered + // for VI appropriately. +} + +class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + MUBUF <outs, ins, asm, pattern>, MUBUFe <op> { let lds = 0; - let vaddr = 0; } -multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc, +multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins, + string asm, list<dag> pattern, bit is_return> { + + def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, + MUBUFAddr64Table <0, !if(is_return, "_RTN", "")>, + AtomicNoRet<NAME#"_OFFSET", is_return>; + + let offen = 0, idxen = 0, tfe = 0, vaddr = 0 in { + let addr64 = 0 in { + def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; + } + + def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>; + } +} + +multiclass MUBUFAtomicAddr64_m <mubuf op, string opName, dag outs, dag ins, + string asm, list<dag> pattern, bit is_return> { + + def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, + MUBUFAddr64Table <1, !if(is_return, "_RTN", "")>, + AtomicNoRet<NAME#"_ADDR64", is_return>; + + let offen = 0, idxen = 0, addr64 = 1, tfe = 0, soffset = 128 in { + def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; + } + + // There is no VI version. If the pseudo is selected, it should be lowered + // for VI appropriately. 
+} + +multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, ValueType vt, SDPatternOperator atomic> { let mayStore = 1, mayLoad = 1, hasPostISelHook = 1 in { @@ -1642,208 +1825,135 @@ multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc, // No return variants let glc = 0 in { - def _ADDR64 : MUBUFAtomicAddr64 < - op, (outs), + defm _ADDR64 : MUBUFAtomicAddr64_m < + op, name#"_addr64", (outs), (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", [] - >, MUBUFAddr64Table<1>, AtomicNoRet<NAME#"_ADDR64", 0>; + name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", [], 0 + >; - def _OFFSET : MUBUFAtomicOffset < - op, (outs), + defm _OFFSET : MUBUFAtomicOffset_m < + op, name#"_offset", (outs), (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset, SCSrc_32:$soffset, slc:$slc), - name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [] - >, MUBUFAddr64Table<0>, AtomicNoRet<NAME#"_OFFSET", 0>; + name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0 + >; } // glc = 0 // Variant that return values let glc = 1, Constraints = "$vdata = $vdata_in", DisableEncoding = "$vdata_in" in { - def _RTN_ADDR64 : MUBUFAtomicAddr64 < - op, (outs rc:$vdata), + defm _RTN_ADDR64 : MUBUFAtomicAddr64_m < + op, name#"_rtn_addr64", (outs rc:$vdata), (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc", [(set vt:$vdata, (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset, - i1:$slc), vt:$vdata_in))] - >, MUBUFAddr64Table<1, "_RTN">, AtomicNoRet<NAME#"_ADDR64", 1>; + i1:$slc), vt:$vdata_in))], 1 + >; - def _RTN_OFFSET : MUBUFAtomicOffset < - op, (outs rc:$vdata), + defm _RTN_OFFSET : MUBUFAtomicOffset_m < + op, name#"_rtn_offset", (outs rc:$vdata), (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset, SCSrc_32:$soffset, slc:$slc), name#" $vdata, $srsrc, 
$soffset"#"$offset"#" glc $slc", [(set vt:$vdata, (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, - i1:$slc), vt:$vdata_in))] - >, MUBUFAddr64Table<0, "_RTN">, AtomicNoRet<NAME#"_OFFSET", 1>; + i1:$slc), vt:$vdata_in))], 1 + >; } // glc = 1 } // mayStore = 1, mayLoad = 1, hasPostISelHook = 1 } -multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass, +multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass, ValueType load_vt = i32, SDPatternOperator ld = null_frag> { let mayLoad = 1, mayStore = 0 in { - - let addr64 = 0 in { - - let offen = 0, idxen = 0, vaddr = 0 in { - def _OFFSET : MUBUF_si <op, (outs regClass:$vdata), - (ins SReg_128:$srsrc, - mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc, - slc:$slc, tfe:$tfe), - asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe", - [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc, - i32:$soffset, i16:$offset, - i1:$glc, i1:$slc, i1:$tfe)))]>, - MUBUFAddr64Table<0>; - } - - let offen = 1, idxen = 0 in { - def _OFFEN : MUBUF_si <op, (outs regClass:$vdata), - (ins SReg_128:$srsrc, VGPR_32:$vaddr, - SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc, - tfe:$tfe), - asm#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>; - } - - let offen = 0, idxen = 1 in { - def _IDXEN : MUBUF_si <op, (outs regClass:$vdata), - (ins SReg_128:$srsrc, VGPR_32:$vaddr, - mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc, - slc:$slc, tfe:$tfe), - asm#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>; - } - - let offen = 1, idxen = 1 in { - def _BOTHEN : MUBUF_si <op, (outs regClass:$vdata), - (ins SReg_128:$srsrc, VReg_64:$vaddr, - SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe), - asm#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>; - } - } - - let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in { - def _ADDR64 : MUBUF_si <op, (outs 
regClass:$vdata), - (ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset), - asm#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset", - [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc, - i64:$vaddr, i16:$offset)))]>, MUBUFAddr64Table<1>; - } - } -} - -multiclass MUBUF_Load_Helper_vi <bits<7> op, string asm, RegisterClass regClass, - ValueType load_vt = i32, - SDPatternOperator ld = null_frag> { - - let lds = 0, mayLoad = 1 in { let offen = 0, idxen = 0, vaddr = 0 in { - def _OFFSET : MUBUF_vi <op, (outs regClass:$vdata), + defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata), (ins SReg_128:$srsrc, mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe), - asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe", + name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe", [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, - i1:$glc, i1:$slc, i1:$tfe)))]>, - MUBUFAddr64Table<0>; + i1:$glc, i1:$slc, i1:$tfe)))]>; } let offen = 1, idxen = 0 in { - def _OFFEN : MUBUF_vi <op, (outs regClass:$vdata), + defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata), (ins SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), - asm#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>; + name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>; } let offen = 0, idxen = 1 in { - def _IDXEN : MUBUF_vi <op, (outs regClass:$vdata), + defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata), (ins SReg_128:$srsrc, VGPR_32:$vaddr, mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe), - asm#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>; + name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>; } let offen = 1, idxen = 1 in { - def _BOTHEN : MUBUF_vi <op, (outs regClass:$vdata), + defm _BOTHEN : MUBUF_m <op, name#"_bothen", 
(outs regClass:$vdata), (ins SReg_128:$srsrc, VReg_64:$vaddr, SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe), - asm#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>; + name#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>; + } + + let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in { + defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset), + name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset", + [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc, + i64:$vaddr, i16:$offset)))]>; } } } -multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass, +multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass, ValueType store_vt, SDPatternOperator st> { - let mayLoad = 0, mayStore = 1 in { - let addr64 = 0 in { - - def "" : MUBUF_si < - op, (outs), - (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset, - mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc, - tfe:$tfe), - name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"# - "$glc"#"$slc"#"$tfe", - [] - >; + defm : MUBUF_m <op, name, (outs), + (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset, + mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc, + tfe:$tfe), + name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"# + "$glc"#"$slc"#"$tfe", []>; let offen = 0, idxen = 0, vaddr = 0 in { - def _OFFSET : MUBUF_si < - op, (outs), - (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset, - SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe), - name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe", - [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, - i1:$tfe))] - >, MUBUFAddr64Table<0>; + defm _OFFSET : MUBUF_m <op, name#"_offset",(outs), + (ins 
vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset, + SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe), + name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe", + [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, + i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>; } // offen = 0, idxen = 0, vaddr = 0 let offen = 1, idxen = 0 in { - def _OFFEN : MUBUF_si < - op, (outs), - (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset, - mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), - name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"# - "$glc"#"$slc"#"$tfe", - [] - >; + defm _OFFEN : MUBUF_m <op, name#"_offen", (outs), + (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset, + mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), + name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"# + "$glc"#"$slc"#"$tfe", []>; } // end offen = 1, idxen = 0 - } // End addr64 = 0 - - def _ADDR64 : MUBUF_si < - op, (outs), - (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset), - name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset", - [(st store_vt:$vdata, - (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]>, MUBUFAddr64Table<1> - { - - let mayLoad = 0; - let mayStore = 1; - - // Encoding - let offen = 0; - let idxen = 0; - let glc = 0; - let addr64 = 1; - let slc = 0; - let tfe = 0; - let soffset = 128; // ZERO - } - } // End mayLoad = 0, mayStore = 1 + let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0, + soffset = 128 /* ZERO */ in { + defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs), + (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset), + name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset", + [(st store_vt:$vdata, + (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]>; + } + } // End mayLoad = 0, mayStore = 1 } class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : @@ -1912,7 +2022,7 @@ multiclass MIMG_NoSampler <bits<7> op, 
string asm> { class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc, - RegisterClass src_rc> : MIMG < + RegisterClass src_rc, int wqm> : MIMG < op, (outs dst_rc:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, @@ -1924,33 +2034,41 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, let mayLoad = 1; let mayStore = 0; let hasPostISelHook = 1; + let WQM = wqm; } multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm, RegisterClass dst_rc, - int channels> { - def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32>, + int channels, int wqm> { + def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm>, MIMG_Mask<asm#"_V1", channels>; - def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64>, + def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm>, MIMG_Mask<asm#"_V2", channels>; - def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128>, + def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm>, MIMG_Mask<asm#"_V4", channels>; - def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256>, + def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm>, MIMG_Mask<asm#"_V8", channels>; - def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512>, + def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm>, MIMG_Mask<asm#"_V16", channels>; } multiclass MIMG_Sampler <bits<7> op, string asm> { - defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1>; - defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2>; - defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3>; - defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>; + defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 0>; + defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 0>; + defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 0>; + defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 0>; +} + +multiclass MIMG_Sampler_WQM <bits<7> op, string asm> { + defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 1>; + 
defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 1>; + defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 1>; + defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 1>; } class MIMG_Gather_Helper <bits<7> op, string asm, RegisterClass dst_rc, - RegisterClass src_rc> : MIMG < + RegisterClass src_rc, int wqm> : MIMG < op, (outs dst_rc:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, @@ -1971,28 +2089,36 @@ class MIMG_Gather_Helper <bits<7> op, string asm, // Therefore, disable all code which updates DMASK by setting these two: let MIMG = 0; let hasPostISelHook = 0; + let WQM = wqm; } multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm, RegisterClass dst_rc, - int channels> { - def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32>, + int channels, int wqm> { + def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>, MIMG_Mask<asm#"_V1", channels>; - def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>, + def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>, MIMG_Mask<asm#"_V2", channels>; - def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>, + def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>, MIMG_Mask<asm#"_V4", channels>; - def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>, + def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>, MIMG_Mask<asm#"_V8", channels>; - def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>, + def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>, MIMG_Mask<asm#"_V16", channels>; } multiclass MIMG_Gather <bits<7> op, string asm> { - defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1>; - defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>; - defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>; - defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>; + defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 0>; + defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 0>; + defm _V3 : MIMG_Gather_Src_Helper<op, asm, 
VReg_96, 3, 0>; + defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 0>; +} + +multiclass MIMG_Gather_WQM <bits<7> op, string asm> { + defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 1>; + defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 1>; + defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 1>; + defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 1>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 4b1a84662cb5..bbedef29303b 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -152,9 +152,11 @@ defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32", [(set i32:$dst, (ctlz_zero_undef i32:$src0))] >; -//defm S_FLBIT_I32_B64 : SOP1_32 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>; -defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>; -//defm S_FLBIT_I32_I64 : SOP1_32 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>; +defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>; +defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", + [(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))] +>; +defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>; defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8", [(set i32:$dst, (sext_inreg i32:$src0, i8))] >; @@ -764,88 +766,88 @@ defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">; //===----------------------------------------------------------------------===// -def DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>; -def DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>; -def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>; -def DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>; -def DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>; -def DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>; -def DS_MAX_I32 : 
DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>; -def DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>; -def DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>; -def DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>; -def DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>; -def DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>; -def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>; -def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>; -def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>; -def DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>; -def DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>; - -def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">; -def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">; -def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">; -def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">; -def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">; -def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">; -def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">; -def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">; -def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">; -def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">; -def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">; -def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">; -def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">; -def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>; +defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>; +defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>; +defm DS_RSUB_U32 : DS_1A1D_NORET 
<0x2, "ds_rsub_u32", VGPR_32>; +defm DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>; +defm DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>; +defm DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>; +defm DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>; +defm DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>; +defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>; +defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>; +defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>; +defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>; +defm DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>; +defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>; +defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>; +defm DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>; +defm DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>; + +defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">; +defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">; +defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">; +defm DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">; +defm DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">; +defm DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">; +defm DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">; +defm DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">; +defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">; +defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">; +defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">; +defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">; +defm DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, 
"ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">; +defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>; //def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">; //def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">; -def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">; -def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">; -def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">; -def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">; +defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">; +defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">; +defm DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">; +defm DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">; let SubtargetPredicate = isCI in { -def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">; +defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">; } // End isCI -def DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>; -def DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>; -def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>; -def DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>; -def DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>; -def DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>; -def DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>; -def DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>; -def DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>; -def DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>; -def DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>; -def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>; -def 
DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>; -def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>; -def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>; -def DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>; -def DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>; - -def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">; -def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">; -def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">; -def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">; -def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">; -def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">; -def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">; -def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">; -def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">; -def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">; -def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">; -def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">; -def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">; -def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">; +defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>; +defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>; +defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>; +defm DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>; +defm DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>; +defm DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>; +defm DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>; +defm DS_MIN_U64 : DS_1A1D_NORET <0x47, 
"ds_min_u64", VReg_64>; +defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>; +defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>; +defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>; +defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>; +defm DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>; +defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>; +defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>; +defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>; +defm DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>; + +defm DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">; +defm DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">; +defm DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">; +defm DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">; +defm DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">; +defm DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">; +defm DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">; +defm DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">; +defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">; +defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">; +defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">; +defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">; +defm DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">; +defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">; //def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">; //def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, 
"ds_wrxchg2st64_b64">; -def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">; -def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">; -def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_f64", VReg_64, "ds_min_f64">; -def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_f64", VReg_64, "ds_max_f64">; +defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">; +defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">; +defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">; +defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">; //let SubtargetPredicate = isCI in { // DS_CONDXCHG32_RTN_B64 @@ -874,123 +876,120 @@ defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>; defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>; -defm DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>; -defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>; +defm DS_READ2_B64 : DS_Load2_Helper <0x00000077, "ds_read2_b64", VReg_128>; +defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000078, "ds_read2st64_b64", VReg_128>; //===----------------------------------------------------------------------===// // MUBUF Instructions //===----------------------------------------------------------------------===// -let SubtargetPredicate = isSICI in { - -//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "buffer_load_format_x", []>; -//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "buffer_load_format_xy", []>; -//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "buffer_load_format_xyz", []>; -defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "buffer_load_format_xyzw", VReg_128>; -//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "buffer_store_format_x", 
[]>; -//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "buffer_store_format_xy", []>; -//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "buffer_store_format_xyz", []>; -//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "buffer_store_format_xyzw", []>; +//def BUFFER_LOAD_FORMAT_X : MUBUF_ <mubuf<0x00>, "buffer_load_format_x", []>; +//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <mubuf<0x01>, "buffer_load_format_xy", []>; +//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <mubuf<0x02>, "buffer_load_format_xyz", []>; +defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <mubuf<0x03>, "buffer_load_format_xyzw", VReg_128>; +//def BUFFER_STORE_FORMAT_X : MUBUF_ <mubuf<0x04>, "buffer_store_format_x", []>; +//def BUFFER_STORE_FORMAT_XY : MUBUF_ <mubuf<0x05>, "buffer_store_format_xy", []>; +//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <mubuf<0x06>, "buffer_store_format_xyz", []>; +//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <mubuf<0x07>, "buffer_store_format_xyzw", []>; defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper < - 0x00000008, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global + mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global >; defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper < - 0x00000009, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global + mubuf<0x09, 0x11>, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global >; defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper < - 0x0000000a, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global + mubuf<0x0a, 0x12>, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global >; defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper < - 0x0000000b, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global + mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global >; defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper < - 0x0000000c, "buffer_load_dword", VGPR_32, i32, global_load + mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load >; defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper < - 0x0000000d, "buffer_load_dwordx2", VReg_64, v2i32, 
global_load + mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load >; defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper < - 0x0000000e, "buffer_load_dwordx4", VReg_128, v4i32, global_load + mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load >; defm BUFFER_STORE_BYTE : MUBUF_Store_Helper < - 0x00000018, "buffer_store_byte", VGPR_32, i32, truncstorei8_global + mubuf<0x18>, "buffer_store_byte", VGPR_32, i32, truncstorei8_global >; defm BUFFER_STORE_SHORT : MUBUF_Store_Helper < - 0x0000001a, "buffer_store_short", VGPR_32, i32, truncstorei16_global + mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global >; defm BUFFER_STORE_DWORD : MUBUF_Store_Helper < - 0x0000001c, "buffer_store_dword", VGPR_32, i32, global_store + mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store >; defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper < - 0x0000001d, "buffer_store_dwordx2", VReg_64, v2i32, global_store + mubuf<0x1d>, "buffer_store_dwordx2", VReg_64, v2i32, global_store >; defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < - 0x0000001e, "buffer_store_dwordx4", VReg_128, v4i32, global_store + mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store >; -//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "buffer_atomic_swap", []>; + defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic < - 0x00000030, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global + mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global >; -//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "buffer_atomic_cmpswap", []>; +//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", []>; defm BUFFER_ATOMIC_ADD : MUBUF_Atomic < - 0x00000032, "buffer_atomic_add", VGPR_32, i32, atomic_add_global + mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global >; defm BUFFER_ATOMIC_SUB : MUBUF_Atomic < - 0x00000033, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global + mubuf<0x33, 0x43>, "buffer_atomic_sub", VGPR_32, i32, 
atomic_sub_global >; -//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "buffer_atomic_rsub", []>; +//def BUFFER_ATOMIC_RSUB : MUBUF_ <mubuf<0x34>, "buffer_atomic_rsub", []>; // isn't on CI & VI defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic < - 0x00000035, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global + mubuf<0x35, 0x44>, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global >; defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic < - 0x00000036, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global + mubuf<0x36, 0x45>, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global >; defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic < - 0x00000037, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global + mubuf<0x37, 0x46>, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global >; defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic < - 0x00000038, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global + mubuf<0x38, 0x47>, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global >; defm BUFFER_ATOMIC_AND : MUBUF_Atomic < - 0x00000039, "buffer_atomic_and", VGPR_32, i32, atomic_and_global + mubuf<0x39, 0x48>, "buffer_atomic_and", VGPR_32, i32, atomic_and_global >; defm BUFFER_ATOMIC_OR : MUBUF_Atomic < - 0x0000003a, "buffer_atomic_or", VGPR_32, i32, atomic_or_global + mubuf<0x3a, 0x49>, "buffer_atomic_or", VGPR_32, i32, atomic_or_global >; defm BUFFER_ATOMIC_XOR : MUBUF_Atomic < - 0x0000003b, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global ->; -//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "buffer_atomic_inc", []>; -//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "buffer_atomic_dec", []>; -//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "buffer_atomic_fcmpswap", []>; -//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "buffer_atomic_fmin", []>; -//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "buffer_atomic_fmax", []>; -//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "buffer_atomic_swap_x2", []>; -//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "buffer_atomic_cmpswap_x2", []>; -//def BUFFER_ATOMIC_ADD_X2 : 
MUBUF_X2 <0x00000052, "buffer_atomic_add_x2", []>; -//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "buffer_atomic_sub_x2", []>; -//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "buffer_atomic_rsub_x2", []>; -//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "buffer_atomic_smin_x2", []>; -//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "buffer_atomic_umin_x2", []>; -//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "buffer_atomic_smax_x2", []>; -//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "buffer_atomic_umax_x2", []>; -//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "buffer_atomic_and_x2", []>; -//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "buffer_atomic_or_x2", []>; -//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "buffer_atomic_xor_x2", []>; -//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "buffer_atomic_inc_x2", []>; -//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "buffer_atomic_dec_x2", []>; -//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "buffer_atomic_fcmpswap_x2", []>; -//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "buffer_atomic_fmin_x2", []>; -//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "buffer_atomic_fmax_x2", []>; -//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "buffer_wbinvl1_sc", []>; -//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "buffer_wbinvl1", []>; - -} // End SubtargetPredicate = isSICI + mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global +>; +//def BUFFER_ATOMIC_INC : MUBUF_ <mubuf<0x3c, 0x4b>, "buffer_atomic_inc", []>; +//def BUFFER_ATOMIC_DEC : MUBUF_ <mubuf<0x3d, 0x4c>, "buffer_atomic_dec", []>; +//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI +//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI +//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI +//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, 
"buffer_atomic_swap_x2", []>; +//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", []>; +//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>; +//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>; +//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI +//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", []>; +//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", []>; +//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", []>; +//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", []>; +//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <mubuf<0x59, 0x68>, "buffer_atomic_and_x2", []>; +//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <mubuf<0x5a, 0x69>, "buffer_atomic_or_x2", []>; +//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <mubuf<0x5b, 0x6a>, "buffer_atomic_xor_x2", []>; +//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", []>; +//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", []>; +//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI +//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI +//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI +//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <mubuf<0x70>, "buffer_wbinvl1_sc", []>; // isn't on CI & VI +//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", []>; // isn't on SI +//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <mubuf<0x71, 0x3e>, "buffer_wbinvl1", []>; //===----------------------------------------------------------------------===// // MTBUF Instructions @@ -1037,63 +1036,63 @@ defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">; 
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>; //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "image_sample">; -defm IMAGE_SAMPLE_CL : MIMG_Sampler <0x00000021, "image_sample_cl">; +defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, "image_sample">; +defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">; defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">; defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">; defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">; -defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "image_sample_b">; -defm IMAGE_SAMPLE_B_CL : MIMG_Sampler <0x00000026, "image_sample_b_cl">; +defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, "image_sample_b">; +defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">; defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">; -defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "image_sample_c">; -defm IMAGE_SAMPLE_C_CL : MIMG_Sampler <0x00000029, "image_sample_c_cl">; +defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, "image_sample_c">; +defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">; defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">; defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">; defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">; -defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "image_sample_c_b">; -defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler <0x0000002e, "image_sample_c_b_cl">; +defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">; +defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">; defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">; -defm IMAGE_SAMPLE_O : 
MIMG_Sampler <0x00000030, "image_sample_o">; -defm IMAGE_SAMPLE_CL_O : MIMG_Sampler <0x00000031, "image_sample_cl_o">; +defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, "image_sample_o">; +defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">; defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">; defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">; defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">; -defm IMAGE_SAMPLE_B_O : MIMG_Sampler <0x00000035, "image_sample_b_o">; -defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler <0x00000036, "image_sample_b_cl_o">; +defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">; +defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">; defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">; -defm IMAGE_SAMPLE_C_O : MIMG_Sampler <0x00000038, "image_sample_c_o">; -defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler <0x00000039, "image_sample_c_cl_o">; +defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">; +defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">; defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">; defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">; defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">; -defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler <0x0000003d, "image_sample_c_b_o">; -defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler <0x0000003e, "image_sample_c_b_cl_o">; +defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">; +defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">; defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">; -defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "image_gather4">; -defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "image_gather4_cl">; +defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, 
"image_gather4">; +defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">; defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">; -defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "image_gather4_b">; -defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "image_gather4_b_cl">; +defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, "image_gather4_b">; +defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">; defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">; -defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "image_gather4_c">; -defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "image_gather4_c_cl">; +defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, "image_gather4_c">; +defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">; defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">; -defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "image_gather4_c_b">; -defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "image_gather4_c_b_cl">; +defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">; +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">; defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">; -defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "image_gather4_o">; -defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "image_gather4_cl_o">; +defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, "image_gather4_o">; +defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">; defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">; -defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "image_gather4_b_o">; +defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">; defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">; defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">; -defm IMAGE_GATHER4_C_O : MIMG_Gather 
<0x00000058, "image_gather4_c_o">; -defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "image_gather4_c_cl_o">; +defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">; +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">; defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">; -defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "image_gather4_c_b_o">; -defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "image_gather4_c_b_cl_o">; +defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">; +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">; defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">; -defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod">; +defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">; defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">; defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">; defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">; @@ -1445,53 +1444,37 @@ defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32, fminnum>; defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32, fmaxnum>; -defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32, - AMDGPUsmin ->; -defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32, - AMDGPUsmax ->; -defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32, - AMDGPUumin ->; -defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32, - AMDGPUumax ->; +defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>; +defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>; +defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>; +defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>; -// No non-Rev Op on VI 
defm V_LSHRREV_B32 : VOP2Inst < vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag, - "v_lshr_b32", "v_lshrrev_b32" + "v_lshr_b32" >; -// No non-Rev OP on VI defm V_ASHRREV_I32 : VOP2Inst < vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag, - "v_ashr_i32", "v_ashrrev_i32" + "v_ashr_i32" >; -// No non-Rev OP on VI defm V_LSHLREV_B32 : VOP2Inst < vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag, - "v_lshl_b32", "v_lshlrev_b32" + "v_lshl_b32" >; -defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", - VOP_I32_I32_I32, and>; -defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", - VOP_I32_I32_I32, or ->; -defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", - VOP_I32_I32_I32, xor ->; +defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>; +defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>; +defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>; defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>; } // End isCommutable = 1 -defm V_MADMK_F32 : VOP2Inst <vop2<0x20, 0x17>, "v_madmk_f32", VOP_F32_F32_F32>; +defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">; let isCommutable = 1 in { -defm V_MADAK_F32 : VOP2Inst <vop2<0x21, 0x18>, "v_madak_f32", VOP_F32_F32_F32>; +defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">; } // End isCommutable = 1 let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC @@ -1503,9 +1486,7 @@ let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32", VOP_I32_I32_I32, add >; -defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", - VOP_I32_I32_I32, sub ->; +defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>; defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32", VOP_I32_I32_I32, null_frag, "v_sub_i32" @@ -1513,10 +1494,10 @@ defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, 
"v_subrev_i32", let Uses = [VCC] in { // Carry-in comes from VCC defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32", - VOP_I32_I32_I32_VCC, adde + VOP_I32_I32_I32_VCC >; defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32", - VOP_I32_I32_I32_VCC, sube + VOP_I32_I32_I32_VCC >; defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32", VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32" @@ -1529,47 +1510,41 @@ defm V_READLANE_B32 : VOP2SI_3VI_m < vop3 <0x001, 0x289>, "v_readlane_b32", (outs SReg_32:$vdst), - (ins VGPR_32:$src0, SSrc_32:$vsrc1), - "v_readlane_b32 $vdst, $src0, $vsrc1" + (ins VGPR_32:$src0, SCSrc_32:$src1), + "v_readlane_b32 $vdst, $src0, $src1" >; defm V_WRITELANE_B32 : VOP2SI_3VI_m < vop3 <0x002, 0x28a>, "v_writelane_b32", (outs VGPR_32:$vdst), - (ins SReg_32:$src0, SSrc_32:$vsrc1), - "v_writelane_b32 $vdst, $src0, $vsrc1" + (ins SReg_32:$src0, SCSrc_32:$src1), + "v_writelane_b32 $vdst, $src0, $src1" >; // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { -let isCommutable = 1 in { -defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32", - VOP_F32_F32_F32 ->; -} // End isCommutable = 1 - -defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32", +defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy >; -defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32", +defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy >; let isCommutable = 1 in { -defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>; -defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32", - VOP_I32_I32_I32, sra ->; - -let hasPostISelHook = 1 in { -defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>; -} - +defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>; +defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>; +defm V_LSHL_B32 : 
VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>; } // End isCommutable = 1 } // End let SubtargetPredicate = SICI +let isCommutable = 1 in { +defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32", + VOP_F32_F32_F32 +>; +} // End isCommutable = 1 + defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32, AMDGPUbfm >; @@ -1586,14 +1561,25 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp >; -////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>; -////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>; -////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>; + +defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32", + VOP_I32_F32_I32>; // TODO: set "Uses = dst" + +defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32", + VOP_I32_F32_F32 +>; +defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32", + VOP_I32_F32_F32 +>; defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32", - VOP_I32_F32_F32, int_SI_packf16 + VOP_I32_F32_F32, int_SI_packf16 +>; +defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32", + VOP_I32_I32_I32 +>; +defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32", + VOP_I32_I32_I32 >; -////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>; -////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>; //===----------------------------------------------------------------------===// // VOP3 Instructions @@ -1659,27 +1645,34 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32", VOP_I32_I32_I32_I32 >; -defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32", +defm V_MIN3_F32 : VOP3Inst <vop3<0x151, 0x1d0>, "v_min3_f32", VOP_F32_F32_F32_F32, AMDGPUfmin3>; -defm 
V_MIN3_I32 : VOP3Inst <vop3<0x152>, "v_min3_i32", +defm V_MIN3_I32 : VOP3Inst <vop3<0x152, 0x1d1>, "v_min3_i32", VOP_I32_I32_I32_I32, AMDGPUsmin3 >; -defm V_MIN3_U32 : VOP3Inst <vop3<0x153>, "v_min3_u32", +defm V_MIN3_U32 : VOP3Inst <vop3<0x153, 0x1d2>, "v_min3_u32", VOP_I32_I32_I32_I32, AMDGPUumin3 >; -defm V_MAX3_F32 : VOP3Inst <vop3<0x154>, "v_max3_f32", +defm V_MAX3_F32 : VOP3Inst <vop3<0x154, 0x1d3>, "v_max3_f32", VOP_F32_F32_F32_F32, AMDGPUfmax3 >; -defm V_MAX3_I32 : VOP3Inst <vop3<0x155>, "v_max3_i32", +defm V_MAX3_I32 : VOP3Inst <vop3<0x155, 0x1d4>, "v_max3_i32", VOP_I32_I32_I32_I32, AMDGPUsmax3 >; -defm V_MAX3_U32 : VOP3Inst <vop3<0x156>, "v_max3_u32", +defm V_MAX3_U32 : VOP3Inst <vop3<0x156, 0x1d5>, "v_max3_u32", VOP_I32_I32_I32_I32, AMDGPUumax3 >; -//def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>; -//def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>; -//def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>; +defm V_MED3_F32 : VOP3Inst <vop3<0x157, 0x1d6>, "v_med3_f32", + VOP_F32_F32_F32_F32 +>; +defm V_MED3_I32 : VOP3Inst <vop3<0x158, 0x1d7>, "v_med3_i32", + VOP_I32_I32_I32_I32 +>; +defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32", + VOP_I32_I32_I32_I32 +>; + //def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>; //def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>; //def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>; @@ -1742,21 +1735,36 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32", } // isCommutable = 1, SchedRW = [WriteQuarterRate32] +let SchedRW = [WriteFloatFMA, WriteSALU] in { defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>; +} -let SchedRW = [WriteDouble] in { +let SchedRW = [WriteDouble, WriteSALU] in { // Double precision division pre-scale. 
defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>; } // let SchedRW = [WriteDouble] -let isCommutable = 1 in { -defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32", +let isCommutable = 1, Uses = [VCC] in { + +// v_div_fmas_f32: +// result = src0 * src1 + src2 +// if (vcc) +// result *= 2^32 +// +defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fmas >; + let SchedRW = [WriteDouble] in { -defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64", +// v_div_fmas_f64: +// result = src0 * src1 + src2 +// if (vcc) +// result *= 2^64 +// +defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fmas >; + } // End SchedRW = [WriteDouble] } // End isCommutable = 1 @@ -1774,23 +1782,29 @@ defm V_TRIG_PREOP_F64 : VOP3Inst < // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { -defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", - VOP_I64_I64_I32, shl ->; - -defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", - VOP_I64_I64_I32, srl ->; - -defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", - VOP_I64_I64_I32, sra ->; +defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32>; +defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", VOP_I64_I64_I32>; +defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32>; defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32", VOP_F32_F32_F32_F32>; } // End SubtargetPredicate = isSICI +let SubtargetPredicate = isVI in { + +defm V_LSHLREV_B64 : VOP3Inst <vop3<0, 0x28f>, "v_lshlrev_b64", + VOP_I64_I32_I64 +>; +defm V_LSHRREV_B64 : VOP3Inst <vop3<0, 0x290>, "v_lshrrev_b64", + VOP_I64_I32_I64 +>; +defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64", + VOP_I64_I32_I64 +>; + +} // End SubtargetPredicate = isVI + //===----------------------------------------------------------------------===// // Pseudo 
Instructions //===----------------------------------------------------------------------===// @@ -1809,8 +1823,8 @@ def SGPR_USE : InstSI <(outs),(ins), "", []>; // SI pseudo instructions. These are used by the CFG structurizer pass // and should be lowered to ISA instructions prior to codegen. -let mayLoad = 1, mayStore = 1, hasSideEffects = 1, - Uses = [EXEC], Defs = [EXEC] in { +let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { +let Uses = [EXEC], Defs = [EXEC] in { let isBranch = 1, isTerminator = 1 in { @@ -1867,15 +1881,18 @@ def SI_END_CF : InstSI < [(int_SI_end_cf i64:$saved)] >; +} // End Uses = [EXEC], Defs = [EXEC] + +let Uses = [EXEC], Defs = [EXEC,VCC] in { def SI_KILL : InstSI < (outs), (ins VSrc_32:$src), "si_kill $src", [(int_AMDGPU_kill f32:$src)] >; +} // End Uses = [EXEC], Defs = [EXEC,VCC] } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 - // Uses = [EXEC], Defs = [EXEC] let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { @@ -2020,16 +2037,12 @@ def : Pat < (SI_KILL 0xbf800000) >; -let Predicates = [isSICI] in { - /* int_SI_vs_load_input */ def : Pat< (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0) >; -} // End Predicates = [isSICI] - /* int_SI_export */ def : Pat < (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, @@ -2156,9 +2169,13 @@ def : Pat < //===----------------------------------------------------------------------===// let Predicates = [UnsafeFPMath] in { -def : RcpPat<V_RCP_F64_e32, f64>; -defm : RsqPat<V_RSQ_F64_e32, f64>; -defm : RsqPat<V_RSQ_F32_e32, f32>; + +//def : RcpPat<V_RCP_F64_e32, f64>; +//defm : RsqPat<V_RSQ_F64_e32, f64>; +//defm : RsqPat<V_RSQ_F32_e32, f32>; + +def : RsqPat<V_RSQ_F32_e32, f32>; +def : RsqPat<V_RSQ_F64_e32, f64>; } //===----------------------------------------------------------------------===// @@ -2675,13 +2692,6 @@ def : Pat < (V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1)) 
>; -def : Pat< - (fdiv f64:$src0, f64:$src1), - (V_MUL_F64 0 /* src0_modifiers */, $src0, - 0 /* src1_modifiers */, (V_RCP_F64_e32 $src1), - 0 /* clamp */, 0 /* omod */) ->; - def : Pat < (int_AMDGPU_cube v4f32:$src), (REG_SEQUENCE VReg_128, @@ -2716,16 +2726,12 @@ class Ext32Pat <SDNode ext> : Pat < def : Ext32Pat <zext>; def : Ext32Pat <anyext>; -let Predicates = [isSICI] in { - // Offset in an 32Bit VGPR def : Pat < (SIload_constant v4i32:$sbase, i32:$voff), (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0) >; -} // End Predicates = [isSICI] - // The multiplication scales from [0,1] to the unsigned integer range def : Pat < (AMDGPUurecip i32:$src0), @@ -2907,7 +2913,6 @@ class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat < (Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0) >; -let Predicates = [isSICI] in { def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>; def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>; def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>; @@ -2915,7 +2920,6 @@ def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>; def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>; def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>; def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>; -} // End Predicates = [isSICI] // BUFFER_LOAD_DWORD*, addr64=0 multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen, @@ -2954,14 +2958,12 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe >; } -let Predicates = [isSICI] in { defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>; defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>; defm : MUBUF_Load_Dword 
<v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>; -} // End Predicates = [isSICI] class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat < (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset, @@ -2969,13 +2971,11 @@ class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat < (Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0) >; -let Predicates = [isSICI] in { def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>; def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>; def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>; def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>; def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>; -} // End Predicates = [isSICI] /* class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat < @@ -3246,6 +3246,12 @@ def : Pat < >; def : Pat < + (i1 (trunc i64:$a)), + (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), + (EXTRACT_SUBREG $a, sub0)), 1) +>; + +def : Pat < (i32 (bswap i32:$a)), (V_BFI_B32 (S_MOV_B32 0x00ff00ff), (V_ALIGNBIT_B32 $a, $a, 24), @@ -3257,6 +3263,28 @@ def : Pat < (V_CNDMASK_B32_e64 $src0, $src1, $src2) >; +//===----------------------------------------------------------------------===// +// Fract Patterns +//===----------------------------------------------------------------------===// + +let Predicates = [isCI] in { + +// Convert (x - floor(x)) to fract(x) +def : Pat < + (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), + (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))), + (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +// Convert (x + (-floor(x))) to fract(x) +def : Pat < + (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), + (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), + (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) +>; 
+ +} // End Predicates = [isCI] + //============================================================================// // Miscellaneous Optimization Patterns //============================================================================// diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 068b22f37704..c319b32111fe 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -88,7 +88,8 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); - void LoadM0(MachineInstr &MI, MachineInstr *MovRel); + void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0); + void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset); void IndirectSrc(MachineInstr &MI); void IndirectDst(MachineInstr &MI); @@ -323,7 +324,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } -void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { +void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); @@ -333,8 +334,14 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { unsigned Idx = MI.getOperand(3).getReg(); if (AMDGPU::SReg_32RegClass.contains(Idx)) { - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) - .addReg(Idx); + if (Offset) { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) + .addReg(Idx) + .addImm(Offset); + } else { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(Idx); + } MBB.insert(I, MovRel); } else { @@ -363,6 +370,11 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) .addReg(AMDGPU::VCC); + if (Offset) { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) + .addReg(AMDGPU::M0) + .addImm(Offset); + } // 
Do the actual move MBB.insert(I, MovRel); @@ -384,6 +396,33 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { MI.eraseFromParent(); } +/// \param @VecReg The register which holds element zero of the vector +/// being addressed into. +/// \param[out] @Reg The base register to use in the indirect addressing instruction. +/// \param[in,out] @Offset As an input, this is the constant offset part of the +// indirect Index. e.g. v0 = v[VecReg + Offset] +// As an output, this is a constant value that needs +// to be added to the value stored in M0. +void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg, + unsigned &Reg, + int &Offset) { + unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0); + if (!SubReg) + SubReg = VecReg; + + const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg); + int RegIdx = TRI->getHWRegIndex(SubReg) + Offset; + + if (RegIdx < 0) { + Offset = RegIdx; + RegIdx = 0; + } else { + Offset = 0; + } + + Reg = RC->getRegister(RegIdx); +} + void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); @@ -391,18 +430,18 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { unsigned Dst = MI.getOperand(0).getReg(); unsigned Vec = MI.getOperand(2).getReg(); - unsigned Off = MI.getOperand(4).getImm(); - unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0); - if (!SubReg) - SubReg = Vec; + int Off = MI.getOperand(4).getImm(); + unsigned Reg; + + computeIndirectRegAndOffset(Vec, Reg, Off); MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) - .addReg(SubReg + Off) + .addReg(Reg) .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Vec, RegState::Implicit); - LoadM0(MI, MovRel); + LoadM0(MI, MovRel, Off); } void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { @@ -411,20 +450,20 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { DebugLoc DL = MI.getDebugLoc(); unsigned Dst = 
MI.getOperand(0).getReg(); - unsigned Off = MI.getOperand(4).getImm(); + int Off = MI.getOperand(4).getImm(); unsigned Val = MI.getOperand(5).getReg(); - unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0); - if (!SubReg) - SubReg = Dst; + unsigned Reg; + + computeIndirectRegAndOffset(Dst, Reg, Off); MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) - .addReg(SubReg + Off, RegState::Define) + .addReg(Reg, RegState::Define) .addReg(Val) .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Dst, RegState::Implicit); - LoadM0(MI, MovRel); + LoadM0(MI, MovRel, Off); } bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { @@ -447,7 +486,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { Next = std::next(I); MachineInstr &MI = *I; - if (TII->isDS(MI.getOpcode())) + if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode())) NeedWQM = true; // Flat uses m0 in case it needs to access LDS. @@ -513,12 +552,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::SI_INDIRECT_DST_V16: IndirectDst(MI); break; - - case AMDGPU::V_INTERP_P1_F32: - case AMDGPU::V_INTERP_P2_F32: - case AMDGPU::V_INTERP_MOV_F32: - NeedWQM = true; - break; } } } diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 58c2cd109680..f50299112604 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -14,7 +14,6 @@ #include "SIRegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -47,13 +46,31 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::VGPR255); Reserved.set(AMDGPU::VGPR254); + // Tonga and Iceland can only allocate a fixed number of SGPRs due + // to a hw bug. 
+ if (ST.hasSGPRInitBug()) { + unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); + // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). + // Assume XNACK_MASK is unused. + unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; + + for (unsigned i = Limit; i < NumSGPRs; ++i) { + unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); + MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true); + + for (; R.isValid(); ++R) + Reserved.set(*R); + } + } + return Reserved; } unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const { // FIXME: We should adjust the max number of waves based on LDS size. - unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU()); + unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(), + ST.getMaxWavesPerCU()); unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU()); for (regclass_iterator I = regclass_begin(), E = regclass_end(); @@ -204,7 +221,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, Ctx.emitError("Ran out of VGPRs for spilling SGPR"); } - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR) + BuildMI(*MBB, MI, DL, + TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), + Spill.VGPR) .addReg(SubReg) .addImm(Spill.Lane); @@ -236,7 +255,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (isM0) SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) + BuildMI(*MBB, MI, DL, + TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), + SubReg) .addReg(Spill.VGPR) .addImm(Spill.Lane) .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); @@ -245,7 +266,22 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, .addReg(SubReg); } } - TII->insertNOPs(MI, 3); + + // TODO: only do this when it is needed + switch (ST.getGeneration()) { + case AMDGPUSubtarget::SOUTHERN_ISLANDS: + // "VALU writes SGPR" -> "SMRD reads that 
SGPR" needs "S_NOP 3" on SI + TII->insertNOPs(MI, 3); + break; + case AMDGPUSubtarget::SEA_ISLANDS: + break; + default: // VOLCANIC_ISLANDS and later + // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI + // and later. This also applies to VALUs which write VCC, but we're + // unlikely to see VMEM use VCC. + TII->insertNOPs(MI, 4); + } + MI->eraseFromParent(); break; } @@ -490,14 +526,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const { } } -unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const { - switch(WaveCount) { - case 10: return 48; - case 9: return 56; - case 8: return 64; - case 7: return 72; - case 6: return 80; - case 5: return 96; - default: return 103; +unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen, + unsigned WaveCount) const { + if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + switch (WaveCount) { + case 10: return 80; + case 9: return 80; + case 8: return 96; + default: return 102; + } + } else { + switch(WaveCount) { + case 10: return 48; + case 9: return 56; + case 8: return 64; + case 7: return 72; + case 6: return 80; + case 5: return 96; + default: return 103; + } } } diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index d908ffd12d2c..1dfe53093eb4 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -17,6 +17,7 @@ #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" +#include "AMDGPUSubtarget.h" #include "llvm/Support/Debug.h" namespace llvm { @@ -111,7 +112,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount /// concurrent waves. 
- unsigned getNumSGPRsAllowed(unsigned WaveCount) const; + unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen, + unsigned WaveCount) const; unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC) const; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 1a1efb0c89a9..c63f30508f63 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -209,7 +209,9 @@ def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256 def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; -def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>; +def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> { + let Size = 32; +} class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> { let OperandNamespace = "AMDGPU"; diff --git a/lib/Target/R600/VIInstrFormats.td b/lib/Target/R600/VIInstrFormats.td index 5285d18ced46..c24223511248 100644 --- a/lib/Target/R600/VIInstrFormats.td +++ b/lib/Target/R600/VIInstrFormats.td @@ -136,6 +136,32 @@ class VOP3e_vi <bits<10> op> : Enc64 { let Inst{63} = src2_modifiers{0}; } +class VOP3be_vi <bits<10> op> : Enc64 { + bits<8> vdst; + bits<2> src0_modifiers; + bits<9> src0; + bits<2> src1_modifiers; + bits<9> src1; + bits<2> src2_modifiers; + bits<9> src2; + bits<7> sdst; + bits<2> omod; + bits<1> clamp; + + let Inst{7-0} = vdst; + let Inst{14-8} = sdst; + let Inst{15} = clamp; + let Inst{25-16} = op; + let Inst{31-26} = 0x34; //encoding + let Inst{40-32} = src0; + let Inst{49-41} = src1; + let Inst{58-50} = src2; + let Inst{60-59} = omod; + let Inst{61} = src0_modifiers{0}; + let Inst{62} = src1_modifiers{0}; + let Inst{63} = src2_modifiers{0}; +} + class EXPe_vi : EXPe { let Inst{31-26} = 0x31; //encoding } diff --git a/lib/Target/R600/VIInstructions.td b/lib/Target/R600/VIInstructions.td index 24e66cea6277..4a6e933783bb 100644 --- a/lib/Target/R600/VIInstructions.td +++ 
b/lib/Target/R600/VIInstructions.td @@ -9,18 +9,6 @@ // Instruction definitions for VI and newer. //===----------------------------------------------------------------------===// -let SubtargetPredicate = isVI in { - -defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi < - 0x14, "buffer_load_dword", VGPR_32, i32, global_load ->; - -defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi < - 0x03, "buffer_load_format_xyzw", VReg_128 ->; - -} // End SubtargetPredicate = isVI - //===----------------------------------------------------------------------===// // SMEM Patterns @@ -28,37 +16,10 @@ defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi < let Predicates = [isVI] in { -// 1. Offset as 8bit DWORD immediate +// 1. Offset as 20bit DWORD immediate def : Pat < (SIload_constant v4i32:$sbase, IMM20bit:$offset), (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) >; -//===----------------------------------------------------------------------===// -// MUBUF Patterns -//===----------------------------------------------------------------------===// - -// Offset in an 32Bit VGPR -def : Pat < - (SIload_constant v4i32:$sbase, i32:$voff), - (BUFFER_LOAD_DWORD_VI_OFFEN $sbase, $voff, 0, 0, 0, 0, 0) ->; - -// Offset in an 32Bit VGPR -def : Pat < - (SIload_constant v4i32:$sbase, i32:$voff), - (BUFFER_LOAD_DWORD_VI_OFFEN $sbase, $voff, 0, 0, 0, 0, 0) ->; - -/* int_SI_vs_load_input */ -def : Pat< - (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr), - (BUFFER_LOAD_FORMAT_XYZW_VI_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0) ->; - -defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_VI_OFFSET, - BUFFER_LOAD_DWORD_VI_OFFEN, - BUFFER_LOAD_DWORD_VI_IDXEN, - BUFFER_LOAD_DWORD_VI_BOTHEN>; - } // End Predicates = [isVI] diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 16aab16d63ee..41e717febe94 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -375,12 +375,25 @@ static bool 
usesTheStack(const MachineFunction &MF) { return false; } -void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI, - unsigned &CallOp, - const char *&Symbol) { - CallOp = STI.is64Bit() ? X86::W64ALLOCA : X86::CALLpcrel32; +void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); + bool Is64Bit = STI.is64Bit(); + bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo()); + + unsigned CallOp; + if (Is64Bit) + CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; + else + CallOp = X86::CALLpcrel32; - if (STI.is64Bit()) { + const char *Symbol; + if (Is64Bit) { if (STI.isTargetCygMing()) { Symbol = "___chkstk_ms"; } else { @@ -390,6 +403,37 @@ void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI, Symbol = "_alloca"; else Symbol = "_chkstk"; + + MachineInstrBuilder CI; + + // All current stack probes take AX and SP as input, clobber flags, and + // preserve all registers. x86_64 probes leave RSP unmodified. + if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { + // For the large code model, we have to call through a register. Use R11, + // as it is scratch in all supported calling conventions. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) + .addExternalSymbol(Symbol); + CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); + } else { + CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol); + } + + unsigned AX = Is64Bit ? X86::RAX : X86::EAX; + unsigned SP = Is64Bit ? 
X86::RSP : X86::ESP; + CI.addReg(AX, RegState::Implicit) + .addReg(SP, RegState::Implicit) + .addReg(AX, RegState::Define | RegState::Implicit) + .addReg(SP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + + if (Is64Bit) { + // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp + // themselves. It also does not clobber %rax so we can reuse it when + // adjusting %rsp. + BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP) + .addReg(X86::RSP) + .addReg(X86::RAX); + } } /// emitPrologue - Push callee-saved registers onto the stack, which @@ -722,11 +766,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. if (NumBytes >= StackProbeSize && UseStackProbe) { - const char *StackProbeSymbol; - unsigned CallOp; - - getStackProbeFunction(STI, CallOp, StackProbeSymbol); - // Check whether EAX is livein for this function. bool isEAXAlive = isEAXLiveIn(MF); @@ -755,22 +794,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .setMIFlag(MachineInstr::FrameSetup); } - BuildMI(MBB, MBBI, DL, - TII.get(CallOp)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) - .setMIFlag(MachineInstr::FrameSetup); + // Save a pointer to the MI where we set AX. + MachineBasicBlock::iterator SetRAX = MBBI; + --SetRAX; + + // Call __chkstk, __chkstk_ms, or __alloca. + emitStackProbeCall(MF, MBB, MBBI, DL); + + // Apply the frame setup flag to all inserted instrs. + for (; SetRAX != MBBI; ++SetRAX) + SetRAX->setFlag(MachineInstr::FrameSetup); - if (Is64Bit) { - // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp - // themself. It also does not clobber %rax so we can reuse it when - // adjusting %rsp. 
- BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr) - .addReg(StackPtr) - .addReg(X86::RAX) - .setMIFlag(MachineInstr::FrameSetup); - } if (isEAXAlive) { // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index ee0ee227cad8..dd8fc3240c23 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -27,9 +27,11 @@ public: explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO) : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {} - static void getStackProbeFunction(const X86Subtarget &STI, - unsigned &CallOp, - const char *&Symbol); + /// Emit a call to the target's stack probe function. This is required for all + /// large stack allocations on Windows. The caller is required to materialize + /// the number of bytes to probe in RAX/EAX. + static void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL); void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 177299b8afc4..85978d8aaa5c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15,6 +15,7 @@ #include "X86ISelLowering.h" #include "Utils/X86ShuffleDecode.h" #include "X86CallingConv.h" +#include "X86FrameLowering.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86TargetMachine.h" @@ -10094,12 +10095,12 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, VT.getVectorNumElements() / 2); // Check for patterns which can be matched with a single insert of a 128-bit // subvector. 
- if (isShuffleEquivalent(Mask, 0, 1, 0, 1) || - isShuffleEquivalent(Mask, 0, 1, 4, 5)) { + bool OnlyUsesV1 = isShuffleEquivalent(Mask, 0, 1, 0, 1); + if (OnlyUsesV1 || isShuffleEquivalent(Mask, 0, 1, 4, 5)) { SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, DAG.getIntPtrConstant(0)); SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0)); + OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0)); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); } if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) { @@ -10112,7 +10113,15 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, // Otherwise form a 128-bit permutation. // FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half. - unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4; + int MaskLO = Mask[0]; + if (MaskLO == SM_SentinelUndef) + MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1]; + + int MaskHI = Mask[2]; + if (MaskHI == SM_SentinelUndef) + MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3]; + + unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4; return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, DAG.getConstant(PermMask, MVT::i8)); } @@ -17172,6 +17181,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::x86_avx2_permd: + case Intrinsic::x86_avx2_permps: + // Operands intentionally swapped. Mask is last operand to intrinsic, + // but second operand for node/instruction. + return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(1)); + case Intrinsic::x86_avx512_mask_valign_q_512: case Intrinsic::x86_avx512_mask_valign_d_512: // Vector source operands are swapped. 
@@ -21076,47 +21092,7 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, assert(!Subtarget->isTargetMachO()); - // The lowering is pretty easy: we're just emitting the call to _alloca. The - // non-trivial part is impdef of ESP. - - if (Subtarget->isTargetWin64()) { - if (Subtarget->isTargetCygMing()) { - // ___chkstk(Mingw64): - // Clobbers R10, R11, RAX and EFLAGS. - // Updates RSP. - BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) - .addExternalSymbol("___chkstk") - .addReg(X86::RAX, RegState::Implicit) - .addReg(X86::RSP, RegState::Implicit) - .addReg(X86::RAX, RegState::Define | RegState::Implicit) - .addReg(X86::RSP, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - } else { - // __chkstk(MSVCRT): does not update stack pointer. - // Clobbers R10, R11 and EFLAGS. - BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) - .addExternalSymbol("__chkstk") - .addReg(X86::RAX, RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - // RAX has the offset to be subtracted from RSP. - BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP) - .addReg(X86::RSP) - .addReg(X86::RAX); - } - } else { - const char *StackProbeSymbol = (Subtarget->isTargetKnownWindowsMSVC() || - Subtarget->isTargetWindowsItanium()) - ? "_chkstk" - : "_alloca"; - - BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) - .addExternalSymbol(StackProbeSymbol) - .addReg(X86::EAX, RegState::Implicit) - .addReg(X86::ESP, RegState::Implicit) - .addReg(X86::EAX, RegState::Define | RegState::Implicit) - .addReg(X86::ESP, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - } + X86FrameLowering::emitStackProbeCall(*BB->getParent(), *BB, MI, DL); MI->eraseFromParent(); // The pseudo instruction is gone now. 
return BB; @@ -25558,45 +25534,51 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG, if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB) if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0))) if (C->getAPIntValue() == 0 && LHS.hasOneUse()) { - SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), - LHS.getValueType(), RHS, LHS.getOperand(1)); - return DAG.getSetCC(SDLoc(N), N->getValueType(0), - addV, DAG.getConstant(0, addV.getValueType()), CC); + SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), LHS.getValueType(), RHS, + LHS.getOperand(1)); + return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV, + DAG.getConstant(0, addV.getValueType()), CC); } if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB) if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0))) if (C->getAPIntValue() == 0 && RHS.hasOneUse()) { - SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), - RHS.getValueType(), LHS, RHS.getOperand(1)); - return DAG.getSetCC(SDLoc(N), N->getValueType(0), - addV, DAG.getConstant(0, addV.getValueType()), CC); + SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), RHS.getValueType(), LHS, + RHS.getOperand(1)); + return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV, + DAG.getConstant(0, addV.getValueType()), CC); } - if (VT.getScalarType() == MVT::i1) { - bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) && - (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1); - bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode()); - if (!IsSEXT0 && !IsVZero0) - return SDValue(); - bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) && - (RHS.getOperand(0).getValueType().getScalarType() == MVT::i1); + if (VT.getScalarType() == MVT::i1 && + (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) { + bool IsSEXT0 = + (LHS.getOpcode() == ISD::SIGN_EXTEND) && + (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1); bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode()); - 
if (!IsSEXT1 && !IsVZero1) - return SDValue(); + if (!IsSEXT0 || !IsVZero1) { + // Swap the operands and update the condition code. + std::swap(LHS, RHS); + CC = ISD::getSetCCSwappedOperands(CC); + + IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) && + (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1); + IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode()); + } if (IsSEXT0 && IsVZero1) { - assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type"); - if (CC == ISD::SETEQ) + assert(VT == LHS.getOperand(0).getValueType() && + "Uexpected operand type"); + if (CC == ISD::SETGT) + return DAG.getConstant(0, VT); + if (CC == ISD::SETLE) + return DAG.getConstant(1, VT); + if (CC == ISD::SETEQ || CC == ISD::SETGE) return DAG.getNOT(DL, LHS.getOperand(0), VT); + + assert((CC == ISD::SETNE || CC == ISD::SETLT) && + "Unexpected condition code!"); return LHS.getOperand(0); } - if (IsSEXT1 && IsVZero0) { - assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type"); - if (CC == ISD::SETEQ) - return DAG.getNOT(DL, RHS.getOperand(0), VT); - return RHS.getOperand(0); - } } return SDValue(); diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 71415c6fde8e..7baff19e51a9 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -279,7 +279,8 @@ let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in { } let isCall = 1, isCodeGenOnly = 1 in - // __chkstk(MSVC): clobber R10, R11 and EFLAGS. + // __chkstk(MSVC): clobber R10, R11 and EFLAGS + // ___chkstk_ms(Mingw64): clobber R10, R11 and EFLAGS // ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP. 
let Defs = [RAX, R10, R11, RSP, EFLAGS], Uses = [RSP] in { diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 7130ae2c3097..b411d079c56c 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -175,8 +175,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(avx2_permd, INTR_TYPE_2OP, X86ISD::VPERMV, 0), - X86_INTRINSIC_DATA(avx2_permps, INTR_TYPE_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0), diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index cb965fb9a225..60b541f1aa2f 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -47,6 +47,8 @@ using namespace llvm; static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, cl::ValueRequired); +static cl::opt<bool> DefaultExitBlockBeforeBody("gcov-exit-block-before-body", + cl::init(false), cl::Hidden); GCOVOptions GCOVOptions::getDefault() { GCOVOptions Options; @@ -312,7 +314,7 @@ namespace { class GCOVFunction : public GCOVRecord { public: GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident, - bool UseCfgChecksum) + bool UseCfgChecksum, bool ExitBlockBeforeBody) : SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0), ReturnBlock(1, os) { this->os = os; @@ -322,11 +324,13 @@ namespace { uint32_t i = 0; for (auto &BB : *F) { - // Skip index 1 (0, 2, 3, 4, ...) because that's assigned to the - // ReturnBlock. 
- bool first = i == 0; - Blocks.insert(std::make_pair(&BB, GCOVBlock(i++ + !first, os))); + // Skip index 1 if it's assigned to the ReturnBlock. + if (i == 1 && ExitBlockBeforeBody) + ++i; + Blocks.insert(std::make_pair(&BB, GCOVBlock(i++, os))); } + if (!ExitBlockBeforeBody) + ReturnBlock.Number = i; std::string FunctionNameAndLine; raw_string_ostream FNLOS(FunctionNameAndLine); @@ -469,7 +473,7 @@ static bool functionHasLines(Function *F) { if (Loc.isUnknown()) continue; // Artificial lines such as calls to the global constructors. - if (Loc.getLine() == 0) continue; + if (Loc.getLine() == 0) continue; return true; } @@ -513,7 +517,8 @@ void GCOVProfiler::emitProfileNotes() { EntryBlock.splitBasicBlock(It); Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++, - Options.UseCfgChecksum)); + Options.UseCfgChecksum, + DefaultExitBlockBeforeBody)); GCOVFunction &Func = *Funcs.back(); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index ac13eebf8275..1ed14d001093 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -2183,12 +2183,16 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, // Handle the floating point versions of equality comparisons too. if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) || (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) { - // Floating point -0.0 and 0.0 compare equal, so we can't - // propagate a constant based on that comparison. + + // Floating point -0.0 and 0.0 compare equal, so we can only + // propagate values if we know that we have a constant and that + // its value is non-zero. + // FIXME: We should do this optimization if 'no signed zeros' is // applicable via an instruction-level fast-math-flag or some other // indicator that relaxed FP semantics are being used. 
- if (!isa<ConstantFP>(Op1) || !cast<ConstantFP>(Op1)->isZero()) + + if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero()) Worklist.push_back(std::make_pair(Op0, Op1)); } diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 9164be224654..267cb999d245 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -535,6 +535,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { Loop *PredLoop = LI->getLoopFor(*PI); if (!PredLoop || PredLoop->contains(Exit)) continue; + if (isa<IndirectBrInst>((*PI)->getTerminator())) + continue; SplitLatchEdge |= L->getLoopLatch() == *PI; BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this); ExitSplit->moveBefore(Exit); diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 6cb91a154f06..d54c09aa6ae4 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -288,14 +288,11 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, IntegerType *IT = cast<IntegerType>(IVOperand->getType()); Value *OtherOperand = nullptr; - int OtherOperandIdx = -1; if (BO->getOperand(0) == IVOperand) { OtherOperand = BO->getOperand(1); - OtherOperandIdx = 1; } else { assert(BO->getOperand(1) == IVOperand && "only other use!"); OtherOperand = BO->getOperand(0); - OtherOperandIdx = 0; } bool Changed = false; diff --git a/test/Analysis/ScalarEvolution/pr22856.ll b/test/Analysis/ScalarEvolution/pr22856.ll new file mode 100644 index 000000000000..89e83516efdd --- /dev/null +++ b/test/Analysis/ScalarEvolution/pr22856.ll @@ -0,0 +1,33 @@ +; RUN: opt -loop-reduce -verify < %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64--linux-gnu" + +define void @unbounded() { + +block_A: + %0 = sext i32 undef to i64 + br i1 undef, label %block_F, label %block_G + +block_C: ; preds = %block_F + br i1 undef, label 
%block_D, label %block_E + +block_D: ; preds = %block_D, %block_C + br i1 undef, label %block_E, label %block_D + +block_E: ; preds = %block_D, %block_C + %iv2 = phi i64 [ %4, %block_D ], [ %4, %block_C ] + %1 = add nsw i32 %iv1, 1 + %2 = icmp eq i32 %1, undef + br i1 %2, label %block_G, label %block_F + +block_F: ; preds = %block_E, %block_A + %iv3 = phi i64 [ %iv2, %block_E ], [ %0, %block_A ] + %iv1 = phi i32 [ %1, %block_E ], [ undef, %block_A ] + %3 = add nsw i64 %iv3, 2 + %4 = add nsw i64 %iv3, 1 + br label %block_C + +block_G: ; preds = %block_E, %block_A + ret void +} diff --git a/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll index 30ea63b4664a..a89c2c5e6fd5 100644 --- a/test/CodeGen/AArch64/arm64-tls-dynamics.ll +++ b/test/CodeGen/AArch64/arm64-tls-dynamics.ll @@ -1,5 +1,7 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOLD %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-NOLD-RELOC %s @general_dynamic_var = external thread_local global i32 @@ -9,22 +11,34 @@ define i32 @test_generaldynamic() { %val = load i32* @general_dynamic_var ret i32 %val - ; FIXME: the adrp instructions are redundant (if harmless). 
-; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] -; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var +; CHECK-NEXT: .tlsdesccall general_dynamic_var ; CHECK-NEXT: blr [[CALLEE]] +; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var +; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] +; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var +; CHECK-NOLD-NEXT: .tlsdesccall general_dynamic_var +; CHECK-NOLD-NEXT: blr [[CALLEE]] + + ; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0 ; CHECK: ldr w0, [x[[TP]], x0] +; CHECK-NOLD: mrs x[[TP:[0-9]+]], TPIDR_EL0 +; CHECK-NOLD: ldr w0, [x[[TP]], x0] ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL + } define i32* @test_generaldynamic_addr() { @@ -32,21 +46,25 @@ define i32* @test_generaldynamic_addr() { ret i32* @general_dynamic_var - ; FIXME: the adrp instructions are redundant (if harmless). 
-; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] -; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var +; CHECK-NEXT: .tlsdesccall general_dynamic_var ; CHECK-NEXT: blr [[CALLEE]] ; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0 ; CHECK: add x0, [[TP]], x0 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL + } @local_dynamic_var = external thread_local(localdynamic) global i32 @@ -58,54 +76,71 @@ define i32 @test_localdynamic() { ret i32 %val ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var -; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var - -; CHECK: add x[[TPREL:[0-9]+]], x0, [[DTP_OFFSET]] - +; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, 
:dtprel_hi12:local_dynamic_var +; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var ; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK: ldr w0, [x[[TPIDR]], x[[TPOFF]]] + +; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var +; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var] +; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var +; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var +; CHECK-NOLD-NEXT: blr [[CALLEE]] +; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK-NOLD: ldr w0, [x[[TPIDR]], x0] -; CHECK: ldr w0, [x[[TPIDR]], x[[TPREL]]] ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC + +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL } define i32* @test_localdynamic_addr() { ; CHECK-LABEL: test_localdynamic_addr: - ret i32* @local_dynamic_var - ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var -; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var - -; CHECK: add 
[[TPREL:x[0-9]+]], x0, [[DTP_OFFSET]] - -; CHECK: mrs [[TPIDR:x[0-9]+]], TPIDR_EL0 - -; CHECK: add x0, [[TPIDR]], [[TPREL]] +; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var +; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var +; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK: add x0, x[[TPIDR]], x[[TPOFF]] + +; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var +; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var] +; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var +; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var +; CHECK-NOLD-NEXT: blr [[CALLEE]] +; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK-NOLD: add x0, x[[TPIDR]], x0 + ret i32* @local_dynamic_var ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL } ; The entire point of the local-dynamic access model is to have a single call to @@ -122,11 +157,10 @@ define i32 @test_localdynamic_deduplicate() { %sum = add i32 %val, %val2 ret i32 %sum -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK: adrp x[[DTPREL_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[DTPREL_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] +; 
CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE +; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] ; CHECK-NOT: _TLS_MODULE_BASE_ diff --git a/test/CodeGen/AArch64/arm64-tls-execs.ll b/test/CodeGen/AArch64/arm64-tls-execs.ll index f0130d858896..e6d3d680f417 100644 --- a/test/CodeGen/AArch64/arm64-tls-execs.ll +++ b/test/CodeGen/AArch64/arm64-tls-execs.ll @@ -38,14 +38,13 @@ define i32 @test_local_exec() { ; CHECK-LABEL: test_local_exec: %val = load i32* @local_exec_var -; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [0bAAA{{[01]+}},A,0b101AAAAA,0x92] -; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var -; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0 -; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]] - -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC +; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0 +; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var +; CHECK: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var +; CHECK: ldr w0, [x[[R3]]] +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC ret i32 %val } @@ -53,11 +52,11 @@ define i32* @test_local_exec_addr() { ; CHECK-LABEL: test_local_exec_addr: ret i32* @local_exec_var -; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var -; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var -; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0 -; CHECK: add x0, [[TP]], [[TP_OFFSET]] +; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0 +; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var +; CHECK: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var +; CHECK: ret -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC } diff --git a/test/CodeGen/AArch64/implicit-sret.ll b/test/CodeGen/AArch64/implicit-sret.ll new file mode 100644 index 
000000000000..264d519f36f8 --- /dev/null +++ b/test/CodeGen/AArch64/implicit-sret.ll @@ -0,0 +1,13 @@ +; RUN: llc %s -o - -mtriple=arm64-apple-ios7.0 | FileCheck %s +; +; Handle implicit sret arguments that are generated on-the-fly during lowering. +; <rdar://19792160> Null pointer assertion in AArch64TargetLowering + +; CHECK-LABEL: big_retval +; ... str or stp for the first 1024 bits +; CHECK: strb wzr, [x8, #128] +; CHECK: ret +define i1032 @big_retval() { +entry: + ret i1032 0 +} diff --git a/test/CodeGen/AArch64/machine-copy-prop.ll b/test/CodeGen/AArch64/machine-copy-prop.ll new file mode 100644 index 000000000000..92d877d40f59 --- /dev/null +++ b/test/CodeGen/AArch64/machine-copy-prop.ll @@ -0,0 +1,101 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s + +; This file check a bug in MachineCopyPropagation pass. The last COPY will be +; incorrectly removed if the machine instructions are as follows: +; %Q5_Q6<def> = COPY %Q2_Q3 +; %D5<def> = +; %D3<def> = +; %D3<def> = COPY %D6 +; This is caused by a bug in function SourceNoLongerAvailable(), which fails to +; remove the relationship of D6 and "%Q5_Q6<def> = COPY %Q2_Q3". 
+ +@failed = internal unnamed_addr global i1 false + +; CHECK-LABEL: foo: +; CHECK: ld2 +; CHECK-NOT: // kill: D{{[0-9]+}}<def> D{{[0-9]+}}<kill> +define void @foo(<2 x i32> %shuffle251, <8 x i8> %vtbl1.i, i8* %t2, <2 x i32> %vrsubhn_v2.i1364) { +entry: + %val0 = alloca [2 x i64], align 8 + %val1 = alloca <2 x i64>, align 16 + %vmull = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> <i32 -1, i32 -1>, <2 x i32> %shuffle251) + %vgetq_lane = extractelement <2 x i64> %vmull, i32 0 + %cmp = icmp eq i64 %vgetq_lane, 1 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + store i1 true, i1* @failed, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @f2() + %sqdmull = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> <i16 1, i16 0, i16 0, i16 0>, <4 x i16> <i16 2, i16 0, i16 0, i16 0>) + %sqadd = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> zeroinitializer, <4 x i32> %sqdmull) + %shuffle = shufflevector <4 x i32> %sqadd, <4 x i32> undef, <2 x i32> zeroinitializer + %0 = mul <2 x i32> %shuffle, <i32 -1, i32 0> + %sub = add <2 x i32> %0, <i32 1, i32 0> + %sext = sext <2 x i32> %sub to <2 x i64> + %vset_lane603 = shufflevector <2 x i64> %sext, <2 x i64> undef, <1 x i32> zeroinitializer + %t1 = bitcast [2 x i64]* %val0 to i8* + call void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i64 1, i8* %t1) + call void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64> <i64 4096>, <1 x i64> <i64 -1>, i64 0, i8* %t2) + %vld2_lane = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> <i64 11>, <1 x i64> <i64 11>, i64 0, i8* %t2) + %vld2_lane.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 0 + %vld2_lane.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 1 + %vld2_lane1 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> %vld2_lane.0.extract, <1 x i64> %vld2_lane.1.extract, 
i64 0, i8* %t1) + %vld2_lane1.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 0 + %vld2_lane1.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 1 + %t3 = bitcast <2 x i64>* %val1 to i8* + call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> %vld2_lane1.0.extract, <1 x i64> %vld2_lane1.1.extract, i8* %t3) + %t4 = load <2 x i64>* %val1, align 16 + %vsubhn = sub <2 x i64> <i64 11, i64 0>, %t4 + %vsubhn1 = lshr <2 x i64> %vsubhn, <i64 32, i64 32> + %vsubhn2 = trunc <2 x i64> %vsubhn1 to <2 x i32> + %neg = xor <2 x i32> %vsubhn2, <i32 -1, i32 -1> + %sqadd1 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> <i64 -1>, <1 x i64> <i64 1>) + %sqadd2 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %vset_lane603, <1 x i64> %sqadd1) + %sqadd3 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> <i64 1>, <1 x i64> %sqadd2) + %shuffle.i = shufflevector <2 x i32> <i32 undef, i32 0>, <2 x i32> %vrsubhn_v2.i1364, <2 x i32> <i32 1, i32 3> + %cmp.i = icmp uge <2 x i32> %shuffle.i, %neg + %sext.i = sext <2 x i1> %cmp.i to <2 x i32> + %vpadal = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %sext.i) + %t5 = sub <1 x i64> %vpadal, %sqadd3 + %vget_lane1 = extractelement <1 x i64> %t5, i32 0 + %cmp2 = icmp eq i64 %vget_lane1, 15 + br i1 %cmp2, label %if.end2, label %if.then2 + +if.then2: ; preds = %if.end + store i1 true, i1* @failed, align 1 + br label %if.end2 + +if.end2: ; preds = %if.then682, %if.end + call void @f2() + %vext = shufflevector <8 x i8> <i8 undef, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %vtbl1.i, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> + %t6 = bitcast <8 x i8> %vext to <2 x i32> + call void @f0(<2 x i32> %t6) + ret void +} + +declare void @f0(<2 x i32>) + +declare <8 x i8> @f1() + +declare void @f2() + +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) + +declare void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64>, <2 x i64>, i64, i8* 
nocapture) + +declare void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64>, <1 x i64>, i64, i8* nocapture) + +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64>, <1 x i64>, i64, i8*) + +declare void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64>, <1 x i64>, i8* nocapture) + +declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) + +declare <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>) + +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll new file mode 100644 index 000000000000..f4ad65584095 --- /dev/null +++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll @@ -0,0 +1,106 @@ +; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s +; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks. + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +; Check that we don't try to tail-call with a non-forwarded sret parameter. +declare void @test_explicit_sret(i1024* sret) #0 + +; This is the only OK case, where we forward the explicit sret pointer. 
+ +; CHECK-LABEL: _test_tailcall_explicit_sret: +; CHECK-NEXT: b _test_explicit_sret +define void @test_tailcall_explicit_sret(i1024* sret %arg) #0 { + tail call void @test_explicit_sret(i1024* %arg) + ret void +} + +; CHECK-LABEL: _test_call_explicit_sret: +; CHECK-NOT: mov x8 +; CHECK: bl _test_explicit_sret +; CHECK: ret +define void @test_call_explicit_sret(i1024* sret %arg) #0 { + call void @test_explicit_sret(i1024* %arg) + ret void +} + +; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_unused: +; CHECK: mov x8, sp +; CHECK-NEXT: bl _test_explicit_sret +; CHECK: ret +define void @test_tailcall_explicit_sret_alloca_unused() #0 { + %l = alloca i1024, align 8 + tail call void @test_explicit_sret(i1024* %l) + ret void +} + +; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers: +; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0] +; CHECK: str [[PTRLOAD1]], [sp] +; CHECK: mov x8, sp +; CHECK-NEXT: bl _test_explicit_sret +; CHECK: ret +define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 { + %l = alloca i1024, align 8 + %r = load i1024* %ptr, align 8 + store i1024 %r, i1024* %l, align 8 + tail call void @test_explicit_sret(i1024* %l) + ret void +} + +; This is too conservative, but doesn't really happen in practice. 
+ +; CHECK-LABEL: _test_tailcall_explicit_sret_gep: +; CHECK: add x8, x0, #128 +; CHECK-NEXT: bl _test_explicit_sret +; CHECK: ret +define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 { + %ptr2 = getelementptr i1024* %ptr, i32 1 + tail call void @test_explicit_sret(i1024* %ptr2) + ret void +} + +; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_returned: +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: mov x8, sp +; CHECK-NEXT: bl _test_explicit_sret +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 { + %l = alloca i1024, align 8 + tail call void @test_explicit_sret(i1024* %l) + %r = load i1024* %l, align 8 + ret i1024 %r +} + +; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_nosret_arg: +; CHECK-DAG: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK-DAG: mov [[FPTR:x[0-9]+]], x0 +; CHECK: mov x0, sp +; CHECK-NEXT: blr [[FPTR]] +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 { + %l = alloca i1024, align 8 + tail call void %f(i1024* %l) + %r = load i1024* %l, align 8 + store i1024 %r, i1024* %arg, align 8 + ret void +} + +; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_: +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: mov x8, sp +; CHECK-NEXT: blr x0 +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 { + %ret = tail call i1024 %f() + store i1024 %ret, i1024* %arg, align 8 + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/tailcall-implicit-sret.ll b/test/CodeGen/AArch64/tailcall-implicit-sret.ll new file mode 100644 index 000000000000..5d6805998d22 --- /dev/null 
+++ b/test/CodeGen/AArch64/tailcall-implicit-sret.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s +; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks. + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +; Check that we don't try to tail-call with an sret-demoted return. + +declare i1024 @test_sret() #0 + +; CHECK-LABEL: _test_call_sret: +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: mov x8, sp +; CHECK-NEXT: bl _test_sret +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define i1024 @test_call_sret() #0 { + %a = call i1024 @test_sret() + ret i1024 %a +} + +; CHECK-LABEL: _test_tailcall_sret: +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: mov x8, sp +; CHECK-NEXT: bl _test_sret +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define i1024 @test_tailcall_sret() #0 { + %a = tail call i1024 @test_sret() + ret i1024 %a +} + +; CHECK-LABEL: _test_indirect_tailcall_sret: +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: mov x8, sp +; CHECK-NEXT: blr x0 +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define i1024 @test_indirect_tailcall_sret(i1024 ()* %f) #0 { + %a = tail call i1024 %f() + ret i1024 %a +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/Mips/adjust-callstack-sp.ll b/test/CodeGen/Mips/adjust-callstack-sp.ll new file mode 100644 index 000000000000..8c61a650a962 --- /dev/null +++ b/test/CodeGen/Mips/adjust-callstack-sp.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=mips -mcpu=mips16 | FileCheck %s -check-prefix=M16 +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | 
FileCheck %s -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips3 | FileCheck %s -check-prefix=GP64 +; RUN: llc < %s -march=mips -mcpu=mips64 | FileCheck %s -check-prefix=GP64 +; RUN: llc < %s -march=mips -mcpu=mips64r6 | FileCheck %s -check-prefix=GP64 + +declare void @bar(i32*) + +define void @foo(i32 %sz) { + ; ALL-LABEL: foo: + + ; M16-NOT: addiu $sp, 0 # 16 bit inst + ; GP32-NOT: addiu $sp, $sp, 0 + ; GP64-NOT: daddiu $sp, $sp, 0 + %a = alloca i32, i32 %sz + call void @bar(i32* %a) + ret void +} diff --git a/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll b/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll new file mode 100644 index 000000000000..d17290e552e0 --- /dev/null +++ b/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll @@ -0,0 +1,80 @@ +; RUN: llc < %s -march=mips64 -target-abi n64 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=MIPSEB +; RUN: llc < %s -march=mips64el -target-abi n64 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=MIPSEL +; RUN: llc < %s -march=mips64 -target-abi n32 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=MIPSEB +; RUN: llc < %s -march=mips64el -target-abi n32 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=MIPSEL + +; #include <stdio.h> +; +; struct S1 { +; char x1; +; short x2; +; char x3; +; }; +; +; struct S2 { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; }; +; +; void fS1(struct S1 s); +; void fS2(struct S2 s); +; +; void f1() { +; struct S1 s1_1; +; fS1(s1_1); +; } +; +; void f2() { +; struct S2 s2_1; +; fS2(s2_1); +; } +; +; int main() { +; f1(); +; f2(); +; } + +%struct.S1 = type { i8, i16, i8 } +%struct.S2 = type { i8, i8, i8, i8, i8 } + +declare void @fS1(i48 inreg) #1 +declare void @fS2(i40 inreg) #1 + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2 + +define void @f1() #0 { +entry: + %s1_1 = alloca %struct.S1, align 
2 + %s1_1.coerce = alloca { i48 } + %0 = bitcast { i48 }* %s1_1.coerce to i8* + %1 = bitcast %struct.S1* %s1_1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 6, i32 0, i1 false) + %2 = getelementptr { i48 }* %s1_1.coerce, i32 0, i32 0 + %3 = load i48* %2, align 1 + call void @fS1(i48 inreg %3) + ret void + ; ALL-LABEL: f1: + + ; MIPSEB: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16 + ; MIPSEL-NOT: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16 +} + +define void @f2() #0 { +entry: + %s2_1 = alloca %struct.S2, align 1 + %s2_1.coerce = alloca { i40 } + %0 = bitcast { i40 }* %s2_1.coerce to i8* + %1 = bitcast %struct.S2* %s2_1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 5, i32 0, i1 false) + %2 = getelementptr { i40 }* %s2_1.coerce, i32 0, i32 0 + %3 = load i40* %2, align 1 + call void @fS2(i40 inreg %3) + ret void + ; ALL-LABEL: f2: + + ; MIPSEB: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 24 + ; MIPSEL-NOT: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 24 +} diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll new file mode 100644 index 000000000000..458b124c9927 --- /dev/null +++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll @@ -0,0 +1,282 @@ +; RUN: llc --march=mips64 -mcpu=mips64r2 < %s | FileCheck %s + +; Generated from the C program: +; +; #include <stdio.h> +; #include <string.h> +; +; struct SmallStruct_1b { +; char x1; +; }; +; +; struct SmallStruct_2b { +; char x1; +; char x2; +; }; +; +; struct SmallStruct_3b { +; char x1; +; char x2; +; char x3; +; }; +; +; struct SmallStruct_4b { +; char x1; +; char x2; +; char x3; +; char x4; +; }; +; +; struct SmallStruct_5b { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; }; +; +; struct SmallStruct_6b { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; char x6; +; }; +; +; struct SmallStruct_7b { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; char x6; +; 
char x7; +; }; +; +; struct SmallStruct_8b { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; char x6; +; char x7; +; char x8; +; }; +; +; struct SmallStruct_9b { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; char x6; +; char x7; +; char x8; +; char x9; +; }; +; +; void varArgF_SmallStruct(char* c, ...); +; +; void smallStruct_1b(struct SmallStruct_1b* ss) { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_2b(struct SmallStruct_2b* ss) { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_3b(struct SmallStruct_3b* ss) +; { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_4b(struct SmallStruct_4b* ss) +; { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_5b(struct SmallStruct_5b* ss) +; { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_6b(struct SmallStruct_6b* ss) +; { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_7b(struct SmallStruct_7b* ss) +; { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_8b(struct SmallStruct_8b* ss) +; { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_9b(struct SmallStruct_9b* ss) +; { +; varArgF_SmallStruct("", *ss); +; } + +%struct.SmallStruct_1b = type { i8 } +%struct.SmallStruct_2b = type { i8, i8 } +%struct.SmallStruct_3b = type { i8, i8, i8 } +%struct.SmallStruct_4b = type { i8, i8, i8, i8 } +%struct.SmallStruct_5b = type { i8, i8, i8, i8, i8 } +%struct.SmallStruct_6b = type { i8, i8, i8, i8, i8, i8 } +%struct.SmallStruct_7b = type { i8, i8, i8, i8, i8, i8, i8 } +%struct.SmallStruct_8b = type { i8, i8, i8, i8, i8, i8, i8, i8 } +%struct.SmallStruct_9b = type { i8, i8, i8, i8, i8, i8, i8, i8, i8 } + +@.str = private unnamed_addr constant [3 x i8] c"01\00", align 1 + +declare void @varArgF_SmallStruct(i8* %c, ...) 
+ +define void @smallStruct_1b(%struct.SmallStruct_1b* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_1b*, align 8 + store %struct.SmallStruct_1b* %ss, %struct.SmallStruct_1b** %ss.addr, align 8 + %0 = load %struct.SmallStruct_1b** %ss.addr, align 8 + %1 = bitcast %struct.SmallStruct_1b* %0 to { i8 }* + %2 = getelementptr { i8 }* %1, i32 0, i32 0 + %3 = load i8* %2, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8 inreg %3) + ret void + ; CHECK-LABEL: smallStruct_1b: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 +} + +define void @smallStruct_2b(%struct.SmallStruct_2b* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_2b*, align 8 + store %struct.SmallStruct_2b* %ss, %struct.SmallStruct_2b** %ss.addr, align 8 + %0 = load %struct.SmallStruct_2b** %ss.addr, align 8 + %1 = bitcast %struct.SmallStruct_2b* %0 to { i16 }* + %2 = getelementptr { i16 }* %1, i32 0, i32 0 + %3 = load i16* %2, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i16 inreg %3) + ret void + ; CHECK-LABEL: smallStruct_2b: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 48 +} + +define void @smallStruct_3b(%struct.SmallStruct_3b* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_3b*, align 8 + %.coerce = alloca { i24 } + store %struct.SmallStruct_3b* %ss, %struct.SmallStruct_3b** %ss.addr, align 8 + %0 = load %struct.SmallStruct_3b** %ss.addr, align 8 + %1 = bitcast { i24 }* %.coerce to i8* + %2 = bitcast %struct.SmallStruct_3b* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 3, i32 0, i1 false) + %3 = getelementptr { i24 }* %.coerce, i32 0, i32 0 + %4 = load i24* %3, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i24 inreg %4) + ret void + ; CHECK-LABEL: smallStruct_3b: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 40 +} + +declare void 
@llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 + +define void @smallStruct_4b(%struct.SmallStruct_4b* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_4b*, align 8 + store %struct.SmallStruct_4b* %ss, %struct.SmallStruct_4b** %ss.addr, align 8 + %0 = load %struct.SmallStruct_4b** %ss.addr, align 8 + %1 = bitcast %struct.SmallStruct_4b* %0 to { i32 }* + %2 = getelementptr { i32 }* %1, i32 0, i32 0 + %3 = load i32* %2, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 inreg %3) + ret void + ; CHECK-LABEL: smallStruct_4b: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 32 +} + +define void @smallStruct_5b(%struct.SmallStruct_5b* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_5b*, align 8 + %.coerce = alloca { i40 } + store %struct.SmallStruct_5b* %ss, %struct.SmallStruct_5b** %ss.addr, align 8 + %0 = load %struct.SmallStruct_5b** %ss.addr, align 8 + %1 = bitcast { i40 }* %.coerce to i8* + %2 = bitcast %struct.SmallStruct_5b* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 5, i32 0, i1 false) + %3 = getelementptr { i40 }* %.coerce, i32 0, i32 0 + %4 = load i40* %3, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i40 inreg %4) + ret void + ; CHECK-LABEL: smallStruct_5b: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 24 +} + +define void @smallStruct_6b(%struct.SmallStruct_6b* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_6b*, align 8 + %.coerce = alloca { i48 } + store %struct.SmallStruct_6b* %ss, %struct.SmallStruct_6b** %ss.addr, align 8 + %0 = load %struct.SmallStruct_6b** %ss.addr, align 8 + %1 = bitcast { i48 }* %.coerce to i8* + %2 = bitcast %struct.SmallStruct_6b* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false) + %3 = getelementptr { i48 }* %.coerce, i32 0, i32 0 + %4 = load i48* %3, align 1 + call void 
(i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4) + ret void + ; CHECK-LABEL: smallStruct_6b: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16 +} + +define void @smallStruct_7b(%struct.SmallStruct_7b* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_7b*, align 8 + %.coerce = alloca { i56 } + store %struct.SmallStruct_7b* %ss, %struct.SmallStruct_7b** %ss.addr, align 8 + %0 = load %struct.SmallStruct_7b** %ss.addr, align 8 + %1 = bitcast { i56 }* %.coerce to i8* + %2 = bitcast %struct.SmallStruct_7b* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 7, i32 0, i1 false) + %3 = getelementptr { i56 }* %.coerce, i32 0, i32 0 + %4 = load i56* %3, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i56 inreg %4) + ret void + ; CHECK-LABEL: smallStruct_7b: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 8 +} + +define void @smallStruct_8b(%struct.SmallStruct_8b* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_8b*, align 8 + store %struct.SmallStruct_8b* %ss, %struct.SmallStruct_8b** %ss.addr, align 8 + %0 = load %struct.SmallStruct_8b** %ss.addr, align 8 + %1 = bitcast %struct.SmallStruct_8b* %0 to { i64 }* + %2 = getelementptr { i64 }* %1, i32 0, i32 0 + %3 = load i64* %2, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3) + ret void + ; CHECK-LABEL: smallStruct_8b: + ; CHECK-NOT: dsll +} + +define void @smallStruct_9b(%struct.SmallStruct_9b* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_9b*, align 8 + %.coerce = alloca { i64, i8 } + store %struct.SmallStruct_9b* %ss, %struct.SmallStruct_9b** %ss.addr, align 8 + %0 = load %struct.SmallStruct_9b** %ss.addr, align 8 + %1 = bitcast { i64, i8 }* %.coerce to i8* + %2 = bitcast %struct.SmallStruct_9b* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 9, i32 0, i1 
false) + %3 = getelementptr { i64, i8 }* %.coerce, i32 0, i32 0 + %4 = load i64* %3, align 1 + %5 = getelementptr { i64, i8 }* %.coerce, i32 0, i32 1 + %6 = load i8* %5, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %4, i8 inreg %6) + ret void + ; CHECK-LABEL: smallStruct_9b: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 +} diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll new file mode 100644 index 000000000000..899a3e8ff0a1 --- /dev/null +++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll @@ -0,0 +1,149 @@ +; RUN: llc --march=mips64 -mcpu=mips64r2 < %s | FileCheck %s + +; Generated from the C program: +; +; #include <stdio.h> +; #include <string.h> +; +; struct SmallStruct_1b1s { +; char x1; +; short x2; +; }; +; +; struct SmallStruct_1b1i { +; char x1; +; int x2; +; }; +; +; struct SmallStruct_1b1s1b { +; char x1; +; short x2; +; char x3; +; }; +; +; struct SmallStruct_1s1i { +; short x1; +; int x2; +; }; +; +; struct SmallStruct_3b1s { +; char x1; +; char x2; +; char x3; +; short x4; +; }; +; +; void varArgF_SmallStruct(char* c, ...); +; +; void smallStruct_1b1s(struct SmallStruct_1b1s* ss) +; { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_1b1i(struct SmallStruct_1b1i* ss) +; { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_1b1s1b(struct SmallStruct_1b1s1b* ss) +; { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_1s1i(struct SmallStruct_1s1i* ss) +; { +; varArgF_SmallStruct("", *ss); +; } +; +; void smallStruct_3b1s(struct SmallStruct_3b1s* ss) +; { +; varArgF_SmallStruct("", *ss); +; } + +%struct.SmallStruct_1b1s = type { i8, i16 } +%struct.SmallStruct_1b1i = type { i8, i32 } +%struct.SmallStruct_1b1s1b = type { i8, i16, i8 } +%struct.SmallStruct_1s1i = type { i16, i32 } +%struct.SmallStruct_3b1s = type { 
i8, i8, i8, i16 } + +@.str = private unnamed_addr constant [3 x i8] c"01\00", align 1 + +declare void @varArgF_SmallStruct(i8* %c, ...) + +define void @smallStruct_1b1s(%struct.SmallStruct_1b1s* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_1b1s*, align 8 + store %struct.SmallStruct_1b1s* %ss, %struct.SmallStruct_1b1s** %ss.addr, align 8 + %0 = load %struct.SmallStruct_1b1s** %ss.addr, align 8 + %1 = bitcast %struct.SmallStruct_1b1s* %0 to { i32 }* + %2 = getelementptr { i32 }* %1, i32 0, i32 0 + %3 = load i32* %2, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 inreg %3) + ret void + ; CHECK-LABEL: smallStruct_1b1s: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 32 +} + +define void @smallStruct_1b1i(%struct.SmallStruct_1b1i* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_1b1i*, align 8 + store %struct.SmallStruct_1b1i* %ss, %struct.SmallStruct_1b1i** %ss.addr, align 8 + %0 = load %struct.SmallStruct_1b1i** %ss.addr, align 8 + %1 = bitcast %struct.SmallStruct_1b1i* %0 to { i64 }* + %2 = getelementptr { i64 }* %1, i32 0, i32 0 + %3 = load i64* %2, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3) + ret void + ; CHECK-LABEL: smallStruct_1b1i: + ; CHECK-NOT: dsll +} + +define void @smallStruct_1b1s1b(%struct.SmallStruct_1b1s1b* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_1b1s1b*, align 8 + %.coerce = alloca { i48 } + store %struct.SmallStruct_1b1s1b* %ss, %struct.SmallStruct_1b1s1b** %ss.addr, align 8 + %0 = load %struct.SmallStruct_1b1s1b** %ss.addr, align 8 + %1 = bitcast { i48 }* %.coerce to i8* + %2 = bitcast %struct.SmallStruct_1b1s1b* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false) + %3 = getelementptr { i48 }* %.coerce, i32 0, i32 0 + %4 = load i48* %3, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x 
i8]* @.str, i32 0, i32 0), i48 inreg %4) + ret void + ; CHECK-LABEL: smallStruct_1b1s1b: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16 +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 + +define void @smallStruct_1s1i(%struct.SmallStruct_1s1i* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_1s1i*, align 8 + store %struct.SmallStruct_1s1i* %ss, %struct.SmallStruct_1s1i** %ss.addr, align 8 + %0 = load %struct.SmallStruct_1s1i** %ss.addr, align 8 + %1 = bitcast %struct.SmallStruct_1s1i* %0 to { i64 }* + %2 = getelementptr { i64 }* %1, i32 0, i32 0 + %3 = load i64* %2, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3) + ret void + ; CHECK-LABEL: smallStruct_1s1i: + ; CHECK-NOT: dsll +} + +define void @smallStruct_3b1s(%struct.SmallStruct_3b1s* %ss) #0 { +entry: + %ss.addr = alloca %struct.SmallStruct_3b1s*, align 8 + %.coerce = alloca { i48 } + store %struct.SmallStruct_3b1s* %ss, %struct.SmallStruct_3b1s** %ss.addr, align 8 + %0 = load %struct.SmallStruct_3b1s** %ss.addr, align 8 + %1 = bitcast { i48 }* %.coerce to i8* + %2 = bitcast %struct.SmallStruct_3b1s* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false) + %3 = getelementptr { i48 }* %.coerce, i32 0, i32 0 + %4 = load i48* %3, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4) + ret void + ; CHECK-LABEL: smallStruct_3b1s: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16 +} diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll new file mode 100644 index 000000000000..1f7362523346 --- /dev/null +++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll @@ -0,0 +1,161 @@ +; RUN: llc --march=mips64 -mcpu=mips64r2 < %s | FileCheck %s + +; 
Generated from the C program: +; +; #include <stdio.h> +; #include <string.h> +; +; struct SmallStruct_1b { +; char x1; +; }; +; +; struct SmallStruct_2b { +; char x1; +; char x2; +; }; +; +; struct SmallStruct_3b { +; char x1; +; char x2; +; char x3; +; }; +; +; struct SmallStruct_4b { +; char x1; +; char x2; +; char x3; +; char x4; +; }; +; +; struct SmallStruct_5b { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; }; +; +; struct SmallStruct_6b { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; char x6; +; }; +; +; struct SmallStruct_7b { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; char x6; +; char x7; +; }; +; +; struct SmallStruct_8b { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; char x6; +; char x7; +; char x8; +; }; +; +; struct SmallStruct_9b { +; char x1; +; char x2; +; char x3; +; char x4; +; char x5; +; char x6; +; char x7; +; char x8; +; char x9; +; }; +; +; void varArgF_SmallStruct(char* c, ...); +; +; void smallStruct_1b_x9(struct SmallStruct_1b* ss1, struct SmallStruct_1b* ss2, struct SmallStruct_1b* ss3, struct SmallStruct_1b* ss4, struct SmallStruct_1b* ss5, struct SmallStruct_1b* ss6, struct SmallStruct_1b* ss7, struct SmallStruct_1b* ss8, struct SmallStruct_1b* ss9) +; { +; varArgF_SmallStruct("", *ss1, *ss2, *ss3, *ss4, *ss5, *ss6, *ss7, *ss8, *ss9); +; } + +%struct.SmallStruct_1b = type { i8 } + +@.str = private unnamed_addr constant [3 x i8] c"01\00", align 1 + +declare void @varArgF_SmallStruct(i8* %c, ...) 
+ +define void @smallStruct_1b_x9(%struct.SmallStruct_1b* %ss1, %struct.SmallStruct_1b* %ss2, %struct.SmallStruct_1b* %ss3, %struct.SmallStruct_1b* %ss4, %struct.SmallStruct_1b* %ss5, %struct.SmallStruct_1b* %ss6, %struct.SmallStruct_1b* %ss7, %struct.SmallStruct_1b* %ss8, %struct.SmallStruct_1b* %ss9) #0 { +entry: + %ss1.addr = alloca %struct.SmallStruct_1b*, align 8 + %ss2.addr = alloca %struct.SmallStruct_1b*, align 8 + %ss3.addr = alloca %struct.SmallStruct_1b*, align 8 + %ss4.addr = alloca %struct.SmallStruct_1b*, align 8 + %ss5.addr = alloca %struct.SmallStruct_1b*, align 8 + %ss6.addr = alloca %struct.SmallStruct_1b*, align 8 + %ss7.addr = alloca %struct.SmallStruct_1b*, align 8 + %ss8.addr = alloca %struct.SmallStruct_1b*, align 8 + %ss9.addr = alloca %struct.SmallStruct_1b*, align 8 + store %struct.SmallStruct_1b* %ss1, %struct.SmallStruct_1b** %ss1.addr, align 8 + store %struct.SmallStruct_1b* %ss2, %struct.SmallStruct_1b** %ss2.addr, align 8 + store %struct.SmallStruct_1b* %ss3, %struct.SmallStruct_1b** %ss3.addr, align 8 + store %struct.SmallStruct_1b* %ss4, %struct.SmallStruct_1b** %ss4.addr, align 8 + store %struct.SmallStruct_1b* %ss5, %struct.SmallStruct_1b** %ss5.addr, align 8 + store %struct.SmallStruct_1b* %ss6, %struct.SmallStruct_1b** %ss6.addr, align 8 + store %struct.SmallStruct_1b* %ss7, %struct.SmallStruct_1b** %ss7.addr, align 8 + store %struct.SmallStruct_1b* %ss8, %struct.SmallStruct_1b** %ss8.addr, align 8 + store %struct.SmallStruct_1b* %ss9, %struct.SmallStruct_1b** %ss9.addr, align 8 + %0 = load %struct.SmallStruct_1b** %ss1.addr, align 8 + %1 = load %struct.SmallStruct_1b** %ss2.addr, align 8 + %2 = load %struct.SmallStruct_1b** %ss3.addr, align 8 + %3 = load %struct.SmallStruct_1b** %ss4.addr, align 8 + %4 = load %struct.SmallStruct_1b** %ss5.addr, align 8 + %5 = load %struct.SmallStruct_1b** %ss6.addr, align 8 + %6 = load %struct.SmallStruct_1b** %ss7.addr, align 8 + %7 = load %struct.SmallStruct_1b** %ss8.addr, align 8 + %8 = 
load %struct.SmallStruct_1b** %ss9.addr, align 8 + %9 = bitcast %struct.SmallStruct_1b* %0 to { i8 }* + %10 = getelementptr { i8 }* %9, i32 0, i32 0 + %11 = load i8* %10, align 1 + %12 = bitcast %struct.SmallStruct_1b* %1 to { i8 }* + %13 = getelementptr { i8 }* %12, i32 0, i32 0 + %14 = load i8* %13, align 1 + %15 = bitcast %struct.SmallStruct_1b* %2 to { i8 }* + %16 = getelementptr { i8 }* %15, i32 0, i32 0 + %17 = load i8* %16, align 1 + %18 = bitcast %struct.SmallStruct_1b* %3 to { i8 }* + %19 = getelementptr { i8 }* %18, i32 0, i32 0 + %20 = load i8* %19, align 1 + %21 = bitcast %struct.SmallStruct_1b* %4 to { i8 }* + %22 = getelementptr { i8 }* %21, i32 0, i32 0 + %23 = load i8* %22, align 1 + %24 = bitcast %struct.SmallStruct_1b* %5 to { i8 }* + %25 = getelementptr { i8 }* %24, i32 0, i32 0 + %26 = load i8* %25, align 1 + %27 = bitcast %struct.SmallStruct_1b* %6 to { i8 }* + %28 = getelementptr { i8 }* %27, i32 0, i32 0 + %29 = load i8* %28, align 1 + %30 = bitcast %struct.SmallStruct_1b* %7 to { i8 }* + %31 = getelementptr { i8 }* %30, i32 0, i32 0 + %32 = load i8* %31, align 1 + %33 = bitcast %struct.SmallStruct_1b* %8 to { i8 }* + %34 = getelementptr { i8 }* %33, i32 0, i32 0 + %35 = load i8* %34, align 1 + call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8 inreg %11, i8 inreg %14, i8 inreg %17, i8 inreg %20, i8 inreg %23, i8 inreg %26, i8 inreg %29, i8 inreg %32, i8 inreg %35) + ret void + ; CHECK-LABEL: smallStruct_1b_x9: + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 + ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56 +} diff --git 
a/test/CodeGen/Mips/check-adde-redundant-moves.ll b/test/CodeGen/Mips/check-adde-redundant-moves.ll new file mode 100644 index 000000000000..527c21770263 --- /dev/null +++ b/test/CodeGen/Mips/check-adde-redundant-moves.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s -check-prefix=ALL +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s -check-prefix=ALL +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=ALL +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL + +define i64 @add_i64(i64 %a) { + ; GP32-LABEL: add_i64 + + ; GP32-NOT: move $[[T0:[0-9]+]], $[[T0]] + %r = add i64 5, %a + ret i64 %r +} + +define i128 @add_i128(i128 %a) { + ; ALL-LABEL: add_i128 + + ; ALL-NOT: move $[[T0:[0-9]+]], $[[T0]] + %r = add i128 5, %a + ret i128 %r +} diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll index 8e83b0064ed9..aa1f09bf7aba 100644 --- a/test/CodeGen/Mips/fcmp.ll +++ b/test/CodeGen/Mips/fcmp.ll @@ -1,10 +1,17 @@ -; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=ALL -check-prefix=32-C -; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=32-C -; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=32-CMP -; RUN: llc < %s -march=mips64el -mcpu=mips4 | FileCheck %s -check-prefix=ALL -check-prefix=64-C -; RUN: llc < %s -march=mips64el -mcpu=mips64 | FileCheck %s -check-prefix=ALL 
-check-prefix=64-C -; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL -check-prefix=64-C -; RUN: llc < %s -march=mips64el -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL -check-prefix=64-CMP +; RUN: llc < %s -march=mips -mcpu=mips32 | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=32-C +; RUN: llc < %s -march=mips -mcpu=mips32r2 | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=32-C +; RUN: llc < %s -march=mips -mcpu=mips32r6 | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=32-CMP +; RUN: llc < %s -march=mips64 -mcpu=mips4 | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=64-C +; RUN: llc < %s -march=mips64 -mcpu=mips64 | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=64-C +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=64-C +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=64-CMP define i32 @false_f32(float %a, float %b) nounwind { ; ALL-LABEL: false_f32: @@ -18,15 +25,13 @@ define i32 @false_f32(float %a, float %b) nounwind { define i32 @oeq_f32(float %a, float %b) nounwind { ; ALL-LABEL: oeq_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.eq.s $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.eq.s $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.eq.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -44,15 +49,13 @@ define i32 @oeq_f32(float %a, float %b) nounwind { define i32 @ogt_f32(float %a, float %b) nounwind { ; ALL-LABEL: ogt_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ule.s $f12, 
$f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ule.s $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -70,15 +73,13 @@ define i32 @ogt_f32(float %a, float %b) nounwind { define i32 @oge_f32(float %a, float %b) nounwind { ; ALL-LABEL: oge_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ult.s $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ult.s $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -96,15 +97,13 @@ define i32 @oge_f32(float %a, float %b) nounwind { define i32 @olt_f32(float %a, float %b) nounwind { ; ALL-LABEL: olt_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.olt.s $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.olt.s $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -122,15 +121,13 @@ define i32 @olt_f32(float %a, float %b) nounwind { define i32 @ole_f32(float %a, float %b) nounwind { ; ALL-LABEL: ole_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ole.s $f12, 
$f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ole.s $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -148,15 +145,13 @@ define i32 @ole_f32(float %a, float %b) nounwind { define i32 @one_f32(float %a, float %b) nounwind { ; ALL-LABEL: one_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ueq.s $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ueq.s $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -176,15 +171,13 @@ define i32 @one_f32(float %a, float %b) nounwind { define i32 @ord_f32(float %a, float %b) nounwind { ; ALL-LABEL: ord_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.un.s $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.un.s $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.un.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -204,15 +197,13 @@ define i32 @ord_f32(float %a, float %b) nounwind { define i32 @ueq_f32(float %a, float %b) nounwind { ; ALL-LABEL: ueq_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ueq.s 
$f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ueq.s $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -230,15 +221,13 @@ define i32 @ueq_f32(float %a, float %b) nounwind { define i32 @ugt_f32(float %a, float %b) nounwind { ; ALL-LABEL: ugt_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ole.s $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ole.s $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ult.s $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -256,15 +245,13 @@ define i32 @ugt_f32(float %a, float %b) nounwind { define i32 @uge_f32(float %a, float %b) nounwind { ; ALL-LABEL: uge_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.olt.s $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.olt.s $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ule.s $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -282,15 +269,13 @@ define i32 @uge_f32(float %a, float %b) nounwind { define i32 @ult_f32(float %a, float %b) nounwind { ; ALL-LABEL: ult_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: 
c.ult.s $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ult.s $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ult.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -300,6 +285,7 @@ define i32 @ult_f32(float %a, float %b) nounwind { ; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] ; 64-CMP-DAG: andi $2, $[[T1]], 1 + %1 = fcmp ult float %a, %b %2 = zext i1 %1 to i32 ret i32 %2 @@ -308,15 +294,13 @@ define i32 @ult_f32(float %a, float %b) nounwind { define i32 @ule_f32(float %a, float %b) nounwind { ; ALL-LABEL: ule_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ule.s $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ule.s $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ule.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -334,15 +318,13 @@ define i32 @ule_f32(float %a, float %b) nounwind { define i32 @une_f32(float %a, float %b) nounwind { ; ALL-LABEL: une_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.eq.s $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.eq.s $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.eq.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -362,15 +344,13 @@ define i32 @une_f32(float %a, float 
%b) nounwind { define i32 @uno_f32(float %a, float %b) nounwind { ; ALL-LABEL: uno_f32: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.un.s $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.un.s $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.un.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -406,15 +386,13 @@ define i32 @false_f64(double %a, double %b) nounwind { define i32 @oeq_f64(double %a, double %b) nounwind { ; ALL-LABEL: oeq_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.eq.d $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.eq.d $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.eq.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -432,15 +410,13 @@ define i32 @oeq_f64(double %a, double %b) nounwind { define i32 @ogt_f64(double %a, double %b) nounwind { ; ALL-LABEL: ogt_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ule.d $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ule.d $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -458,15 +434,13 @@ define i32 
@ogt_f64(double %a, double %b) nounwind { define i32 @oge_f64(double %a, double %b) nounwind { ; ALL-LABEL: oge_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ult.d $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ult.d $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -484,15 +458,13 @@ define i32 @oge_f64(double %a, double %b) nounwind { define i32 @olt_f64(double %a, double %b) nounwind { ; ALL-LABEL: olt_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.olt.d $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.olt.d $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -510,15 +482,13 @@ define i32 @olt_f64(double %a, double %b) nounwind { define i32 @ole_f64(double %a, double %b) nounwind { ; ALL-LABEL: ole_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ole.d $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ole.d $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -536,15 
+506,13 @@ define i32 @ole_f64(double %a, double %b) nounwind { define i32 @one_f64(double %a, double %b) nounwind { ; ALL-LABEL: one_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ueq.d $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ueq.d $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -564,15 +532,13 @@ define i32 @one_f64(double %a, double %b) nounwind { define i32 @ord_f64(double %a, double %b) nounwind { ; ALL-LABEL: ord_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.un.d $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.un.d $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.un.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -592,15 +558,13 @@ define i32 @ord_f64(double %a, double %b) nounwind { define i32 @ueq_f64(double %a, double %b) nounwind { ; ALL-LABEL: ueq_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ueq.d $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ueq.d $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], 
$[[T0]] @@ -618,15 +582,13 @@ define i32 @ueq_f64(double %a, double %b) nounwind { define i32 @ugt_f64(double %a, double %b) nounwind { ; ALL-LABEL: ugt_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ole.d $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ole.d $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ult.d $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -644,15 +606,13 @@ define i32 @ugt_f64(double %a, double %b) nounwind { define i32 @uge_f64(double %a, double %b) nounwind { ; ALL-LABEL: uge_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.olt.d $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.olt.d $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ule.d $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -670,15 +630,13 @@ define i32 @uge_f64(double %a, double %b) nounwind { define i32 @ult_f64(double %a, double %b) nounwind { ; ALL-LABEL: ult_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ult.d $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ult.d $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ult.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: 
mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -696,15 +654,13 @@ define i32 @ult_f64(double %a, double %b) nounwind { define i32 @ule_f64(double %a, double %b) nounwind { ; ALL-LABEL: ule_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.ule.d $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.ule.d $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.ule.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -722,15 +678,13 @@ define i32 @ule_f64(double %a, double %b) nounwind { define i32 @une_f64(double %a, double %b) nounwind { ; ALL-LABEL: une_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.eq.d $f12, $f14 -; 32-C-DAG: movf $[[T0]], $1, $fcc0 +; 32-C: movt $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.eq.d $f12, $f13 -; 64-C-DAG: movf $[[T0]], $1, $fcc0 +; 64-C: movt $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.eq.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] @@ -750,15 +704,13 @@ define i32 @une_f64(double %a, double %b) nounwind { define i32 @uno_f64(double %a, double %b) nounwind { ; ALL-LABEL: uno_f64: -; 32-C-DAG: addiu $[[T0:2]], $zero, 0 -; 32-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 32-C-DAG: addiu $2, $zero, 1 ; 32-C-DAG: c.un.d $f12, $f14 -; 32-C-DAG: movt $[[T0]], $1, $fcc0 +; 32-C: movf $2, $zero, $fcc0 -; 64-C-DAG: addiu $[[T0:2]], $zero, 0 -; 64-C-DAG: addiu $[[T1:[0-9]+]], $zero, 1 +; 64-C-DAG: addiu $2, $zero, 1 ; 64-C-DAG: c.un.d $f12, $f13 -; 64-C-DAG: movt $[[T0]], $1, $fcc0 +; 64-C: movf $2, $zero, $fcc0 ; 32-CMP-DAG: cmp.un.d $[[T0:f[0-9]+]], $f12, $f14 
; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll index 271631efb40a..f0667eec3b33 100644 --- a/test/CodeGen/Mips/fmadd1.ll +++ b/test/CodeGen/Mips/fmadd1.ll @@ -39,10 +39,9 @@ entry: ; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] ; 32R6-DAG: add.s $f0, $[[T1]], $[[T2]] -; 64-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13 -; 64-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 64-DAG: add.s $f0, $[[T1]], $[[T2]] +; 64-DAG: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-DAG: add.s $f0, $[[T0]], $[[T1]] ; 64R2: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 ; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] @@ -80,10 +79,9 @@ entry: ; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] ; 32R6-DAG: add.s $f0, $[[T1]], $[[T2]] -; 64-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13 -; 64-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 64-DAG: add.s $f0, $[[T1]], $[[T2]] +; 64-DAG: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-DAG: add.s $f0, $[[T0]], $[[T1]] ; 64R2: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 ; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] @@ -124,10 +122,11 @@ entry: ; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] ; 32R6-DAG: sub.s $f0, $[[T2]], $[[T1]] -; 64-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13 -; 64-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 64-DAG: sub.s $f0, $[[T2]], $[[T1]] +; 64-NONAN: nmadd.s $f0, $f14, $f12, $f13 + +; 64-NAN: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-NAN: sub.s $f0, $[[T1]], $[[T0]] ; 64R2-NONAN: nmadd.s $f0, $f14, $f12, $f13 @@ -164,10 +163,11 @@ entry: ; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]] ; 32R2-NAN: sub.s $f0, $[[T2]], $[[T1]] -; 64-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13 -; 64-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 64-DAG: sub.s $f0, $[[T2]], $[[T1]] +; 64-NAN: 
msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-NAN: sub.s $f0, $[[T1]], $[[T0]] + +; 64-NONAN: nmsub.s $f0, $f14, $f12, $f13 ; 64R2-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 ; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]] @@ -206,10 +206,9 @@ entry: ; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] ; 32R6-DAG: add.d $f0, $[[T1]], $[[T2]] -; 64-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 -; 64-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] -; 64-DAG: add.d $f0, $[[T1]], $[[T2]] +; 64-DAG: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-DAG: add.d $f0, $[[T0]], $[[T1]] ; 64R2: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 ; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] @@ -248,10 +247,9 @@ entry: ; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] ; 32R6-DAG: add.d $f0, $[[T1]], $[[T2]] -; 64-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 -; 64-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] -; 64-DAG: add.d $f0, $[[T1]], $[[T2]] +; 64-DAG: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-DAG: add.d $f0, $[[T0]], $[[T1]] ; 64R2: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 ; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] @@ -293,10 +291,11 @@ entry: ; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] ; 32R6-DAG: sub.d $f0, $[[T2]], $[[T1]] -; 64-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 -; 64-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] -; 64-DAG: sub.d $f0, $[[T2]], $[[T1]] +; 64-NONAN: nmadd.d $f0, $f14, $f12, $f13 + +; 64-NAN: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-NAN: sub.d $f0, $[[T1]], $[[T0]] ; 64R2-NONAN: nmadd.d $f0, $f14, $f12, $f13 @@ -340,10 +339,11 @@ entry: ; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] ; 32R6-DAG: sub.d $f0, $[[T2]], $[[T1]] -; 64-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 -; 64-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] -; 64-DAG: 
sub.d $f0, $[[T2]], $[[T1]] +; 64-NONAN: nmsub.d $f0, $f14, $f12, $f13 + +; 64-NAN: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-NAN: sub.d $f0, $[[T1]], $[[T0]] ; 64R2-NONAN: nmsub.d $f0, $f14, $f12, $f13 diff --git a/test/CodeGen/Mips/llvm-ir/add.ll b/test/CodeGen/Mips/llvm-ir/add.ll new file mode 100644 index 000000000000..83774eda634f --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/add.ll @@ -0,0 +1,115 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64 + +define signext i1 @add_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: add_i1: + + ; ALL: addu $[[T0:[0-9]+]], $4, $5 + ; ALL: sll $[[T0]], $[[T0]], 31 + ; ALL: sra $2, $[[T0]], 31 + + %r = add i1 %a, %b + ret i1 %r +} + +define signext i8 @add_i8(i8 signext %a, i8 signext %b) { +entry: +; ALL-LABEL: add_i8: + + ; NOT-R2-R6: addu $[[T0:[0-9]+]], $4, $5 + ; NOT-R2-R6: sll $[[T0]], 
$[[T0]], 24 + ; NOT-R2-R6: sra $2, $[[T0]], 24 + + ; R2-R6: addu $[[T0:[0-9]+]], $4, $5 + ; R2-R6: seb $2, $[[T0:[0-9]+]] + + %r = add i8 %a, %b + ret i8 %r +} + +define signext i16 @add_i16(i16 signext %a, i16 signext %b) { +entry: +; ALL-LABEL: add_i16: + + ; NOT-R2-R6: addu $[[T0:[0-9]+]], $4, $5 + ; NOT-R2-R6: sll $[[T0]], $[[T0]], 16 + ; NOT-R2-R6: sra $2, $[[T0]], 16 + + ; R2-R6: addu $[[T0:[0-9]+]], $4, $5 + ; R2-R6: seh $2, $[[T0:[0-9]+]] + + %r = add i16 %a, %b + ret i16 %r +} + +define signext i32 @add_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: add_i32: + + ; ALL: addu $2, $4, $5 + + %r = add i32 %a, %b + ret i32 %r +} + +define signext i64 @add_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: add_i64: + + ; GP32: addu $3, $5, $7 + ; GP32: sltu $[[T0:[0-9]+]], $3, $7 + ; GP32: addu $[[T1:[0-9]+]], $[[T0]], $6 + ; GP32: addu $2, $4, $[[T1]] + + ; GP64: daddu $2, $4, $5 + + %r = add i64 %a, %b + ret i64 %r +} + +define signext i128 @add_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: add_i128: + + ; GP32: lw $[[T0:[0-9]+]], 28($sp) + ; GP32: addu $[[T1:[0-9]+]], $7, $[[T0]] + ; GP32: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]] + ; GP32: lw $[[T3:[0-9]+]], 24($sp) + ; GP32: addu $[[T4:[0-9]+]], $[[T2]], $[[T3]] + ; GP32: addu $[[T5:[0-9]+]], $6, $[[T4]] + ; GP32: sltu $[[T6:[0-9]+]], $[[T5]], $[[T3]] + ; GP32: lw $[[T7:[0-9]+]], 20($sp) + ; GP32: addu $[[T8:[0-9]+]], $[[T6]], $[[T7]] + ; GP32: lw $[[T9:[0-9]+]], 16($sp) + ; GP32: addu $3, $5, $[[T8]] + ; GP32: sltu $[[T10:[0-9]+]], $3, $[[T7]] + ; GP32: addu $[[T11:[0-9]+]], $[[T10]], $[[T9]] + ; GP32: addu $2, $4, $[[T11]] + ; GP32: move $4, $[[T5]] + ; GP32: move $5, $[[T1]] + + ; GP64: daddu $3, $5, $7 + ; GP64: sltu $[[T0:[0-9]+]], $3, $7 + ; GP64: daddu $[[T1:[0-9]+]], $[[T0]], $6 + ; GP64: daddu $2, $4, $[[T1]] + + %r = add i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/and.ll b/test/CodeGen/Mips/llvm-ir/and.ll new file mode 100644 index 
000000000000..09d0ef9238af --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/and.ll @@ -0,0 +1,94 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 + +define signext i1 @and_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: and_i1: + + ; ALL: and $2, $4, $5 + + %r = and i1 %a, %b + ret i1 %r +} + +define signext i8 @and_i8(i8 signext %a, i8 signext %b) { +entry: +; ALL-LABEL: and_i8: + + ; ALL: and $2, $4, $5 + + %r = and i8 %a, %b + ret i8 %r +} + +define signext i16 @and_i16(i16 signext %a, i16 signext %b) { +entry: +; ALL-LABEL: and_i16: + + ; ALL: and $2, $4, $5 + + %r = and i16 %a, %b + ret i16 %r +} + +define signext i32 @and_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: and_i32: + + ; GP32: and $2, $4, $5 + + ; GP64: and $[[T0:[0-9]+]], $4, $5 + ; GP64: sll $2, $[[T0]], 0 + + %r = and i32 %a, %b + ret i32 %r +} + +define signext i64 @and_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: and_i64: + + ; GP32: and $2, $4, $6 + ; GP32: and $3, $5, $7 + + ; GP64: and $2, $4, $5 + + %r = and i64 %a, %b + ret i64 %r +} + +define signext i128 
@and_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: and_i128: + + ; GP32: lw $[[T0:[0-9]+]], 24($sp) + ; GP32: lw $[[T1:[0-9]+]], 20($sp) + ; GP32: lw $[[T2:[0-9]+]], 16($sp) + ; GP32: and $2, $4, $[[T2]] + ; GP32: and $3, $5, $[[T1]] + ; GP32: and $4, $6, $[[T0]] + ; GP32: lw $[[T3:[0-9]+]], 28($sp) + ; GP32: and $5, $7, $[[T3]] + + ; GP64: and $2, $4, $6 + ; GP64: and $3, $5, $7 + + %r = and i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/ashr.ll b/test/CodeGen/Mips/llvm-ir/ashr.ll new file mode 100644 index 000000000000..415998929aa0 --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/ashr.ll @@ -0,0 +1,188 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 \ +; RUN: -check-prefix=M2 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 -check-prefix=NOT-R2-R6 \ +; RUN: -check-prefix=32R1-R2 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 \ +; RUN: -check-prefix=32R1-R2 -check-prefix=R2-R6 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 \ +; RUN: -check-prefix=32R6 -check-prefix=R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=M3 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: 
-check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=64R6 -check-prefix=R2-R6 + +define signext i1 @ashr_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: ashr_i1: + + ; ALL: move $2, $4 + + %r = ashr i1 %a, %b + ret i1 %r +} + +define signext i8 @ashr_i8(i8 signext %a, i8 signext %b) { +entry: +; ALL-LABEL: ashr_i8: + + ; FIXME: The andi instruction is redundant. + ; ALL: andi $[[T0:[0-9]+]], $5, 255 + ; ALL: srav $2, $4, $[[T0]] + + %r = ashr i8 %a, %b + ret i8 %r +} + +define signext i16 @ashr_i16(i16 signext %a, i16 signext %b) { +entry: +; ALL-LABEL: ashr_i16: + + ; FIXME: The andi instruction is redundant. + ; ALL: andi $[[T0:[0-9]+]], $5, 65535 + ; ALL: srav $2, $4, $[[T0]] + + %r = ashr i16 %a, %b + ret i16 %r +} + +define signext i32 @ashr_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: ashr_i32: + + ; ALL: srav $2, $4, $5 + + %r = ashr i32 %a, %b + ret i32 %r +} + +define signext i64 @ashr_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: ashr_i64: + + ; M2: srav $[[T0:[0-9]+]], $4, $7 + ; M2: andi $[[T1:[0-9]+]], $7, 32 + ; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]] + ; M2: move $3, $[[T0]] + ; M2: srlv $[[T2:[0-9]+]], $5, $7 + ; M2: not $[[T3:[0-9]+]], $7 + ; M2: sll $[[T4:[0-9]+]], $4, 1 + ; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]] + ; M2: or $3, $[[T3]], $[[T2]] + ; M2: $[[BB0]]: + ; M2: beqz $[[T1]], $[[BB1:BB[0-9_]+]] + ; M2: nop + ; M2: sra $2, $4, 31 + ; M2: $[[BB1]]: + ; M2: jr $ra + ; M2: nop + + ; 32R1-R2: srlv $[[T0:[0-9]+]], $5, $7 + ; 32R1-R2: not $[[T1:[0-9]+]], $7 + ; 32R1-R2: sll $[[T2:[0-9]+]], $4, 1 + ; 32R1-R2: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]] + ; 32R1-R2: or $3, $[[T3]], $[[T0]] + ; 32R1-R2: srav $[[T4:[0-9]+]], $4, $7 + ; 32R1-R2: andi $[[T5:[0-9]+]], $7, 32 + ; 32R1-R2: movn $3, $[[T4]], $[[T5]] + ; 32R1-R2: sra $4, $4, 31 + ; 32R1-R2: jr $ra + ; 32R1-R2: movn $2, $4, $[[T5]] + + ; 32R6: srav $[[T0:[0-9]+]], $4, $7 + ; 32R6: andi $[[T1:[0-9]+]], $7, 32 + ; 32R6: seleqz $[[T2:[0-9]+]], $[[T0]], 
$[[T1]] + ; 32R6: sra $[[T3:[0-9]+]], $4, 31 + ; 32R6: selnez $[[T4:[0-9]+]], $[[T3]], $[[T1]] + ; 32R6: or $[[T5:[0-9]+]], $[[T4]], $[[T2]] + ; 32R6: srlv $[[T6:[0-9]+]], $5, $7 + ; 32R6: not $[[T7:[0-9]+]], $7 + ; 32R6: sll $[[T8:[0-9]+]], $4, 1 + ; 32R6: sllv $[[T9:[0-9]+]], $[[T8]], $[[T7]] + ; 32R6: or $[[T10:[0-9]+]], $[[T9]], $[[T6]] + ; 32R6: seleqz $[[T11:[0-9]+]], $[[T10]], $[[T1]] + ; 32R6: selnez $[[T12:[0-9]+]], $[[T0]], $[[T1]] + ; 32R6: jr $ra + ; 32R6: or $3, $[[T0]], $[[T11]] + + ; FIXME: The sll instruction below is redundant. + ; GP64: sll $[[T0:[0-9]+]], $5, 0 + ; GP64: dsrav $2, $4, $[[T0]] + + %r = ashr i64 %a, %b + ret i64 %r +} + +define signext i128 @ashr_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: ashr_i128: + + ; GP32: lw $25, %call16(__ashrti3)($gp) + + ; M3: sll $[[T0:[0-9]+]], $7, 0 + ; M3: dsrav $[[T1:[0-9]+]], $4, $[[T0]] + ; M3: andi $[[T2:[0-9]+]], $[[T0]], 64 + ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]] + ; M3: move $3, $[[T1]] + ; M3: dsrlv $[[T4:[0-9]+]], $5, $[[T0]] + ; M3: dsll $[[T5:[0-9]+]], $4, 1 + ; M3: not $[[T6:[0-9]+]], $[[T0]] + ; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]] + ; M3: or $3, $[[T7]], $[[T4]] + ; M3: $[[BB0]]: + ; M3: beqz $[[T3]], $[[BB1:BB[0-9_]+]] + ; M3: nop + ; M3: dsra $2, $4, 63 + ; M3: $[[BB1]]: + ; M3: jr $ra + ; M3: nop + + ; GP64-NOT-R6: sll $[[T0:[0-9]+]], $7, 0 + ; GP64-NOT-R6: dsrlv $[[T1:[0-9]+]], $5, $[[T0]] + ; GP64-NOT-R6: dsll $[[T2:[0-9]+]], $4, 1 + ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T0]] + ; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T2]], $[[T3]] + ; GP64-NOT-R6: or $3, $[[T4]], $[[T1]] + ; GP64-NOT-R6: dsrav $2, $4, $[[T0]] + ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T0]], 64 + + ; GP64-NOT-R6: movn $3, $2, $[[T5]] + ; GP64-NOT-R6: dsra $[[T6:[0-9]+]], $4, 63 + ; GP64-NOT-R6: jr $ra + ; GP64-NOT-R6: movn $2, $[[T6]], $[[T5]] + + ; 64R6: sll $[[T0:[0-9]+]], $7, 0 + ; 64R6: dsrav $[[T1:[0-9]+]], $4, $[[T0]] + ; 64R6: andi $[[T2:[0-9]+]], $[[T0]], 64 + ; 64R6: sll 
$[[T3:[0-9]+]], $[[T2]], 0 + ; 64R6: seleqz $[[T4:[0-9]+]], $[[T1]], $[[T3]] + ; 64R6: dsra $[[T5:[0-9]+]], $4, 63 + ; 64R6: selnez $[[T6:[0-9]+]], $[[T5]], $[[T3]] + ; 64R6: or $2, $[[T6]], $[[T4]] + ; 64R6: dsrlv $[[T7:[0-9]+]], $5, $[[T0]] + ; 64R6: dsll $[[T8:[0-9]+]], $4, 1 + ; 64R6: not $[[T9:[0-9]+]], $[[T0]] + ; 64R6: dsllv $[[T10:[0-9]+]], $[[T8]], $[[T9]] + ; 64R6: or $[[T11:[0-9]+]], $[[T10]], $[[T7]] + ; 64R6: seleqz $[[T12:[0-9]+]], $[[T11]], $[[T3]] + ; 64R6: selnez $[[T13:[0-9]+]], $[[T1]], $[[T3]] + ; 64R6: jr $ra + ; 64R6: or $3, $[[T13]], $[[T12]] + + %r = ashr i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/lshr.ll b/test/CodeGen/Mips/llvm-ir/lshr.ll new file mode 100644 index 000000000000..59f4330dde6c --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/lshr.ll @@ -0,0 +1,176 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 \ +; RUN: -check-prefix=M2 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 -check-prefix=NOT-R2-R6 \ +; RUN: -check-prefix=32R1-R2 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 \ +; RUN: -check-prefix=32R1-R2 -check-prefix=R2-R6 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 \ +; RUN: -check-prefix=32R6 -check-prefix=R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=M3 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s 
\ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=64R6 -check-prefix=R2-R6 + +define signext i1 @lshr_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: lshr_i1: + + ; ALL: move $2, $4 + + %r = lshr i1 %a, %b + ret i1 %r +} + +define zeroext i8 @lshr_i8(i8 zeroext %a, i8 zeroext %b) { +entry: +; ALL-LABEL: lshr_i8: + + ; ALL: srlv $[[T0:[0-9]+]], $4, $5 + ; ALL: andi $2, $[[T0]], 255 + + %r = lshr i8 %a, %b + ret i8 %r +} + +define zeroext i16 @lshr_i16(i16 zeroext %a, i16 zeroext %b) { +entry: +; ALL-LABEL: lshr_i16: + + ; ALL: srlv $[[T0:[0-9]+]], $4, $5 + ; ALL: andi $2, $[[T0]], 65535 + + %r = lshr i16 %a, %b + ret i16 %r +} + +define signext i32 @lshr_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: lshr_i32: + + ; ALL: srlv $2, $4, $5 + + %r = lshr i32 %a, %b + ret i32 %r +} + +define signext i64 @lshr_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: lshr_i64: + + ; M2: srlv $[[T0:[0-9]+]], $4, $7 + ; M2: andi $[[T1:[0-9]+]], $7, 32 + ; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]] + ; M2: move $3, $[[T0]] + ; M2: srlv $[[T2:[0-9]+]], $5, $7 + ; M2: not $[[T3:[0-9]+]], $7 + ; M2: sll $[[T4:[0-9]+]], $4, 1 + ; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]] + ; M2: or $3, $[[T3]], $[[T2]] + ; M2: $[[BB0]]: + ; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]] + ; M2: addiu $2, $zero, 0 + ; M2: move $2, $[[T0]] + ; M2: $[[BB1]]: + ; M2: jr $ra + ; M2: nop + + ; 32R1-R2: srlv $[[T0:[0-9]+]], $5, $7 + ; 32R1-R2: not $[[T1:[0-9]+]], $7 + ; 32R1-R2: sll $[[T2:[0-9]+]], $4, 1 + ; 32R1-R2: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]] + ; 32R1-R2: or $3, $[[T3]], $[[T0]] + ; 32R1-R2: srlv $[[T4:[0-9]+]], $4, $7 + ; 32R1-R2: andi $[[T5:[0-9]+]], $7, 32 + ; 32R1-R2: movn $3, $[[T4]], $[[T5]] + ; 32R1-R2: jr $ra + ; 32R1-R2: movn $2, $zero, $[[T5]] + + ; 32R6: srlv $[[T0:[0-9]+]], $5, $7 + ; 
32R6: not $[[T1:[0-9]+]], $7 + ; 32R6: sll $[[T2:[0-9]+]], $4, 1 + ; 32R6: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]] + ; 32R6: or $[[T4:[0-9]+]], $[[T3]], $[[T0]] + ; 32R6: andi $[[T5:[0-9]+]], $7, 32 + ; 32R6: seleqz $[[T6:[0-9]+]], $[[T4]], $[[T3]] + ; 32R6: srlv $[[T7:[0-9]+]], $4, $7 + ; 32R6: selnez $[[T8:[0-9]+]], $[[T7]], $[[T5]] + ; 32R6: or $3, $[[T8]], $[[T6]] + ; 32R6: jr $ra + ; 32R6: seleqz $2, $[[T7]], $[[T5]] + + ; GP64: sll $[[T0:[0-9]+]], $5, 0 + ; GP64: dsrlv $2, $4, $[[T0]] + + %r = lshr i64 %a, %b + ret i64 %r +} + +define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: lshr_i128: + + ; GP32: lw $25, %call16(__lshrti3)($gp) + + ; M3: sll $[[T0:[0-9]+]], $7, 0 + ; M3: dsrlv $[[T1:[0-9]+]], $4, $[[T0]] + ; M3: andi $[[T2:[0-9]+]], $[[T0]], 64 + ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]] + ; M3: move $3, $[[T1]] + ; M3: dsrlv $[[T4:[0-9]+]], $5, $[[T0]] + ; M3: dsll $[[T5:[0-9]+]], $4, 1 + ; M3: not $[[T6:[0-9]+]], $[[T0]] + ; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]] + ; M3: or $3, $[[T7]], $[[T4]] + ; M3: $[[BB0]]: + ; M3: bnez $[[T3]], $[[BB1:BB[0-9_]+]] + ; M3: daddiu $2, $zero, 0 + ; M3: move $2, $[[T1]] + ; M3: $[[BB1]]: + ; M3: jr $ra + ; M3: nop + + ; GP64-NOT-R6: sll $[[T0:[0-9]+]], $7, 0 + ; GP64-NOT-R6: dsrlv $[[T1:[0-9]+]], $5, $[[T0]] + ; GP64-NOT-R6: dsll $[[T2:[0-9]+]], $4, 1 + ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T0]] + ; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T2]], $[[T3]] + ; GP64-NOT-R6: or $3, $[[T4]], $[[T1]] + ; GP64-NOT-R6: dsrlv $2, $4, $[[T0]] + ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T0]], 64 + ; GP64-NOT-R6: movn $3, $2, $[[T5]] + ; GP64-NOT-R6: jr $ra + ; GP64-NOT-R6: movn $2, $zero, $1 + + ; 64R6: sll $[[T0:[0-9]+]], $7, 0 + ; 64R6: dsrlv $[[T1:[0-9]+]], $5, $[[T0]] + ; 64R6: dsll $[[T2:[0-9]+]], $4, 1 + ; 64R6: not $[[T3:[0-9]+]], $[[T0]] + ; 64R6: dsllv $[[T4:[0-9]+]], $[[T2]], $[[T3]] + ; 64R6: or $[[T5:[0-9]+]], $[[T4]], $[[T1]] + ; 64R6: andi $[[T6:[0-9]+]], $[[T0]], 64 + ; 64R6: 
sll $[[T7:[0-9]+]], $[[T6]], 0 + ; 64R6: seleqz $[[T8:[0-9]+]], $[[T5]], $[[T7]] + ; 64R6: dsrlv $[[T9:[0-9]+]], $4, $[[T0]] + ; 64R6: selnez $[[T10:[0-9]+]], $[[T9]], $[[T7]] + ; 64R6: or $3, $[[T10]], $[[T8]] + ; 64R6: jr $ra + ; 64R6: seleqz $2, $[[T0]], $[[T7]] + + %r = lshr i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/mul.ll b/test/CodeGen/Mips/llvm-ir/mul.ll index 167412407cdc..5f7f338c7789 100644 --- a/test/CodeGen/Mips/llvm-ir/mul.ll +++ b/test/CodeGen/Mips/llvm-ir/mul.ll @@ -1,19 +1,19 @@ -; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ -; RUN: -check-prefix=ALL -check-prefix=M2 -; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ -; RUN: -check-prefix=ALL -check-prefix=32R1-R2 -check-prefix=32R1 -; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ -; RUN: -check-prefix=ALL -check-prefix=32R1-R2 -check-prefix=32R2 -; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ -; RUN: -check-prefix=ALL -check-prefix=32R6 -; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ -; RUN: -check-prefix=ALL -check-prefix=M4 -; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ -; RUN: -check-prefix=ALL -check-prefix=64R1-R2 -; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ -; RUN: -check-prefix=ALL -check-prefix=64R1-R2 -; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ -; RUN: -check-prefix=ALL -check-prefix=64R6 +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=ALL \ +; RUN: -check-prefix=M2 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s -check-prefix=ALL \ +; RUN: -check-prefix=32R1-R2 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL \ +; RUN: -check-prefix=32R1-R2 -check-prefix=32R2 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL \ +; RUN: -check-prefix=32R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s 
-check-prefix=ALL \ +; RUN: -check-prefix=M4 -check-prefix=GP64-NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=ALL \ +; RUN: -check-prefix=64R1-R2 -check-prefix=GP64-NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL \ +; RUN: -check-prefix=64R1-R2 -check-prefix=GP64 -check-prefix=GP64-NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL \ +; RUN: -check-prefix=64R6 define signext i1 @mul_i1(i1 signext %a, i1 signext %b) { entry: @@ -179,3 +179,30 @@ entry: %r = mul i64 %a, %b ret i64 %r } + +define signext i128 @mul_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: mul_i128: + + ; GP32: lw $25, %call16(__multi3)($gp) + + ; GP64-NOT-R6: dmult $4, $7 + ; GP64-NOT-R6: mflo $[[T0:[0-9]+]] + ; GP64-NOT-R6: dmult $5, $6 + ; GP64-NOT-R6: mflo $[[T1:[0-9]+]] + ; GP64-NOT-R6: dmultu $5, $7 + ; GP64-NOT-R6: mflo $3 + ; GP64-NOT-R6: mfhi $[[T2:[0-9]+]] + ; GP64-NOT-R6: daddu $[[T3:[0-9]+]], $[[T2]], $[[T1]] + ; GP64-NOT-R6: daddu $2, $[[T3:[0-9]+]], $[[T0]] + + ; 64R6: dmul $[[T0:[0-9]+]], $5, $6 + ; 64R6: dmuhu $[[T1:[0-9]+]], $5, $7 + ; 64R6: daddu $[[T2:[0-9]+]], $[[T1]], $[[T0]] + ; 64R6: dmul $[[T3:[0-9]+]], $4, $7 + ; 64R6: daddu $2, $[[T2]], $[[T3]] + ; 64R6: dmul $3, $5, $7 + + %r = mul i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/or.ll b/test/CodeGen/Mips/llvm-ir/or.ll new file mode 100644 index 000000000000..21d1d4fca2a3 --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/or.ll @@ -0,0 +1,95 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips64 
-mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 + +define signext i1 @or_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: or_i1: + + ; ALL: or $2, $4, $5 + + %r = or i1 %a, %b + ret i1 %r +} + +define signext i8 @or_i8(i8 signext %a, i8 signext %b) { +entry: +; ALL-LABEL: or_i8: + + ; ALL: or $2, $4, $5 + + %r = or i8 %a, %b + ret i8 %r +} + +define signext i16 @or_i16(i16 signext %a, i16 signext %b) { +entry: +; ALL-LABEL: or_i16: + + ; ALL: or $2, $4, $5 + + %r = or i16 %a, %b + ret i16 %r +} + +define signext i32 @or_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: or_i32: + + ; GP32: or $2, $4, $5 + + ; GP64: or $[[T0:[0-9]+]], $4, $5 + ; FIXME: The sll instruction below is redundant. 
+ ; GP64: sll $2, $[[T0]], 0 + + %r = or i32 %a, %b + ret i32 %r +} + +define signext i64 @or_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: or_i64: + + ; GP32: or $2, $4, $6 + ; GP32: or $3, $5, $7 + + ; GP64: or $2, $4, $5 + + %r = or i64 %a, %b + ret i64 %r +} + +define signext i128 @or_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: or_i128: + + ; GP32: lw $[[T0:[0-9]+]], 24($sp) + ; GP32: lw $[[T1:[0-9]+]], 20($sp) + ; GP32: lw $[[T2:[0-9]+]], 16($sp) + ; GP32: or $2, $4, $[[T2]] + ; GP32: or $3, $5, $[[T1]] + ; GP32: or $4, $6, $[[T0]] + ; GP32: lw $[[T3:[0-9]+]], 28($sp) + ; GP32: or $5, $7, $[[T3]] + + ; GP64: or $2, $4, $6 + ; GP64: or $3, $5, $7 + + %r = or i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/sdiv.ll b/test/CodeGen/Mips/llvm-ir/sdiv.ll new file mode 100644 index 000000000000..54b7f70b1dac --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/sdiv.ll @@ -0,0 +1,136 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=R2 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP64-NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP64-NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP64-NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=R2 -check-prefix=GP64-NOT-R6 +; RUN: llc 
< %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=R6 -check-prefix=64R6 + +define signext i1 @sdiv_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: sdiv_i1: + + ; NOT-R6: div $zero, $4, $5 + ; NOT-R6: teq $5, $zero, 7 + ; NOT-R6: mflo $[[T0:[0-9]+]] + ; FIXME: The sll/sra instructions are redundant since div is signed. + ; NOT-R6: sll $[[T1:[0-9]+]], $[[T0]], 31 + ; NOT-R6: sra $2, $[[T1]], 31 + + ; R6: div $[[T0:[0-9]+]], $4, $5 + ; R6: teq $5, $zero, 7 + ; FIXME: The sll/sra instructions are redundant since div is signed. + ; R6: sll $[[T1:[0-9]+]], $[[T0]], 31 + ; R6: sra $2, $[[T1]], 31 + + %r = sdiv i1 %a, %b + ret i1 %r +} + +define signext i8 @sdiv_i8(i8 signext %a, i8 signext %b) { +entry: +; ALL-LABEL: sdiv_i8: + + ; NOT-R2-R6: div $zero, $4, $5 + ; NOT-R2-R6: teq $5, $zero, 7 + ; NOT-R2-R6: mflo $[[T0:[0-9]+]] + ; FIXME: The sll/sra instructions are redundant since div is signed. + ; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 24 + ; NOT-R2-R6: sra $2, $[[T1]], 24 + + ; R2: div $zero, $4, $5 + ; R2: teq $5, $zero, 7 + ; R2: mflo $[[T0:[0-9]+]] + ; FIXME: This instruction is redundant. + ; R2: seb $2, $[[T0]] + + ; R6: div $[[T0:[0-9]+]], $4, $5 + ; R6: teq $5, $zero, 7 + ; FIXME: This instruction is redundant. + ; R6: seb $2, $[[T0]] + + %r = sdiv i8 %a, %b + ret i8 %r +} + +define signext i16 @sdiv_i16(i16 signext %a, i16 signext %b) { +entry: +; ALL-LABEL: sdiv_i16: + + ; NOT-R2-R6: div $zero, $4, $5 + ; NOT-R2-R6: teq $5, $zero, 7 + ; NOT-R2-R6: mflo $[[T0:[0-9]+]] + ; FIXME: The sll/sra instructions are redundant since div is signed. + ; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 16 + ; NOT-R2-R6: sra $2, $[[T1]], 16 + + ; R2: div $zero, $4, $5 + ; R2: teq $5, $zero, 7 + ; R2: mflo $[[T0:[0-9]+]] + ; FIXME: This is instruction is redundant since div is signed. + ; R2: seh $2, $[[T0]] + + ; R6: div $[[T0:[0-9]+]], $4, $5 + ; R6: teq $5, $zero, 7 + ; FIXME: This is instruction is redundant since div is signed. 
+ ; R6: seh $2, $[[T0]] + + %r = sdiv i16 %a, %b + ret i16 %r +} + +define signext i32 @sdiv_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: sdiv_i32: + + ; NOT-R6: div $zero, $4, $5 + ; NOT-R6: teq $5, $zero, 7 + ; NOT-R6: mflo $2 + + ; R6: div $2, $4, $5 + ; R6: teq $5, $zero, 7 + + %r = sdiv i32 %a, %b + ret i32 %r +} + +define signext i64 @sdiv_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: sdiv_i64: + + ; GP32: lw $25, %call16(__divdi3)($gp) + + ; GP64-NOT-R6: ddiv $zero, $4, $5 + ; GP64-NOT-R6: teq $5, $zero, 7 + ; GP64-NOT-R6: mflo $2 + + ; 64R6: ddiv $2, $4, $5 + ; 64R6: teq $5, $zero, 7 + + %r = sdiv i64 %a, %b + ret i64 %r +} + +define signext i128 @sdiv_i128(i128 signext %a, i128 signext %b) { +entry: + ; ALL-LABEL: sdiv_i128: + + ; GP32: lw $25, %call16(__divti3)($gp) + + ; GP64-NOT-R6: ld $25, %call16(__divti3)($gp) + ; 64R6: ld $25, %call16(__divti3)($gp) + + %r = sdiv i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/shl.ll b/test/CodeGen/Mips/llvm-ir/shl.ll new file mode 100644 index 000000000000..fc5243cc97f2 --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/shl.ll @@ -0,0 +1,188 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 \ +; RUN: -check-prefix=M2 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 -check-prefix=NOT-R2-R6 \ +; RUN: -check-prefix=32R1-R2 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 \ +; RUN: -check-prefix=32R1-R2 -check-prefix=R2-R6 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 \ +; RUN: -check-prefix=32R6 -check-prefix=R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=M3 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ 
+; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 \ +; RUN: -check-prefix=64R6 -check-prefix=R2-R6 + +define signext i1 @shl_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: shl_i1: + + ; ALL: move $2, $4 + + %r = shl i1 %a, %b + ret i1 %r +} + +define signext i8 @shl_i8(i8 signext %a, i8 signext %b) { +entry: +; ALL-LABEL: shl_i8: + + ; NOT-R2-R6: andi $[[T0:[0-9]+]], $5, 255 + ; NOT-R2-R6: sllv $[[T1:[0-9]+]], $4, $[[T0]] + ; NOT-R2-R6: sll $[[T2:[0-9]+]], $[[T1]], 24 + ; NOT-R2-R6: sra $2, $[[T2]], 24 + + ; R2-R6: andi $[[T0:[0-9]+]], $5, 255 + ; R2-R6: sllv $[[T1:[0-9]+]], $4, $[[T0]] + ; R2-R6: seb $2, $[[T1]] + + %r = shl i8 %a, %b + ret i8 %r +} + +define signext i16 @shl_i16(i16 signext %a, i16 signext %b) { +entry: +; ALL-LABEL: shl_i16: + + ; NOT-R2-R6: andi $[[T0:[0-9]+]], $5, 65535 + ; NOT-R2-R6: sllv $[[T1:[0-9]+]], $4, $[[T0]] + ; NOT-R2-R6: sll $[[T2:[0-9]+]], $[[T1]], 16 + ; NOT-R2-R6: sra $2, $[[T2]], 16 + + ; R2-R6: andi $[[T0:[0-9]+]], $5, 65535 + ; R2-R6: sllv $[[T1:[0-9]+]], $4, $[[T0]] + ; R2-R6: seh $2, $[[T1]] + + %r = shl i16 %a, %b + ret i16 %r +} + +define signext i32 @shl_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: shl_i32: + + ; ALL: sllv $2, $4, $5 + + %r = shl i32 %a, %b + ret i32 %r +} + +define signext i64 @shl_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: shl_i64: + + ; M2: sllv $[[T0:[0-9]+]], $5, $7 + ; M2: andi $[[T1:[0-9]+]], $7, 32 + ; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]] + ; M2: move $2, $[[T0]] + ; M2: 
sllv $[[T2:[0-9]+]], $4, $7 + ; M2: not $[[T3:[0-9]+]], $7 + ; M2: srl $[[T4:[0-9]+]], $5, 1 + ; M2: srlv $[[T5:[0-9]+]], $[[T4]], $[[T3]] + ; M2: or $2, $[[T2]], $[[T3]] + ; M2: $[[BB0]]: + ; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]] + ; M2: addiu $3, $zero, 0 + ; M2: move $3, $[[T0]] + ; M2: $[[BB1]]: + ; M2: jr $ra + ; M2: nop + + ; 32R1-R2: sllv $[[T0:[0-9]+]], $4, $7 + ; 32R1-R2: not $[[T1:[0-9]+]], $7 + ; 32R1-R2: srl $[[T2:[0-9]+]], $5, 1 + ; 32R1-R2: srlv $[[T3:[0-9]+]], $[[T2]], $[[T1]] + ; 32R1-R2: or $2, $[[T0]], $[[T3]] + ; 32R1-R2: sllv $[[T4:[0-9]+]], $5, $7 + ; 32R1-R2: andi $[[T5:[0-9]+]], $7, 32 + ; 32R1-R2: movn $2, $[[T4]], $[[T5]] + ; 32R1-R2: jr $ra + ; 32R1-R2: movn $3, $zero, $[[T5]] + + ; 32R6: sllv $[[T0:[0-9]+]], $4, $7 + ; 32R6: not $[[T1:[0-9]+]], $7 + ; 32R6: srl $[[T2:[0-9]+]], $5, 1 + ; 32R6: srlv $[[T3:[0-9]+]], $[[T2]], $[[T1]] + ; 32R6: or $[[T4:[0-9]+]], $[[T0]], $[[T3]] + ; 32R6: andi $[[T5:[0-9]+]], $7, 32 + ; 32R6: seleqz $[[T6:[0-9]+]], $[[T4]], $[[T2]] + ; 32R6: sllv $[[T7:[0-9]+]], $5, $7 + ; 32R6: selnez $[[T8:[0-9]+]], $[[T7]], $[[T5]] + ; 32R6: or $2, $[[T8]], $[[T6]] + ; 32R6: jr $ra + ; 32R6: seleqz $3, $[[T7]], $[[T5]] + + ; GP64: sll $[[T0:[0-9]+]], $5, 0 + ; GP64: dsllv $2, $4, $1 + + %r = shl i64 %a, %b + ret i64 %r +} + +define signext i128 @shl_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: shl_i128: + + ; GP32: lw $25, %call16(__ashlti3)($gp) + + ; M3: sll $[[T0:[0-9]+]], $7, 0 + ; M3: dsllv $[[T1:[0-9]+]], $5, $[[T0]] + ; M3: andi $[[T2:[0-9]+]], $[[T0]], 64 + ; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]] + ; M3: move $2, $[[T1]] + ; M3: dsllv $[[T4:[0-9]+]], $4, $[[T0]] + ; M3: dsrl $[[T5:[0-9]+]], $5, 1 + ; M3: not $[[T6:[0-9]+]], $[[T0]] + ; M3: dsrlv $[[T7:[0-9]+]], $[[T5]], $[[T6]] + ; M3: or $2, $[[T4]], $[[T7]] + ; M3: $[[BB0]]: + ; M3: bnez $[[T3]], $[[BB1:BB[0-9_]+]] + ; M3: daddiu $3, $zero, 0 + ; M3: move $3, $[[T1]] + ; M3: $[[BB1]]: + ; M3: jr $ra + ; M3: nop + + ; GP64-NOT-R6: sll 
$[[T0:[0-9]+]], $7, 0 + ; GP64-NOT-R6: dsllv $[[T1:[0-9]+]], $4, $[[T0]] + ; GP64-NOT-R6: dsrl $[[T2:[0-9]+]], $5, 1 + ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T0]] + ; GP64-NOT-R6: dsrlv $[[T4:[0-9]+]], $[[T2]], $[[T3]] + ; GP64-NOT-R6: or $2, $[[T1]], $[[T4]] + ; GP64-NOT-R6: dsllv $3, $5, $[[T0]] + ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T0]], 64 + ; GP64-NOT-R6: movn $2, $3, $[[T5]] + ; GP64-NOT-R6: jr $ra + ; GP64-NOT-R6: movn $3, $zero, $1 + + ; 64R6: sll $[[T0:[0-9]+]], $7, 0 + ; 64R6: dsllv $[[T1:[0-9]+]], $4, $[[T0]] + ; 64R6: dsrl $[[T2:[0-9]+]], $5, 1 + ; 64R6: not $[[T3:[0-9]+]], $[[T0]] + ; 64R6: dsrlv $[[T4:[0-9]+]], $[[T2]], $[[T3]] + ; 64R6: or $[[T5:[0-9]+]], $[[T1]], $[[T4]] + ; 64R6: andi $[[T6:[0-9]+]], $[[T0]], 64 + ; 64R6: sll $[[T7:[0-9]+]], $[[T6]], 0 + ; 64R6: seleqz $[[T8:[0-9]+]], $[[T5]], $[[T7]] + ; 64R6: dsllv $[[T9:[0-9]+]], $5, $[[T0]] + ; 64R6: selnez $[[T10:[0-9]+]], $[[T9]], $[[T7]] + ; 64R6: or $2, $[[T10]], $[[T8]] + ; 64R6: jr $ra + ; 64R6: seleqz $3, $[[T0]], $[[T7]] + + %r = shl i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/srem.ll b/test/CodeGen/Mips/llvm-ir/srem.ll new file mode 100644 index 000000000000..1e949d24678b --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/srem.ll @@ -0,0 +1,129 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=GP32 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=GP32 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=GP32 \ +; RUN: -check-prefix=R2 -check-prefix=R2-R6 -check-prefix=NOT-R6 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=GP32 -check-prefix=R6 -check-prefix=R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ 
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=R2 -check-prefix=R2-R6 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=64R6 -check-prefix=R6 -check-prefix=R2-R6 + +define signext i1 @srem_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: srem_i1: + + ; NOT-R6: div $zero, $4, $5 + ; NOT-R6: teq $5, $zero, 7 + ; NOT-R6: mfhi $[[T0:[0-9]+]] + ; NOT-R6: sll $[[T1:[0-9]+]], $[[T0]], 31 + ; NOT-R6: sra $2, $[[T1]], 31 + + ; R6: mod $[[T0:[0-9]+]], $4, $5 + ; R6: teq $5, $zero, 7 + ; R6: sll $[[T3:[0-9]+]], $[[T0]], 31 + ; R6: sra $2, $[[T3]], 31 + + %r = srem i1 %a, %b + ret i1 %r +} + +define signext i8 @srem_i8(i8 signext %a, i8 signext %b) { +entry: +; ALL-LABEL: srem_i8: + + ; NOT-R2-R6: div $zero, $4, $5 + ; NOT-R2-R6: teq $5, $zero, 7 + ; NOT-R2-R6: mfhi $[[T0:[0-9]+]] + ; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 24 + ; NOT-R2-R6: sra $2, $[[T1]], 24 + + ; R2: div $zero, $4, $5 + ; R2: teq $5, $zero, 7 + ; R2: mfhi $[[T0:[0-9]+]] + ; R2: seb $2, $[[T0]] + + ; R6: mod $[[T0:[0-9]+]], $4, $5 + ; R6: teq $5, $zero, 7 + ; R6: seb $2, $[[T0]] + + %r = srem i8 %a, %b + ret i8 %r +} + +define signext i16 @srem_i16(i16 signext %a, i16 signext %b) { +entry: +; ALL-LABEL: srem_i16: + + ; NOT-R2-R6: div $zero, $4, $5 + ; NOT-R2-R6: teq $5, $zero, 7 + ; NOT-R2-R6: mfhi $[[T0:[0-9]+]] + ; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 16 + ; NOT-R2-R6: sra $2, $[[T1]], 16 + + ; R2: div $zero, $4, $5 + ; R2: teq $5, $zero, 7 + ; R2: mfhi $[[T0:[0-9]+]] + ; R2: seh $2, $[[T1]] + + ; R6: mod $[[T0:[0-9]+]], $4, $5 + ; R6: teq $5, $zero, 7 + ; R6: seh $2, $[[T0]] + + %r = srem i16 %a, %b + ret i16 %r +} + +define signext i32 @srem_i32(i32 
signext %a, i32 signext %b) { +entry: +; ALL-LABEL: srem_i32: + + ; NOT-R6: div $zero, $4, $5 + ; NOT-R6: teq $5, $zero, 7 + ; NOT-R6: mfhi $2 + + ; R6: mod $2, $4, $5 + ; R6: teq $5, $zero, 7 + + %r = srem i32 %a, %b + ret i32 %r +} + +define signext i64 @srem_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: srem_i64: + + ; GP32: lw $25, %call16(__moddi3)($gp) + + ; GP64-NOT-R6: ddiv $zero, $4, $5 + ; GP64-NOT-R6: teq $5, $zero, 7 + ; GP64-NOT-R6: mfhi $2 + + ; 64R6: dmod $2, $4, $5 + ; 64R6: teq $5, $zero, 7 + + %r = srem i64 %a, %b + ret i64 %r +} + +define signext i128 @srem_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: srem_i128: + + ; GP32: lw $25, %call16(__modti3)($gp) + + ; GP64-NOT-R6: ld $25, %call16(__modti3)($gp) + ; 64-R6: ld $25, %call16(__modti3)($gp) + + %r = srem i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/sub.ll b/test/CodeGen/Mips/llvm-ir/sub.ll new file mode 100644 index 000000000000..6d592be38211 --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/sub.ll @@ -0,0 +1,114 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | 
FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64 + +define signext i1 @sub_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: sub_i1: + + ; ALL: subu $[[T0:[0-9]+]], $4, $5 + ; ALL: sll $[[T0]], $[[T0]], 31 + ; ALL: sra $2, $[[T0]], 31 + + %r = sub i1 %a, %b + ret i1 %r +} + +define signext i8 @sub_i8(i8 signext %a, i8 signext %b) { +entry: +; ALL-LABEL: sub_i8: + + ; NOT-R2-R6: subu $[[T0:[0-9]+]], $4, $5 + ; NOT-R2-R6: sll $[[T0]], $[[T0]], 24 + ; NOT-R2-R6: sra $2, $[[T0]], 24 + + ; R2-R6: subu $[[T0:[0-9]+]], $4, $5 + ; R2-R6: seb $2, $[[T0:[0-9]+]] + + %r = sub i8 %a, %b + ret i8 %r +} + +define signext i16 @sub_i16(i16 signext %a, i16 signext %b) { +entry: +; ALL-LABEL: sub_i16: + + ; NOT-R2-R6: subu $[[T0:[0-9]+]], $4, $5 + ; NOT-R2-R6: sll $[[T0]], $[[T0]], 16 + ; NOT-R2-R6: sra $2, $[[T0]], 16 + + ; R2-R6: subu $[[T0:[0-9]+]], $4, $5 + ; R2-R6: seh $2, $[[T0:[0-9]+]] + + %r = sub i16 %a, %b + ret i16 %r +} + +define signext i32 @sub_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: sub_i32: + + ; ALL: subu $2, $4, $5 + + %r = sub i32 %a, %b + ret i32 %r +} + +define signext i64 @sub_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: sub_i64: + + ; GP32: subu $3, $5, $7 + ; GP32: sltu $[[T0:[0-9]+]], $5, $7 + ; GP32: addu $[[T1:[0-9]+]], $[[T0]], $6 + ; GP32: subu $2, $4, $[[T1]] + + ; GP64: dsubu $2, $4, $5 + + %r = sub i64 %a, %b + ret i64 %r +} + +define signext i128 @sub_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: sub_i128: + + ; GP32: lw $[[T0:[0-9]+]], 20($sp) + ; GP32: sltu $[[T1:[0-9]+]], $5, $[[T0]] + ; GP32: lw $[[T2:[0-9]+]], 16($sp) + ; GP32: addu $[[T3:[0-9]+]], $[[T1]], $[[T2]] + ; GP32: lw $[[T4:[0-9]+]], 24($sp) + ; GP32: lw $[[T5:[0-9]+]], 28($sp) + ; GP32: subu $[[T6:[0-9]+]], $7, $[[T5]] + ; GP32: subu $2, $4, $[[T3]] + ; GP32: sltu 
$[[T8:[0-9]+]], $6, $[[T4]] + ; GP32: addu $[[T9:[0-9]+]], $[[T8]], $[[T0]] + ; GP32: subu $3, $5, $[[T9]] + ; GP32: sltu $[[T10:[0-9]+]], $7, $[[T5]] + ; GP32: addu $[[T11:[0-9]+]], $[[T10]], $[[T4]] + ; GP32: subu $4, $6, $[[T11]] + ; GP32: move $5, $[[T6]] + + ; GP64: dsubu $3, $5, $7 + ; GP64: sltu $[[T0:[0-9]+]], $5, $7 + ; GP64: daddu $[[T1:[0-9]+]], $[[T0]], $6 + ; GP64: dsubu $2, $4, $[[T1]] + + %r = sub i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/udiv.ll b/test/CodeGen/Mips/llvm-ir/udiv.ll new file mode 100644 index 000000000000..1f7aa0d5f4ce --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/udiv.ll @@ -0,0 +1,108 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=R6 -check-prefix=GP32 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=R6 -check-prefix=64R6 + +define zeroext i1 @udiv_i1(i1 zeroext %a, i1 zeroext %b) { +entry: +; ALL-LABEL: udiv_i1: + + ; NOT-R6: divu $zero, $4, $5 + ; NOT-R6: teq $5, $zero, 7 + ; NOT-R6: mflo $2 + + ; R6: divu $2, $4, $5 + ; R6: teq $5, $zero, 7 + + %r = udiv i1 %a, %b + ret i1 %r +} + +define zeroext i8 @udiv_i8(i8 zeroext %a, i8 zeroext %b) { +entry: +; 
ALL-LABEL: udiv_i8: + + ; NOT-R6: divu $zero, $4, $5 + ; NOT-R6: teq $5, $zero, 7 + ; NOT-R6: mflo $2 + + ; R6: divu $2, $4, $5 + ; R6: teq $5, $zero, 7 + + %r = udiv i8 %a, %b + ret i8 %r +} + +define zeroext i16 @udiv_i16(i16 zeroext %a, i16 zeroext %b) { +entry: +; ALL-LABEL: udiv_i16: + + ; NOT-R6: divu $zero, $4, $5 + ; NOT-R6: teq $5, $zero, 7 + ; NOT-R6: mflo $2 + + ; R6: divu $2, $4, $5 + ; R6: teq $5, $zero, 7 + + %r = udiv i16 %a, %b + ret i16 %r +} + +define signext i32 @udiv_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: udiv_i32: + + ; NOT-R6: divu $zero, $4, $5 + ; NOT-R6: teq $5, $zero, 7 + ; NOT-R6: mflo $2 + + ; R6: divu $2, $4, $5 + ; R6: teq $5, $zero, 7 + + %r = udiv i32 %a, %b + ret i32 %r +} + +define signext i64 @udiv_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: udiv_i64: + + ; GP32: lw $25, %call16(__udivdi3)($gp) + + ; GP64-NOT-R6: ddivu $zero, $4, $5 + ; GP64-NOT-R6: teq $5, $zero, 7 + ; GP64-NOT-R6: mflo $2 + + ; 64R6: ddivu $2, $4, $5 + ; 64R6: teq $5, $zero, 7 + + %r = udiv i64 %a, %b + ret i64 %r +} + +define signext i128 @udiv_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: udiv_i128: + + ; GP32: lw $25, %call16(__udivti3)($gp) + + ; GP64-NOT-R6: ld $25, %call16(__udivti3)($gp) + ; 64-R6: ld $25, %call16(__udivti3)($gp) + + %r = udiv i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/urem.ll b/test/CodeGen/Mips/llvm-ir/urem.ll new file mode 100644 index 000000000000..73235341a42f --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/urem.ll @@ -0,0 +1,145 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=GP32 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=GP32 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=GP32 \ +; RUN: -check-prefix=R2 -check-prefix=R2-R6 -check-prefix=NOT-R6 +; RUN: llc < %s 
-march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=GP32 -check-prefix=R6 -check-prefix=R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=R2 -check-prefix=R2-R6 \ +; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=64R6 -check-prefix=R6 -check-prefix=R2-R6 + +define signext i1 @urem_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: urem_i1: + + ; NOT-R6: andi $[[T0:[0-9]+]], $5, 1 + ; NOT-R6: andi $[[T1:[0-9]+]], $4, 1 + ; NOT-R6: divu $zero, $[[T1]], $[[T0]] + ; NOT-R6: teq $[[T0]], $zero, 7 + ; NOT-R6: mfhi $[[T2:[0-9]+]] + ; NOT-R6: sll $[[T3:[0-9]+]], $[[T2]], 31 + ; NOT-R6: sra $2, $[[T3]], 31 + + ; R6: andi $[[T0:[0-9]+]], $5, 1 + ; R6: andi $[[T1:[0-9]+]], $4, 1 + ; R6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]] + ; R6: teq $[[T0]], $zero, 7 + ; R6: sll $[[T3:[0-9]+]], $[[T2]], 31 + ; R6: sra $2, $[[T3]], 31 + + %r = urem i1 %a, %b + ret i1 %r +} + +define signext i8 @urem_i8(i8 signext %a, i8 signext %b) { +entry: +; ALL-LABEL: urem_i8: + + ; NOT-R2-R6: andi $[[T0:[0-9]+]], $5, 255 + ; NOT-R2-R6: andi $[[T1:[0-9]+]], $4, 255 + ; NOT-R2-R6: divu $zero, $[[T1]], $[[T0]] + ; NOT-R2-R6: teq $[[T0]], $zero, 7 + ; NOT-R2-R6: mfhi $[[T2:[0-9]+]] + ; NOT-R2-R6: sll $[[T3:[0-9]+]], $[[T2]], 24 + ; NOT-R2-R6: sra $2, $[[T3]], 24 + + ; R2: andi $[[T0:[0-9]+]], $5, 255 + ; R2: andi $[[T1:[0-9]+]], $4, 255 + ; R2: divu $zero, $[[T1]], $[[T0]] + ; R2: teq $[[T0]], $zero, 7 + ; R2: mfhi $[[T2:[0-9]+]] + ; R2: seb $2, 
$[[T2]] + + ; R6: andi $[[T0:[0-9]+]], $5, 255 + ; R6: andi $[[T1:[0-9]+]], $4, 255 + ; R6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]] + ; R6: teq $[[T0]], $zero, 7 + ; R6: seb $2, $[[T2]] + + %r = urem i8 %a, %b + ret i8 %r +} + +define signext i16 @urem_i16(i16 signext %a, i16 signext %b) { +entry: +; ALL-LABEL: urem_i16: + + ; NOT-R2-R6: andi $[[T0:[0-9]+]], $5, 65535 + ; NOT-R2-R6: andi $[[T1:[0-9]+]], $4, 65535 + ; NOT-R2-R6: divu $zero, $[[T1]], $[[T0]] + ; NOT-R2-R6: teq $[[T0]], $zero, 7 + ; NOT-R2-R6: mfhi $[[T2:[0-9]+]] + ; NOT-R2-R6: sll $[[T3:[0-9]+]], $[[T2]], 16 + ; NOT-R2-R6: sra $2, $[[T3]], 16 + + ; R2: andi $[[T0:[0-9]+]], $5, 65535 + ; R2: andi $[[T1:[0-9]+]], $4, 65535 + ; R2: divu $zero, $[[T1]], $[[T0]] + ; R2: teq $[[T0]], $zero, 7 + ; R2: mfhi $[[T3:[0-9]+]] + ; R2: seh $2, $[[T2]] + + ; R6: andi $[[T0:[0-9]+]], $5, 65535 + ; R6: andi $[[T1:[0-9]+]], $4, 65535 + ; R6: modu $[[T2:[0-9]+]], $[[T1]], $[[T0]] + ; R6: teq $[[T0]], $zero, 7 + ; R6: seh $2, $[[T2]] + + %r = urem i16 %a, %b + ret i16 %r +} + +define signext i32 @urem_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: urem_i32: + + ; NOT-R6: divu $zero, $4, $5 + ; NOT-R6: teq $5, $zero, 7 + ; NOT-R6: mfhi $2 + + ; R6: modu $2, $4, $5 + ; R6: teq $5, $zero, 7 + + %r = urem i32 %a, %b + ret i32 %r +} + +define signext i64 @urem_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: urem_i64: + + ; GP32: lw $25, %call16(__umoddi3)($gp) + + ; GP64-NOT-R6: ddivu $zero, $4, $5 + ; GP64-NOT-R6: teq $5, $zero, 7 + ; GP64-NOT-R6: mfhi $2 + + ; 64R6: dmodu $2, $4, $5 + ; 64R6: teq $5, $zero, 7 + + %r = urem i64 %a, %b + ret i64 %r +} + +define signext i128 @urem_i128(i128 signext %a, i128 signext %b) { +entry: + ; ALL-LABEL: urem_i128: + + ; GP32: lw $25, %call16(__umodti3)($gp) + + ; GP64-NOT-R6: ld $25, %call16(__umodti3)($gp) + ; 64-R6: ld $25, %call16(__umodti3)($gp) + + %r = urem i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/llvm-ir/xor.ll 
b/test/CodeGen/Mips/llvm-ir/xor.ll new file mode 100644 index 000000000000..94dead1eff41 --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/xor.ll @@ -0,0 +1,94 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP32 +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=GP64 + +define signext i1 @xor_i1(i1 signext %a, i1 signext %b) { +entry: +; ALL-LABEL: xor_i1: + + ; ALL: xor $2, $4, $5 + + %r = xor i1 %a, %b + ret i1 %r +} + +define signext i8 @xor_i8(i8 signext %a, i8 signext %b) { +entry: +; ALL-LABEL: xor_i8: + + ; ALL: xor $2, $4, $5 + + %r = xor i8 %a, %b + ret i8 %r +} + +define signext i16 @xor_i16(i16 signext %a, i16 signext %b) { +entry: +; ALL-LABEL: xor_i16: + + ; ALL: xor $2, $4, $5 + + %r = xor i16 %a, %b + ret i16 %r +} + +define signext i32 @xor_i32(i32 signext %a, i32 signext %b) { +entry: +; ALL-LABEL: xor_i32: + + ; GP32: xor $2, $4, $5 + + ; GP64: xor $[[T0:[0-9]+]], $4, $5 + ; GP64: sll $2, $[[T0]], 0 + + %r = xor i32 %a, %b + ret i32 %r +} + +define signext i64 @xor_i64(i64 signext %a, i64 signext %b) { +entry: +; ALL-LABEL: xor_i64: + + ; GP32: xor $2, $4, $6 + ; GP32: xor $3, $5, $7 + + ; GP64: xor $2, $4, $5 + + %r = 
xor i64 %a, %b + ret i64 %r +} + +define signext i128 @xor_i128(i128 signext %a, i128 signext %b) { +entry: +; ALL-LABEL: xor_i128: + + ; GP32: lw $[[T0:[0-9]+]], 24($sp) + ; GP32: lw $[[T1:[0-9]+]], 20($sp) + ; GP32: lw $[[T2:[0-9]+]], 16($sp) + ; GP32: xor $2, $4, $[[T2]] + ; GP32: xor $3, $5, $[[T1]] + ; GP32: xor $4, $6, $[[T0]] + ; GP32: lw $[[T3:[0-9]+]], 28($sp) + ; GP32: xor $5, $7, $[[T3]] + + ; GP64: xor $2, $4, $6 + ; GP64: xor $3, $5, $7 + + %r = xor i128 %a, %b + ret i128 %r +} diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll index 6987d4ab0734..f0cbbd08d79a 100644 --- a/test/CodeGen/Mips/mips64-f128.ll +++ b/test/CodeGen/Mips/mips64-f128.ll @@ -545,7 +545,7 @@ entry: ; ALL-LABEL: load_LD_float: ; ALL: ld $[[R0:[0-9]+]], %got_disp(gf1) -; ALL: lwu $4, 0($[[R0]]) +; ALL: lw $4, 0($[[R0]]) ; ALL: ld $25, %call16(__extendsftf2) ; ALL: jalr $25 diff --git a/test/CodeGen/Mips/mips64signextendsesf.ll b/test/CodeGen/Mips/mips64signextendsesf.ll new file mode 100644 index 000000000000..dec83b80afea --- /dev/null +++ b/test/CodeGen/Mips/mips64signextendsesf.ll @@ -0,0 +1,214 @@ +; RUN: llc -march=mips64 -mcpu=mips64r2 -soft-float -O2 < %s | FileCheck %s + +define void @foosf() #0 { +entry: + %in = alloca float, align 4 + %out = alloca float, align 4 + store volatile float 0xBFD59E1380000000, float* %in, align 4 + %in.0.in.0. = load volatile float* %in, align 4 + %rintf = tail call float @rintf(float %in.0.in.0.) 
#1 + store volatile float %rintf, float* %out, align 4 + ret void + +; CHECK-LABEL: foosf +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +declare float @rintf(float) + +define float @foosf1(float* nocapture readonly %a) #0 { +entry: + %0 = load float* %a, align 4 + %call = tail call float @roundf(float %0) #2 + ret float %call + +; CHECK-LABEL: foosf1 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +declare float @roundf(float) #1 + +define float @foosf2(float* nocapture readonly %a) #0 { +entry: + %0 = load float* %a, align 4 + %call = tail call float @truncf(float %0) #2 + ret float %call + +; CHECK-LABEL: foosf2 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +declare float @truncf(float) #1 + +define float @foosf3(float* nocapture readonly %a) #0 { +entry: + %0 = load float* %a, align 4 + %call = tail call float @floorf(float %0) #2 + ret float %call + +; CHECK-LABEL: foosf3 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +declare float @floorf(float) #1 + +define float @foosf4(float* nocapture readonly %a) #0 { +entry: + %0 = load float* %a, align 4 + %call = tail call float @nearbyintf(float %0) #2 + ret float %call + +; CHECK-LABEL: foosf4 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +declare float @nearbyintf(float) #1 + +define float @foosf5(float* nocapture readonly %a) #0 { +entry: + %0 = load float* %a, align 4 + %mul = fmul float %0, undef + ret float %mul + +; CHECK-LABEL: foosf5 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +define float @foosf6(float* nocapture readonly %a) #0 { +entry: + %0 = load float* %a, align 4 + %sub = fsub float %0, undef + ret float %sub + +; CHECK-LABEL: foosf6 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +define float @foosf7(float* nocapture readonly %a) #0 { +entry: + %0 = load float* %a, align 4 + %add = fadd float %0, undef + ret float %add + +; CHECK-LABEL: foosf7 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu 
+} + +define float @foosf8(float* nocapture readonly %a) #0 { +entry: + %b = alloca float, align 4 + %b.0.b.0. = load volatile float* %b, align 4 + %0 = load float* %a, align 4 + %div = fdiv float %b.0.b.0., %0 + ret float %div + +; CHECK-LABEL: foosf8 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +define float @foosf9() #0 { +entry: + %b = alloca float, align 4 + %b.0.b.0. = load volatile float* %b, align 4 + %conv = fpext float %b.0.b.0. to double + %b.0.b.0.3 = load volatile float* %b, align 4 + %conv1 = fpext float %b.0.b.0.3 to double + %call = tail call double @pow(double %conv, double %conv1) #1 + %conv2 = fptrunc double %call to float + ret float %conv2 + +; CHECK-LABEL: foosf9 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +declare double @pow(double, double) #0 + +define float @foosf10() #0 { +entry: + %a = alloca float, align 4 + %a.0.a.0. = load volatile float* %a, align 4 + %conv = fpext float %a.0.a.0. to double + %call = tail call double @sin(double %conv) #1 + %conv1 = fptrunc double %call to float + ret float %conv1 + +; CHECK-LABEL: foosf10 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +declare double @sin(double) #0 + +define float @foosf11() #0 { +entry: + %b = alloca float, align 4 + %b.0.b.0. = load volatile float* %b, align 4 + %call = tail call float @ceilf(float %b.0.b.0.) #2 + ret float %call + +; CHECK-LABEL: foosf11 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +declare float @ceilf(float) #1 + +define float @foosf12() #0 { +entry: + %b = alloca float, align 4 + %a = alloca float, align 4 + %b.0.b.0. = load volatile float* %b, align 4 + %a.0.a.0. = load volatile float* %a, align 4 + %call = tail call float @fmaxf(float %b.0.b.0., float %a.0.a.0.) 
#2 + ret float %call + +; CHECK-LABEL: foosf12 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +declare float @fmaxf(float, float) #1 + +define float @foosf13() #0 { +entry: + %b = alloca float, align 4 + %a = alloca float, align 4 + %b.0.b.0. = load volatile float* %b, align 4 + %a.0.a.0. = load volatile float* %a, align 4 + %call = tail call float @fminf(float %b.0.b.0., float %a.0.a.0.) #2 + ret float %call + +; CHECK-LABEL: foosf13 +; CHECK-NOT: dsll +; CHECK-NOT: dsrl +; CHECK-NOT: lwu +} + +declare float @fminf(float, float) #1 + + +attributes #0 = { nounwind "use-soft-float"="true" } +attributes #1 = { nounwind readnone "use-soft-float"="true" } diff --git a/test/CodeGen/Mips/mips64sinttofpsf.ll b/test/CodeGen/Mips/mips64sinttofpsf.ll new file mode 100644 index 000000000000..d3d46036f7da --- /dev/null +++ b/test/CodeGen/Mips/mips64sinttofpsf.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=mips64 -mcpu=mips64r2 -soft-float -O0 < %s | FileCheck %s + + +define double @foo() #0 { +entry: + %x = alloca i32, align 4 + store volatile i32 -32, i32* %x, align 4 + %0 = load volatile i32* %x, align 4 + %conv = sitofp i32 %0 to double + ret double %conv + +; CHECK-NOT: dsll +; CHECK-NOT: dsrl + +} diff --git a/test/CodeGen/Mips/no-odd-spreg-msa.ll b/test/CodeGen/Mips/no-odd-spreg-msa.ll new file mode 100644 index 000000000000..30dd1ff82d73 --- /dev/null +++ b/test/CodeGen/Mips/no-odd-spreg-msa.ll @@ -0,0 +1,131 @@ +; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,-nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG +; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,+nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOODDSPREG + +@v4f32 = global <4 x float> zeroinitializer + +define void @msa_insert_0(float %a) { +entry: + ; Force the float into an odd-numbered register using named registers and + ; load the vector. 
+ %b = call float asm sideeffect "mov.s $0, $1", "={$f13},{$f12}" (float %a) + %0 = load volatile <4 x float>* @v4f32 + + ; Clobber all except $f12/$w12 and $f13 + ; + ; The intention is that if odd single precision registers are permitted, the + ; allocator will choose $f12/$w12 for the vector and $f13 for the float to + ; avoid the spill/reload. + ; + ; On the other hand, if odd single precision registers are not permitted, it + ; must copy $f13 to an even-numbered register before inserting into the + ; vector. + call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"() + %1 = insertelement <4 x float> %0, float %b, i32 0 + store <4 x float> %1, <4 x float>* @v4f32 + ret void +} + +; ALL-LABEL: msa_insert_0: +; ALL: mov.s $f13, $f12 +; ALL: lw $[[R0:[0-9]+]], %got(v4f32)( +; ALL: ld.w $w[[W0:[0-9]+]], 0($[[R0]]) +; NOODDSPREG: mov.s $f[[F0:[0-9]+]], $f13 +; NOODDSPREG: insve.w $w[[W0]][0], $w[[F0]][0] +; ODDSPREG: insve.w $w[[W0]][0], $w13[0] +; ALL: # Clobber +; ALL-NOT: sdc1 +; ALL-NOT: ldc1 +; ALL: st.w $w[[W0]], 0($[[R0]]) + +define void @msa_insert_1(float %a) { +entry: + ; Force the float into an odd-numbered register using named registers and + ; load the vector. + %b = call float asm sideeffect "mov.s $0, $1", "={$f13},{$f12}" (float %a) + %0 = load volatile <4 x float>* @v4f32 + + ; Clobber all except $f12/$w12 and $f13 + ; + ; The intention is that if odd single precision registers are permitted, the + ; allocator will choose $f12/$w12 for the vector and $f13 for the float to + ; avoid the spill/reload. + ; + ; On the other hand, if odd single precision registers are not permitted, it + ; must copy $f13 to an even-numbered register before inserting into the + ; vector. 
+ call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"() + %1 = insertelement <4 x float> %0, float %b, i32 1 + store <4 x float> %1, <4 x float>* @v4f32 + ret void +} + +; ALL-LABEL: msa_insert_1: +; ALL: mov.s $f13, $f12 +; ALL: lw $[[R0:[0-9]+]], %got(v4f32)( +; ALL: ld.w $w[[W0:[0-9]+]], 0($[[R0]]) +; NOODDSPREG: mov.s $f[[F0:[0-9]+]], $f13 +; NOODDSPREG: insve.w $w[[W0]][1], $w[[F0]][0] +; ODDSPREG: insve.w $w[[W0]][1], $w13[0] +; ALL: # Clobber +; ALL-NOT: sdc1 +; ALL-NOT: ldc1 +; ALL: st.w $w[[W0]], 0($[[R0]]) + +define float @msa_extract_0() { +entry: + %0 = load volatile <4 x float>* @v4f32 + %1 = call <4 x float> asm sideeffect "move.v $0, $1", "={$w13},{$w12}" (<4 x float> %0) + + ; Clobber all except $f12, and $f13 + ; + ; The intention is that if odd single precision registers are permitted, the + ; allocator will choose $f13/$w13 for the vector since that saves on moves. + ; + ; On the other hand, if odd single precision registers are not permitted, it + ; must move it to $f12/$w12. 
+ call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"() + + %2 = extractelement <4 x float> %1, i32 0 + ret float %2 +} + +; ALL-LABEL: msa_extract_0: +; ALL: lw $[[R0:[0-9]+]], %got(v4f32)( +; ALL: ld.w $w12, 0($[[R0]]) +; ALL: move.v $w[[W0:13]], $w12 +; NOODDSPREG: move.v $w[[W0:12]], $w13 +; ALL: # Clobber +; ALL-NOT: st.w +; ALL-NOT: ld.w +; ALL: mov.s $f0, $f[[W0]] + +define float @msa_extract_1() { +entry: + %0 = load volatile <4 x float>* @v4f32 + %1 = call <4 x float> asm sideeffect "move.v $0, $1", "={$w13},{$w12}" (<4 x float> %0) + + ; Clobber all except $f13 + ; + ; The intention is that if odd single precision registers are permitted, the + ; allocator will choose $f13/$w13 for the vector since that saves on moves. + ; + ; On the other hand, if odd single precision registers are not permitted, it + ; must be spilled. 
+ call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"() + + %2 = extractelement <4 x float> %1, i32 1 + ret float %2 +} + +; ALL-LABEL: msa_extract_1: +; ALL: lw $[[R0:[0-9]+]], %got(v4f32)( +; ALL: ld.w $w12, 0($[[R0]]) +; ALL: splati.w $w[[W0:[0-9]+]], $w13[1] +; NOODDSPREG: st.w $w[[W0]], 0($sp) +; ODDSPREG-NOT: st.w +; ODDSPREG-NOT: ld.w +; ALL: # Clobber +; ODDSPREG-NOT: st.w +; ODDSPREG-NOT: ld.w +; NOODDSPREG: ld.w $w0, 0($sp) +; ODDSPREG: mov.s $f0, $f[[W0]] diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll index 3b3fee05af7f..557d86aa8376 100644 --- a/test/CodeGen/R600/128bit-kernel-args.ll +++ b/test/CodeGen/R600/128bit-kernel-args.ll @@ -1,26 +1,27 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI -; R600-CHECK: {{^}}v4i32_kernel_arg: -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X -; SI-CHECK: {{^}}v4i32_kernel_arg: -; SI-CHECK: buffer_store_dwordx4 +; R600: {{^}}v4i32_kernel_arg: +; R600-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y +; R600-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z +; R600-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W +; R600-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X +; SI: {{^}}v4i32_kernel_arg: +; SI: 
buffer_store_dwordx4 define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { entry: store <4 x i32> %in, <4 x i32> addrspace(1)* %out ret void } -; R600-CHECK: {{^}}v4f32_kernel_arg: -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X -; SI-CHECK: {{^}}v4f32_kernel_arg: -; SI-CHECK: buffer_store_dwordx4 +; R600: {{^}}v4f32_kernel_arg: +; R600-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y +; R600-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z +; R600-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W +; R600-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X +; SI: {{^}}v4f32_kernel_arg: +; SI: buffer_store_dwordx4 define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float> %in) { entry: store <4 x float> %in, <4 x float> addrspace(1)* %out diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll index 6ab0c08d505f..71940fd88f26 100644 --- a/test/CodeGen/R600/32-bit-local-address-space.ll +++ b/test/CodeGen/R600/32-bit-local-address-space.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and ; the global address space(1) uses 64-bit pointers. 
These tests check to make sure diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll index 02b6f34e9419..9f2738edb6eb 100644 --- a/test/CodeGen/R600/64bit-kernel-args.ll +++ b/test/CodeGen/R600/64bit-kernel-args.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI -; SI-CHECK: {{^}}f64_kernel_arg: -; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9 -; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb -; SI-CHECK: buffer_store_dwordx2 +; SI: {{^}}f64_kernel_arg: +; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9 +; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb +; SI: buffer_store_dwordx2 define void @f64_kernel_arg(double addrspace(1)* %out, double %in) { entry: store double %in, double addrspace(1)* %out diff --git a/test/CodeGen/R600/add-debug.ll b/test/CodeGen/R600/add-debug.ll index 85e9451d4a9b..a83c689eb182 100644 --- a/test/CodeGen/R600/add-debug.ll +++ b/test/CodeGen/R600/add-debug.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=amdgcn -mcpu=tahiti -debug +; RUN: llc < %s -march=amdgcn -mcpu=tonga -debug ; REQUIRES: asserts ; Check that SelectionDAGDumper does not crash on int_SI_if. 
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll index d95853a61048..3a8b97cd87e8 100644 --- a/test/CodeGen/R600/add.ll +++ b/test/CodeGen/R600/add.ll @@ -1,12 +1,13 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK --check-prefix=FUNC %s -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s ;FUNC-LABEL: {{^}}test1: -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}} -;SI-CHECK-NOT: [[REG]] -;SI-CHECK: buffer_store_dword [[REG]], +;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}} +;SI-NOT: [[REG]] +;SI: buffer_store_dword [[REG]], define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 %a = load i32 addrspace(1)* %in @@ -17,11 +18,11 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { } ;FUNC-LABEL: {{^}}test2: -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> 
addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -33,15 +34,15 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { } ;FUNC-LABEL: {{^}}test4: -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -53,22 +54,22 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { } ; FUNC-LABEL: {{^}}test8: -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: 
ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 define void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) { entry: %0 = add <8 x i32> %a, %b @@ -77,38 +78,38 @@ entry: } ; FUNC-LABEL: {{^}}test16: -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 define void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) { entry: %0 = add <16 x i32> %a, %b @@ -117,8 +118,8 @@ entry: } ; FUNC-LABEL: {{^}}add64: -; SI-CHECK: s_add_u32 -; SI-CHECK: s_addc_u32 +; SI: s_add_u32 +; SI: s_addc_u32 define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { entry: %0 = add i64 %a, %b @@ -132,7 +133,7 @@ entry: ; to a VGPR before doing the add. 
; FUNC-LABEL: {{^}}add64_sgpr_vgpr: -; SI-CHECK-NOT: v_addc_u32_e32 s +; SI-NOT: v_addc_u32_e32 s define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) { entry: %0 = load i64 addrspace(1)* %in @@ -143,8 +144,8 @@ entry: ; Test i64 add inside a branch. ; FUNC-LABEL: {{^}}add64_in_branch: -; SI-CHECK: s_add_u32 -; SI-CHECK: s_addc_u32 +; SI: s_add_u32 +; SI: s_addc_u32 define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { entry: %0 = icmp eq i64 %a, 0 diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll index 1106f4f99623..aaa0628ccdc9 100644 --- a/test/CodeGen/R600/address-space.ll +++ b/test/CodeGen/R600/address-space.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; Test that codegenprepare understands address space sizes diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll index bfdf8734eabb..7a395ccb38d0 100644 --- a/test/CodeGen/R600/and.ll +++ b/test/CodeGen/R600/and.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test2: ; EG: AND_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} diff --git a/test/CodeGen/R600/anyext.ll b/test/CodeGen/R600/anyext.ll index 8336ebcaca3b..48d8f3122495 100644 --- a/test/CodeGen/R600/anyext.ll +++ b/test/CodeGen/R600/anyext.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}anyext_i1_i32: ; CHECK: v_cndmask_b32_e64 diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll index 6dd1c51afb4a..5fe05f2996af 100644 --- a/test/CodeGen/R600/atomic_load_add.ll +++ b/test/CodeGen/R600/atomic_load_add.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}atomic_add_local: diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll index 5d47185421a0..40722833d265 100644 --- a/test/CodeGen/R600/atomic_load_sub.ll +++ b/test/CodeGen/R600/atomic_load_sub.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}atomic_sub_local: ; R600: LDS_SUB * diff --git a/test/CodeGen/R600/basic-branch.ll b/test/CodeGen/R600/basic-branch.ll index 42ddddd2ed84..abdc4afef472 100644 --- a/test/CodeGen/R600/basic-branch.ll +++ b/test/CodeGen/R600/basic-branch.ll @@ -1,5 +1,6 @@ ; XFAIL: * ; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 
-march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_branch( define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind { diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/R600/basic-loop.ll index 9d0509b38d8a..f0263caf5d6b 100644 --- a/test/CodeGen/R600/basic-loop.ll +++ b/test/CodeGen/R600/basic-loop.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s +; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_loop: define void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind { diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll index 988a2f85e0ea..03349349735d 100644 --- a/test/CodeGen/R600/bfi_int.ll +++ b/test/CodeGen/R600/bfi_int.ll @@ -1,13 +1,14 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s ; BFI_INT Definition pattern from ISA docs ; (y & x) | (z & ~x) ; -; R600-CHECK: {{^}}bfi_def: -; R600-CHECK: BFI_INT -; SI-CHECK: @bfi_def -; SI-CHECK: v_bfi_b32 +; R600: {{^}}bfi_def: +; R600: BFI_INT +; SI: @bfi_def +; SI: v_bfi_b32 define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { entry: %0 = xor i32 %x, -1 @@ -20,10 +21,10 @@ entry: ; SHA-256 Ch function ; z ^ (x & (y ^ z)) -; R600-CHECK: {{^}}bfi_sha256_ch: -; R600-CHECK: BFI_INT -; SI-CHECK: @bfi_sha256_ch -; SI-CHECK: v_bfi_b32 +; R600: {{^}}bfi_sha256_ch: +; R600: BFI_INT +; SI: @bfi_sha256_ch +; SI: v_bfi_b32 define void 
@bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { entry: %0 = xor i32 %y, %z @@ -35,11 +36,11 @@ entry: ; SHA-256 Ma function ; ((x & z) | (y & (x | z))) -; R600-CHECK: {{^}}bfi_sha256_ma: -; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W -; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W -; SI-CHECK: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}} -; SI-CHECK: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}} +; R600: {{^}}bfi_sha256_ma: +; R600: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W +; R600: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W +; SI: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}} +; SI: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}} define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { entry: diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll index 3607d519f013..1ba64af7dca3 100644 --- a/test/CodeGen/R600/bitcast.ll +++ b/test/CodeGen/R600/bitcast.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; This test just checks that the compiler doesn't crash. 
diff --git a/test/CodeGen/R600/bswap.ll b/test/CodeGen/R600/bswap.ll index 65998f5f1151..e93543de49da 100644 --- a/test/CodeGen/R600/bswap.ll +++ b/test/CodeGen/R600/bswap.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.bswap.i32(i32) nounwind readnone declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone diff --git a/test/CodeGen/R600/build_vector.ll b/test/CodeGen/R600/build_vector.ll index a0ebe089bd5a..65eacf5adc41 100644 --- a/test/CodeGen/R600/build_vector.ll +++ b/test/CodeGen/R600/build_vector.ll @@ -1,32 +1,33 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI -; R600-CHECK: {{^}}build_vector2: -; R600-CHECK: MOV -; R600-CHECK: MOV -; R600-CHECK-NOT: MOV -; SI-CHECK: {{^}}build_vector2: -; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5 -; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6 -; SI-CHECK: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}} +; R600: {{^}}build_vector2: +; R600: MOV +; R600: MOV +; R600-NOT: MOV +; SI: {{^}}build_vector2: +; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5 +; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6 +; SI: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}} define void @build_vector2 (<2 x i32> addrspace(1)* %out) { entry: store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out ret void } -; R600-CHECK: {{^}}build_vector4: -; R600-CHECK: MOV -; R600-CHECK: MOV -; R600-CHECK: MOV -; R600-CHECK: MOV -; R600-CHECK-NOT: 
MOV -; SI-CHECK: {{^}}build_vector4: -; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5 -; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6 -; SI-CHECK-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7 -; SI-CHECK-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8 -; SI-CHECK: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}} +; R600: {{^}}build_vector4: +; R600: MOV +; R600: MOV +; R600: MOV +; R600: MOV +; R600-NOT: MOV +; SI: {{^}}build_vector4: +; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5 +; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6 +; SI-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7 +; SI-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8 +; SI: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}} define void @build_vector4 (<4 x i32> addrspace(1)* %out) { entry: store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll index 1afe98ba5951..9a0eb1cc3fa0 100644 --- a/test/CodeGen/R600/call.ll +++ b/test/CodeGen/R600/call.ll @@ -1,4 +1,5 @@ ; RUN: not llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s 2>&1 | FileCheck %s ; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s ; CHECK: error: unsupported call to function external_function in test_call_external @@ -6,28 +7,27 @@ declare i32 @external_function(i32) nounwind -define i32 @defined_function(i32 %x) nounwind noinline { - %y = add i32 %x, 8 - ret i32 %y -} - -define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 %a = load i32 addrspace(1)* %in %b = load i32 addrspace(1)* %b_ptr - %c = call i32 @defined_function(i32 %b) nounwind + %c = call i32 @external_function(i32 %b) nounwind %result = add i32 %a, %c store i32 %result, i32 addrspace(1)* %out ret void } -define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +define i32 
@defined_function(i32 %x) nounwind noinline { + %y = add i32 %x, 8 + ret i32 %y +} + +define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 %a = load i32 addrspace(1)* %in %b = load i32 addrspace(1)* %b_ptr - %c = call i32 @external_function(i32 %b) nounwind + %c = call i32 @defined_function(i32 %b) nounwind %result = add i32 %a, %c store i32 %result, i32 addrspace(1)* %out ret void } - diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/R600/call_fs.ll index 7df22402cff9..db2cb6e5011c 100644 --- a/test/CodeGen/R600/call_fs.ll +++ b/test/CodeGen/R600/call_fs.ll @@ -1,13 +1,13 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s -; EG-CHECK: {{^}}call_fs: -; EG-CHECK: .long 257 -; EG-CHECK: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84] -; R600-CHECK: {{^}}call_fs: -; R600-CHECK: .long 257 -; R600-CHECK:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89] +; EG: {{^}}call_fs: +; EG: .long 257 +; EG: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84] +; R600: {{^}}call_fs: +; R600: .long 257 +; R600:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89] define void @call_fs() #0 { diff --git a/test/CodeGen/R600/cf_end.ll b/test/CodeGen/R600/cf_end.ll index 138004df6df9..c74ee22868d5 100644 --- a/test/CodeGen/R600/cf_end.ll +++ b/test/CodeGen/R600/cf_end.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 
-mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM %s -; EG-CHECK: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80] -; CM-CHECK: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88] +; EG: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80] +; CM: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88] define void @eop() { ret void } diff --git a/test/CodeGen/R600/concat_vectors.ll b/test/CodeGen/R600/concat_vectors.ll index 4c5b9c959516..b27bed3d4265 100644 --- a/test/CodeGen/R600/concat_vectors.ll +++ b/test/CodeGen/R600/concat_vectors.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_concat_v1i32: ; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF @@ -282,3 +283,14 @@ define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <1 store <32 x i16> %concat, <32 x i16> addrspace(1)* %out, align 64 ret void } + +; FUNC-LABEL: {{^}}concat_vector_crash: +; SI: s_endpgm +define void @concat_vector_crash(<8 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { +bb: + %tmp = load <2 x float> addrspace(1)* %in, align 4 + %tmp1 = shufflevector <2 x float> %tmp, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %tmp2 = shufflevector <8 x float> undef, <8 x float> %tmp1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9> + store <8 x float> %tmp2, <8 x float> addrspace(1)* %out, align 32 + ret void +} diff 
--git a/test/CodeGen/R600/copy-illegal-type.ll b/test/CodeGen/R600/copy-illegal-type.ll index 2dff24c432b1..56c43d23b4a1 100644 --- a/test/CodeGen/R600/copy-illegal-type.ll +++ b/test/CodeGen/R600/copy-illegal-type.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_copy_v4i8: ; SI: buffer_load_dword [[REG:v[0-9]+]] diff --git a/test/CodeGen/R600/copy-to-reg.ll b/test/CodeGen/R600/copy-to-reg.ll index 4a4143567102..9c1de73b3b1b 100644 --- a/test/CodeGen/R600/copy-to-reg.ll +++ b/test/CodeGen/R600/copy-to-reg.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s ; Test that CopyToReg instructions don't have non-register operands prior ; to being emitted. diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll index 090610d4aac2..1a4317b8095c 100644 --- a/test/CodeGen/R600/ctlz_zero_undef.ll +++ b/test/CodeGen/R600/ctlz_zero_undef.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone diff --git a/test/CodeGen/R600/cttz-ctlz.ll b/test/CodeGen/R600/cttz-ctlz.ll index 6be06d243eaa..c957a033c5d7 100644 --- a/test/CodeGen/R600/cttz-ctlz.ll +++ b/test/CodeGen/R600/cttz-ctlz.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -codegenprepare -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s +; RUN: opt -S -codegenprepare -mtriple=r600-unknown-unknown 
-mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s define i64 @test1(i64 %A) { diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll index ab59360694bf..d9d284c58865 100644 --- a/test/CodeGen/R600/cttz_zero_undef.ll +++ b/test/CodeGen/R600/cttz_zero_undef.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll index e26ee12f6f6d..69eea5919c05 100644 --- a/test/CodeGen/R600/cvt_f32_ubyte.ll +++ b/test/CodeGen/R600/cvt_f32_ubyte.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}load_i8_to_f32: ; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]], @@ -145,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> ; SI: buffer_store_dword ; SI: buffer_store_dword define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind { - %load = load <8 x i8> addrspace(1)* %in, align 1 + %load = load <8 x i8> addrspace(1)* %in, align 8 %cvt = uitofp <8 x i8> %load to <8 x float> store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16 ret void diff --git a/test/CodeGen/R600/default-fp-mode.ll b/test/CodeGen/R600/default-fp-mode.ll index 6b6d49996eb1..da8e91454b98 100644 --- a/test/CodeGen/R600/default-fp-mode.ll +++ b/test/CodeGen/R600/default-fp-mode.ll @@ -5,6 +5,13 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck 
-check-prefix=DEFAULT -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_kernel: diff --git a/test/CodeGen/R600/ds_read2_offset_order.ll b/test/CodeGen/R600/ds_read2_offset_order.ll index bdbe22ff2348..44306bc9d38f 100644 --- a/test/CodeGen/R600/ds_read2_offset_order.ll +++ b/test/CodeGen/R600/ds_read2_offset_order.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s ; XFAIL: * diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll index ec28ed9c1dcd..f801b3f57357 100644 --- a/test/CodeGen/R600/elf.ll +++ b/test/CodeGen/R600/elf.ll @@ -1,19 +1,24 @@ -; RUN: 
llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF-CHECK %s -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG-CHECK %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s -; ELF-CHECK: Format: ELF32 -; ELF-CHECK: Name: .AMDGPU.config -; ELF-CHECK: Type: SHT_PROGBITS +; ELF: Format: ELF32 +; ELF: Name: .AMDGPU.config +; ELF: Type: SHT_PROGBITS -; ELF-CHECK: Symbol { -; ELF-CHECK: Name: test -; ELF-CHECK: Binding: Global +; ELF: Symbol { +; ELF: Name: test +; ELF: Binding: Global -; CONFIG-CHECK: .align 256 -; CONFIG-CHECK: test: -; CONFIG-CHECK: .section .AMDGPU.config -; CONFIG-CHECK-NEXT: .long 45096 -; CONFIG-CHECK-NEXT: .long 0 +; CONFIG: .align 256 +; CONFIG: test: +; CONFIG: .section .AMDGPU.config +; CONFIG-NEXT: .long 45096 +; TYPICAL-NEXT: .long 0 +; TONGA-NEXT: .long 576 define void @test(i32 %p) #0 { %i = add i32 %p, 2 %r = bitcast i32 %i to float diff --git a/test/CodeGen/R600/elf.r600.ll b/test/CodeGen/R600/elf.r600.ll index 4436c07c5a77..51cd08500932 100644 --- a/test/CodeGen/R600/elf.r600.ll +++ b/test/CodeGen/R600/elf.r600.ll @@ -1,14 +1,14 @@ -; RUN: llc < %s -march=r600 
-mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG %s -; ELF-CHECK: Format: ELF32 -; ELF-CHECK: Name: .AMDGPU.config +; ELF: Format: ELF32 +; ELF: Name: .AMDGPU.config -; CONFIG-CHECK: .section .AMDGPU.config -; CONFIG-CHECK-NEXT: .long 166100 -; CONFIG-CHECK-NEXT: .long 2 -; CONFIG-CHECK-NEXT: .long 165900 -; CONFIG-CHECK-NEXT: .long 0 +; CONFIG: .section .AMDGPU.config +; CONFIG-NEXT: .long 166100 +; CONFIG-NEXT: .long 2 +; CONFIG-NEXT: .long 165900 +; CONFIG-NEXT: .long 0 define void @test(float addrspace(1)* %out, i32 %p) { %i = add i32 %p, 2 %r = bitcast i32 %i to float diff --git a/test/CodeGen/R600/empty-function.ll b/test/CodeGen/R600/empty-function.ll index 4b81d971d06b..b5593eb87ae4 100644 --- a/test/CodeGen/R600/empty-function.ll +++ b/test/CodeGen/R600/empty-function.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; Make sure we don't assert on empty functions diff --git a/test/CodeGen/R600/extload-private.ll b/test/CodeGen/R600/extload-private.ll new file mode 100644 index 000000000000..fec868232507 --- /dev/null +++ b/test/CodeGen/R600/extload-private.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}load_i8_sext_private: +; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen 
+define void @load_i8_sext_private(i32 addrspace(1)* %out) { +entry: + %tmp0 = alloca i8 + %tmp1 = load i8* %tmp0 + %tmp2 = sext i8 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}load_i8_zext_private: +; SI: buffer_load_ubyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen +define void @load_i8_zext_private(i32 addrspace(1)* %out) { +entry: + %tmp0 = alloca i8 + %tmp1 = load i8* %tmp0 + %tmp2 = zext i8 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}load_i16_sext_private: +; SI: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen +define void @load_i16_sext_private(i32 addrspace(1)* %out) { +entry: + %tmp0 = alloca i16 + %tmp1 = load i16* %tmp0 + %tmp2 = sext i16 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}load_i16_zext_private: +; SI: buffer_load_ushort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen +define void @load_i16_zext_private(i32 addrspace(1)* %out) { +entry: + %tmp0 = alloca i16 + %tmp1 = load i16* %tmp0 + %tmp2 = zext i16 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll index 4a94acaba0b5..73d6701bfb5b 100644 --- a/test/CodeGen/R600/extload.ll +++ b/test/CodeGen/R600/extload.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}anyext_load_i8: ; EG: AND_INT diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/R600/extract_vector_elt_i16.ll index 04c375a89ae3..0774a9ae852b 100644 --- a/test/CodeGen/R600/extract_vector_elt_i16.ll +++ 
b/test/CodeGen/R600/extract_vector_elt_i16.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}extract_vector_elt_v2i16: ; SI: buffer_load_ushort diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll index 9d29c0629628..365af9b73cc0 100644 --- a/test/CodeGen/R600/fadd.ll +++ b/test/CodeGen/R600/fadd.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC ; FUNC-LABEL: {{^}}fadd_f32: ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll index 389c754c9e8d..f1f6fef54766 100644 --- a/test/CodeGen/R600/fadd64.ll +++ b/test/CodeGen/R600/fadd64.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK: {{^}}fadd_f64: ; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}} diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll index 7c7a7e36295c..f23e8919d733 100644 --- a/test/CodeGen/R600/fceil.ll +++ b/test/CodeGen/R600/fceil.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck 
-check-prefix=EG -check-prefix=FUNC %s declare float @llvm.ceil.f32(float) nounwind readnone diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll index 032a4e416dc1..9dc8b50513f2 100644 --- a/test/CodeGen/R600/fcmp64.ll +++ b/test/CodeGen/R600/fcmp64.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}flt_f64: ; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll index f3bc399972d5..28e0c909747f 100644 --- a/test/CodeGen/R600/fconst64.ll +++ b/test/CodeGen/R600/fconst64.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK: {{^}}fconst_f64: ; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000 diff --git a/test/CodeGen/R600/fdiv.f64.ll b/test/CodeGen/R600/fdiv.f64.ll new file mode 100644 index 000000000000..276642f99014 --- /dev/null +++ b/test/CodeGen/R600/fdiv.f64.ll @@ -0,0 +1,96 @@ +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s + + +; COMMON-LABEL: {{^}}fdiv_f64: +; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 +; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; CI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]] +; CI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], vcc, [[NUM]], [[DEN]], [[NUM]] + +; Check for div_scale bug workaround on 
SI +; SI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]] +; SI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[NUM]], [[DEN]], [[NUM]] + +; COMMON-DAG: v_rcp_f64_e32 [[RCP_SCALE0:v\[[0-9]+:[0-9]+\]]], [[SCALE0]] + +; SI-DAG: v_cmp_eq_i32_e32 vcc, {{v[0-9]+}}, {{v[0-9]+}} +; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}} +; SI-DAG: s_xor_b64 vcc, [[CMP0]], vcc + +; COMMON-DAG: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[RCP_SCALE0]], 1.0 +; COMMON-DAG: v_fma_f64 [[FMA1:v\[[0-9]+:[0-9]+\]]], [[RCP_SCALE0]], [[FMA0]], [[RCP_SCALE0]] +; COMMON-DAG: v_fma_f64 [[FMA2:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[FMA1]], 1.0 +; COMMON-DAG: v_fma_f64 [[FMA3:v\[[0-9]+:[0-9]+\]]], [[FMA1]], [[FMA2]], [[FMA1]] +; COMMON-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[SCALE1]], [[FMA3]] +; COMMON-DAG: v_fma_f64 [[FMA4:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[MUL]], [[SCALE1]] +; COMMON: v_div_fmas_f64 [[FMAS:v\[[0-9]+:[0-9]+\]]], [[FMA3]], [[FMA4]], [[MUL]] +; COMMON: v_div_fixup_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[FMAS]], [[DEN]], [[NUM]] +; COMMON: buffer_store_dwordx2 [[RESULT]] +; COMMON: s_endpgm +define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounwind { + %gep.1 = getelementptr double addrspace(1)* %in, i32 1 + %num = load double addrspace(1)* %in + %den = load double addrspace(1)* %gep.1 + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}fdiv_f64_s_v: +define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, double %num) nounwind { + %den = load double addrspace(1)* %in + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}fdiv_f64_v_s: +define void @fdiv_f64_v_s(double addrspace(1)* %out, double addrspace(1)* %in, double %den) nounwind { + %num = load double addrspace(1)* 
%in + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}fdiv_f64_s_s: +define void @fdiv_f64_s_s(double addrspace(1)* %out, double %num, double %den) nounwind { + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}v_fdiv_v2f64: +define void @v_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) nounwind { + %gep.1 = getelementptr <2 x double> addrspace(1)* %in, i32 1 + %num = load <2 x double> addrspace(1)* %in + %den = load <2 x double> addrspace(1)* %gep.1 + %result = fdiv <2 x double> %num, %den + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}s_fdiv_v2f64: +define void @s_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %num, <2 x double> %den) { + %result = fdiv <2 x double> %num, %den + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}v_fdiv_v4f64: +define void @v_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) nounwind { + %gep.1 = getelementptr <4 x double> addrspace(1)* %in, i32 1 + %num = load <4 x double> addrspace(1)* %in + %den = load <4 x double> addrspace(1)* %gep.1 + %result = fdiv <4 x double> %num, %den + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}s_fdiv_v4f64: +define void @s_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %num, <4 x double> %den) { + %result = fdiv <4 x double> %num, %den + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll index f83b88ee1c9d..603287fbdf4f 100644 --- a/test/CodeGen/R600/fdiv.ll +++ b/test/CodeGen/R600/fdiv.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | 
FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; These tests check that fdiv is expanded correctly and also test that the ; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate diff --git a/test/CodeGen/R600/fdiv64.ll b/test/CodeGen/R600/fdiv64.ll deleted file mode 100644 index 0611b153f5cc..000000000000 --- a/test/CodeGen/R600/fdiv64.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s - -; CHECK: {{^}}fdiv_f64: -; CHECK: v_rcp_f64_e32 {{v\[[0-9]+:[0-9]+\]}} -; CHECK: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} - -define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1, - double addrspace(1)* %in2) { - %r0 = load double addrspace(1)* %in1 - %r1 = load double addrspace(1)* %in2 - %r2 = fdiv double %r0, %r1 - store double %r2, double addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll index 194d0aaa1819..9038ff81b073 100644 --- a/test/CodeGen/R600/ffloor.ll +++ b/test/CodeGen/R600/ffloor.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare double @llvm.floor.f64(double) nounwind readnone declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone diff --git a/test/CodeGen/R600/flat-address-space.ll b/test/CodeGen/R600/flat-address-space.ll index 99ef41b24e4f..2e98bf51b23b 100644 --- a/test/CodeGen/R600/flat-address-space.ll +++ b/test/CodeGen/R600/flat-address-space.ll @@ -1,5 +1,7 @@ ; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck 
-check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s ; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s +; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s +; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s ; Disable optimizations in case there are optimizations added that ; specialize away generic pointer accesses. diff --git a/test/CodeGen/R600/fma.f64.ll b/test/CodeGen/R600/fma.f64.ll index 48b1093ecd0b..bca312bfa751 100644 --- a/test/CodeGen/R600/fma.f64.ll +++ b/test/CodeGen/R600/fma.f64.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.fma.f64(double, double, double) nounwind readnone declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone diff --git a/test/CodeGen/R600/fmax3.f64.ll b/test/CodeGen/R600/fmax3.f64.ll new file mode 100644 index 000000000000..5ca789de2a08 --- /dev/null +++ b/test/CodeGen/R600/fmax3.f64.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare double @llvm.maxnum.f64(double, double) nounwind readnone + +; SI-LABEL: {{^}}test_fmax3_f64: +; SI-DAG: buffer_load_dwordx2 [[REGA:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0{{$}} +; SI-DAG: buffer_load_dwordx2 [[REGB:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:8 +; SI-DAG: buffer_load_dwordx2 [[REGC:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:16 +; SI: v_max_f64 [[REGA]], [[REGA]], [[REGB]] +; SI: v_max_f64 
[[RESULT:v\[[0-9]+:[0-9]+\]]], [[REGA]], [[REGC]] +; SI: buffer_store_dwordx2 [[RESULT]], +; SI: s_endpgm +define void @test_fmax3_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind { + %bptr = getelementptr double addrspace(1)* %aptr, i32 1 + %cptr = getelementptr double addrspace(1)* %aptr, i32 2 + %a = load double addrspace(1)* %aptr, align 8 + %b = load double addrspace(1)* %bptr, align 8 + %c = load double addrspace(1)* %cptr, align 8 + %f0 = call double @llvm.maxnum.f64(double %a, double %b) nounwind readnone + %f1 = call double @llvm.maxnum.f64(double %f0, double %c) nounwind readnone + store double %f1, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/fmax3.ll b/test/CodeGen/R600/fmax3.ll index 6f95bf20f73b..e1b477c5921e 100644 --- a/test/CodeGen/R600/fmax3.ll +++ b/test/CodeGen/R600/fmax3.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare float @llvm.maxnum.f32(float, float) nounwind readnone diff --git a/test/CodeGen/R600/fmaxnum.f64.ll b/test/CodeGen/R600/fmaxnum.f64.ll index e92996aa2b1f..de563cec3412 100644 --- a/test/CodeGen/R600/fmaxnum.f64.ll +++ b/test/CodeGen/R600/fmaxnum.f64.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.maxnum.f64(double, double) #0 declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0 diff --git a/test/CodeGen/R600/fmaxnum.ll b/test/CodeGen/R600/fmaxnum.ll index 473184af214b..c105598ff811 100644 --- a/test/CodeGen/R600/fmaxnum.ll +++ b/test/CodeGen/R600/fmaxnum.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck 
-check-prefix=SI -check-prefix=FUNC %s declare float @llvm.maxnum.f32(float, float) #0 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0 diff --git a/test/CodeGen/R600/fmin3.ll b/test/CodeGen/R600/fmin3.ll index aeeed1c7dd39..716beb16bb10 100644 --- a/test/CodeGen/R600/fmin3.ll +++ b/test/CodeGen/R600/fmin3.ll @@ -1,4 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare float @llvm.minnum.f32(float, float) nounwind readnone diff --git a/test/CodeGen/R600/fminnum.f64.ll b/test/CodeGen/R600/fminnum.f64.ll index b8476f98bab8..0f929d6a81f0 100644 --- a/test/CodeGen/R600/fminnum.f64.ll +++ b/test/CodeGen/R600/fminnum.f64.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.minnum.f64(double, double) #0 declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0 diff --git a/test/CodeGen/R600/fminnum.ll b/test/CodeGen/R600/fminnum.ll index cd1a948707e8..6b93b830033b 100644 --- a/test/CodeGen/R600/fminnum.ll +++ b/test/CodeGen/R600/fminnum.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.minnum.f32(float, float) #0 declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0 diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll index 7296a8760be2..6c09aa242677 100644 --- a/test/CodeGen/R600/fmul.ll +++ b/test/CodeGen/R600/fmul.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI 
-check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll index 882307ef458b..9d7787ccbe1f 100644 --- a/test/CodeGen/R600/fmul64.ll +++ b/test/CodeGen/R600/fmul64.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s ; FUNC-LABEL: {{^}}fmul_f64: ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/R600/fnearbyint.ll index 30bc67689e1c..4fa9adaabdae 100644 --- a/test/CodeGen/R600/fnearbyint.ll +++ b/test/CodeGen/R600/fnearbyint.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s ; This should have the exactly the same output as the test for rint, ; so no need to check anything. diff --git a/test/CodeGen/R600/fneg-fabs.f64.ll b/test/CodeGen/R600/fneg-fabs.f64.ll index 04a87e377857..7430e7ffb33d 100644 --- a/test/CodeGen/R600/fneg-fabs.f64.ll +++ b/test/CodeGen/R600/fneg-fabs.f64.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FIXME: Check something here. Currently it seems fabs + fneg aren't ; into 2 modifiers, although theoretically that should work. 
diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/R600/fneg-fabs.ll index 94e8256cd261..4fde0484567c 100644 --- a/test/CodeGen/R600/fneg-fabs.ll +++ b/test/CodeGen/R600/fneg-fabs.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32: diff --git a/test/CodeGen/R600/fp-classify.ll b/test/CodeGen/R600/fp-classify.ll index a1b2f08eddeb..c1de85203104 100644 --- a/test/CodeGen/R600/fp-classify.ll +++ b/test/CodeGen/R600/fp-classify.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i1 @llvm.AMDGPU.class.f32(float, i32) #1 declare i1 @llvm.AMDGPU.class.f64(double, i32) #1 diff --git a/test/CodeGen/R600/fp16_to_fp.ll b/test/CodeGen/R600/fp16_to_fp.ll index be84582a73a6..da78f6155c85 100644 --- a/test/CodeGen/R600/fp16_to_fp.ll +++ b/test/CodeGen/R600/fp16_to_fp.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/R600/fp32_to_fp16.ll index 43dd09b5ec05..c3c65aece082 100644 --- a/test/CodeGen/R600/fp32_to_fp16.ll +++ b/test/CodeGen/R600/fp32_to_fp16.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck 
-check-prefix=SI %s declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll index d76e8a341c6f..16549c392b00 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC declare float @llvm.fabs.f32(float) #0 diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll index 5970adf999c9..804d90f476da 100644 --- a/test/CodeGen/R600/fp_to_uint.ll +++ b/test/CodeGen/R600/fp_to_uint.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC ; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32: ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} diff --git a/test/CodeGen/R600/fpext.ll b/test/CodeGen/R600/fpext.ll index 320545edf56b..21c7bfd48df8 100644 --- a/test/CodeGen/R600/fpext.ll +++ b/test/CodeGen/R600/fpext.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK: {{^}}fpext: ; CHECK: v_cvt_f64_f32_e32 diff --git a/test/CodeGen/R600/fptrunc.ll b/test/CodeGen/R600/fptrunc.ll index 15ae4e18ff38..94fcdab9c52f 100644 --- a/test/CodeGen/R600/fptrunc.ll +++ b/test/CodeGen/R600/fptrunc.ll @@ -1,4 +1,5 @@ -; RUN: llc < 
%s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK: {{^}}fptrunc: ; CHECK: v_cvt_f32_f64_e32 diff --git a/test/CodeGen/R600/frem.ll b/test/CodeGen/R600/frem.ll index 50d6687abeec..564634178656 100644 --- a/test/CodeGen/R600/frem.ll +++ b/test/CodeGen/R600/frem.ll @@ -40,10 +40,14 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ret void } -; TODO: This should check something when f64 fdiv is implemented -; correctly - ; FUNC-LABEL: {{^}}frem_f64: +; SI: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0 +; SI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0 +; SI-DAG: v_div_fmas_f64 +; SI-DAG: v_div_scale_f64 +; SI-DAG: v_mul_f64 +; SI: v_add_f64 +; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #0 { diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll index 0d1095cf2683..1fdf3e453bf3 100644 --- a/test/CodeGen/R600/fsqrt.ll +++ b/test/CodeGen/R600/fsqrt.ll @@ -1,5 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s ; Run with unsafe-fp-math to make sure nothing tries to turn this into 1 / rsqrt(x) diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll index 4fe47e7badf3..ef90fea67900 100644 --- a/test/CodeGen/R600/fsub.ll +++ b/test/CodeGen/R600/fsub.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; RUN: llc -march=amdgcn 
-mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}v_fsub_f32: diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll index d0f894607a61..62f46142fe0d 100644 --- a/test/CodeGen/R600/fsub64.ll +++ b/test/CodeGen/R600/fsub64.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}fsub_f64: ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll index 39eb2b5accbf..edc08609a8aa 100644 --- a/test/CodeGen/R600/ftrunc.ll +++ b/test/CodeGen/R600/ftrunc.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s declare float @llvm.trunc.f32(float) nounwind readnone declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone diff --git a/test/CodeGen/R600/global-directive.ll b/test/CodeGen/R600/global-directive.ll index 189510ad08fe..3ba12c206ad3 100644 --- a/test/CodeGen/R600/global-directive.ll +++ b/test/CodeGen/R600/global-directive.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; Make sure the GlobalDirective isn't merged with the function name diff --git a/test/CodeGen/R600/global-extload-i1.ll b/test/CodeGen/R600/global-extload-i1.ll index 940911e73453..5dc494900ce8 100644 --- a/test/CodeGen/R600/global-extload-i1.ll 
+++ b/test/CodeGen/R600/global-extload-i1.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FIXME: Evergreen broken diff --git a/test/CodeGen/R600/global-extload-i16.ll b/test/CodeGen/R600/global-extload-i16.ll index 838068470ff2..a1740ec8236a 100644 --- a/test/CodeGen/R600/global-extload-i16.ll +++ b/test/CodeGen/R600/global-extload-i16.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FIXME: cypress is broken because the bigger testcases spill and it's not implemented diff --git a/test/CodeGen/R600/global-extload-i32.ll b/test/CodeGen/R600/global-extload-i32.ll index ce78c446c3ba..f56b6ac8dc38 100644 --- a/test/CodeGen/R600/global-extload-i32.ll +++ b/test/CodeGen/R600/global-extload-i32.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}zextload_global_i32_to_i64: diff --git a/test/CodeGen/R600/global-extload-i8.ll b/test/CodeGen/R600/global-extload-i8.ll index 8d6042f1de02..86245232d3e4 100644 --- a/test/CodeGen/R600/global-extload-i8.ll +++ b/test/CodeGen/R600/global-extload-i8.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s 
+; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}zextload_global_i8_to_i32: diff --git a/test/CodeGen/R600/global-zero-initializer.ll b/test/CodeGen/R600/global-zero-initializer.ll index 031df59cd1e1..6909c58354c5 100644 --- a/test/CodeGen/R600/global-zero-initializer.ll +++ b/test/CodeGen/R600/global-zero-initializer.ll @@ -1,4 +1,5 @@ ; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s ; CHECK: error: unsupported initializer for address space in load_init_global_global diff --git a/test/CodeGen/R600/half.ll b/test/CodeGen/R600/half.ll index cb7a94a0b859..35a41c5cd0b0 100644 --- a/test/CodeGen/R600/half.ll +++ b/test/CodeGen/R600/half.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) { ; CHECK-LABEL: {{^}}test_load_store: diff --git a/test/CodeGen/R600/i1-copy-implicit-def.ll b/test/CodeGen/R600/i1-copy-implicit-def.ll index 51e230196bf6..b11a21137642 100644 --- a/test/CodeGen/R600/i1-copy-implicit-def.ll +++ b/test/CodeGen/R600/i1-copy-implicit-def.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SILowerI1Copies was not handling IMPLICIT_DEF ; SI-LABEL: {{^}}br_implicit_def: diff --git a/test/CodeGen/R600/i1-copy-phi.ll b/test/CodeGen/R600/i1-copy-phi.ll index 8b761710f9d2..430466e9f80e 100644 --- a/test/CodeGen/R600/i1-copy-phi.ll +++ b/test/CodeGen/R600/i1-copy-phi.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc 
-march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}br_i1_phi: ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} diff --git a/test/CodeGen/R600/icmp64.ll b/test/CodeGen/R600/icmp64.ll index ed0f221b87b1..0eaa33ebafed 100644 --- a/test/CodeGen/R600/icmp64.ll +++ b/test/CodeGen/R600/icmp64.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}test_i64_eq: ; SI: v_cmp_eq_i64 diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll index db597a363775..f551606d63a7 100644 --- a/test/CodeGen/R600/indirect-addressing-si.ll +++ b/test/CodeGen/R600/indirect-addressing-si.ll @@ -1,9 +1,10 @@ ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; Tests for indirect addressing on SI, which is implemented using dynamic ; indexing of vectors. -; CHECK: extract_w_offset +; CHECK-LABEL: {{^}}extract_w_offset: ; CHECK: s_mov_b32 m0 ; CHECK-NEXT: v_movrels_b32_e32 define void @extract_w_offset(float addrspace(1)* %out, i32 %in) { @@ -14,7 +15,7 @@ entry: ret void } -; CHECK: extract_wo_offset +; CHECK-LABEL: {{^}}extract_wo_offset: ; CHECK: s_mov_b32 m0 ; CHECK-NEXT: v_movrels_b32_e32 define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) { @@ -24,7 +25,34 @@ entry: ret void } -; CHECK: insert_w_offset +; CHECK-LABEL: {{^}}extract_neg_offset_sgpr: +; The offset depends on the register that holds the first element of the vector. 
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} +; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0 +define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) { +entry: + %index = add i32 %offset, -512 + %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index + store i32 %value, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}extract_neg_offset_vgpr: +; The offset depends on the register that holds the first element of the vector. +; CHECK: v_readfirstlane_b32 +; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}} +; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]}}, v0 +; CHECK: s_cbranch_execnz +define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) { +entry: + %id = call i32 @llvm.r600.read.tidig.x() #1 + %index = add i32 %id, -512 + %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index + store i32 %value, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}insert_w_offset: ; CHECK: s_mov_b32 m0 ; CHECK-NEXT: v_movreld_b32_e32 define void @insert_w_offset(float addrspace(1)* %out, i32 %in) { @@ -36,7 +64,7 @@ entry: ret void } -; CHECK: insert_wo_offset +; CHECK-LABEL: {{^}}insert_wo_offset: ; CHECK: s_mov_b32 m0 ; CHECK-NEXT: v_movreld_b32_e32 define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) { @@ -46,3 +74,48 @@ entry: store float %1, float addrspace(1)* %out ret void } + +; CHECK-LABEL: {{^}}insert_neg_offset_sgpr: +; The offset depends on the register that holds the first element of the vector. 
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} +; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}} +define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) { +entry: + %index = add i32 %offset, -512 + %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index + store <4 x i32> %value, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}insert_neg_offset_vgpr: +; The offset depends on the register that holds the first element of the vector. +; CHECK: v_readfirstlane_b32 +; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}} +; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}} +; CHECK: s_cbranch_execnz +define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) { +entry: + %id = call i32 @llvm.r600.read.tidig.x() #1 + %index = add i32 %id, -512 + %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index + store <4 x i32> %value, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr: +; The offset depends on the register that holds the first element of the vector. 
+; CHECK: v_readfirstlane_b32 +; CHECK: s_add_i32 m0, m0, -{{[0-9]+}} +; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}} +; CHECK: s_cbranch_execnz +define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) { +entry: + %id = call i32 @llvm.r600.read.tidig.x() #1 + %index = add i32 %id, -16 + %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index + store <4 x i32> %value, <4 x i32> addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.tidig.x() #1 +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll index 24006f8799b2..cb06d609da49 100644 --- a/test/CodeGen/R600/indirect-private-64.ll +++ b/test/CodeGen/R600/indirect-private-64.ll @@ -1,5 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind diff --git a/test/CodeGen/R600/infinite-loop.ll b/test/CodeGen/R600/infinite-loop.ll index 0f82a7df6098..7233aa57fd78 100644 --- a/test/CodeGen/R600/infinite-loop.ll +++ b/test/CodeGen/R600/infinite-loop.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}infinite_loop: ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 diff --git a/test/CodeGen/R600/inline-asm.ll 
b/test/CodeGen/R600/inline-asm.ll index 6f1f977de2a4..37e4486db380 100644 --- a/test/CodeGen/R600/inline-asm.ll +++ b/test/CodeGen/R600/inline-asm.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK: {{^}}inline_asm: ; CHECK: s_endpgm diff --git a/test/CodeGen/R600/inline-calls.ll b/test/CodeGen/R600/inline-calls.ll index b8700d55e155..33a4c832e75e 100644 --- a/test/CodeGen/R600/inline-calls.ll +++ b/test/CodeGen/R600/inline-calls.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s ; CHECK-NOT: {{^}}func: diff --git a/test/CodeGen/R600/input-mods.ll b/test/CodeGen/R600/input-mods.ll index e3e94995fc95..1c4d285cbcb1 100644 --- a/test/CodeGen/R600/input-mods.ll +++ b/test/CodeGen/R600/input-mods.ll @@ -1,13 +1,13 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK -;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK +;RUN: llc < %s |