aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-02-17 19:35:08 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-02-17 19:35:08 +0000
commitc60b95818e4f6c00c872114318d01109f97a7fa3 (patch)
tree9361932ed8cde0f9d3c167adb0eb75ff1401ed99
parent3897d3b845ab73af1f4abd7fd8cc6e43925af1b4 (diff)
downloadsrc-c60b95818e4f6c00c872114318d01109f97a7fa3.tar.gz
src-c60b95818e4f6c00c872114318d01109f97a7fa3.zip
Vendor import of llvm release_40 branch r295380:vendor/llvm/llvm-release_40-r295380
Notes
Notes: svn path=/vendor/llvm/dist/; revision=313881 svn path=/vendor/llvm/llvm-release_40-r295380/; revision=313882; tag=vendor/llvm/llvm-release_40-r295380
-rw-r--r--docs/Extensions.rst2
-rw-r--r--docs/LangRef.rst2
-rw-r--r--docs/ReleaseNotes.rst49
-rw-r--r--include/llvm/LTO/legacy/ThinLTOCodeGenerator.h19
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp31
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp9
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp20
-rw-r--r--lib/LTO/ThinLTOCodeGenerator.cpp31
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.td8
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp3
-rw-r--r--lib/Target/ARM/ARMCallingConv.td28
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp3
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp15
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp7
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp11
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp15
-rw-r--r--test/CodeGen/AArch64/ldst-opt.ll99
-rw-r--r--test/CodeGen/AArch64/swifterror.ll182
-rw-r--r--test/CodeGen/AArch64/swiftself.ll18
-rw-r--r--test/CodeGen/ARM/swifterror.ll169
-rw-r--r--test/CodeGen/ARM/swiftself.ll17
-rw-r--r--test/CodeGen/X86/dag-update-nodetomatch.ll241
-rw-r--r--test/CodeGen/X86/pr31956.ll25
-rw-r--r--test/CodeGen/X86/swifterror.ll15
-rw-r--r--test/Instrumentation/AddressSanitizer/basic.ll26
-rw-r--r--test/Instrumentation/ThreadSanitizer/tsan_basic.ll24
-rw-r--r--test/Transforms/LoopUnroll/runtime-li.ll36
28 files changed, 863 insertions, 250 deletions
diff --git a/docs/Extensions.rst b/docs/Extensions.rst
index 850c42750911..2b12123cdf68 100644
--- a/docs/Extensions.rst
+++ b/docs/Extensions.rst
@@ -61,7 +61,7 @@ types ``IMAGE_REL_I386_SECREL`` (32-bit) or ``IMAGE_REL_AMD64_SECREL`` (64-bit).
the target. It corresponds to the COFF relocation types
``IMAGE_REL_I386_SECTION`` (32-bit) or ``IMAGE_REL_AMD64_SECTION`` (64-bit).
-.. code-block:: gas
+.. code-block:: none
.section .debug$S,"rn"
.long 4
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 5ac17015953e..e93a02f6b023 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -3997,7 +3997,7 @@ DIFile
``DIFile`` nodes represent files. The ``filename:`` can include slashes.
-.. code-block:: llvm
+.. code-block:: none
!0 = !DIFile(filename: "path/to/file", directory: "/path/to/dir",
checksumkind: CSK_MD5,
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index aef11daf194b..da86be3f96ff 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -61,6 +61,9 @@ Non-comprehensive list of changes in this release
with LLVM option -adce-remove-loops when the loop body otherwise has
no live operations.
+ * The llvm-cov tool can now export coverage data as json. Its html output mode
+ has also improved.
+
* ... next change ...
.. NOTE
@@ -81,6 +84,37 @@ Non-comprehensive list of changes in this release
* Significant build-time and binary-size improvements when compiling with
debug info (-g).
+LLVM Coroutines
+---------------
+
+Experimental support for :doc:`Coroutines` was added, which can be enabled
+with ``-enable-coroutines`` in ``opt`` command tool or using
+``addCoroutinePassesToExtensionPoints`` API when building the optimization
+pipeline.
+
+For more information on LLVM Coroutines and the LLVM implementation, see
+`2016 LLVM Developers’ Meeting talk on LLVM Coroutines
+<http://llvm.org/devmtg/2016-11/#talk4>`_.
+
+Regcall and Vectorcall Calling Conventions
+--------------------------------------------------
+
+Support was added for _regcall calling convention.
+Existing __vectorcall calling convention support was extended to include
+correct handling of HVAs.
+
+The __vectorcall calling convention was introduced by Microsoft to
+enhance register usage when passing parameters.
+For more information please read `__vectorcall documentation
+<https://msdn.microsoft.com/en-us/library/dn375768.aspx>`_.
+
+The __regcall calling convention was introduced by Intel to
+optimize parameter transfer on function call.
+This calling convention ensures that as many values as possible are
+passed or returned in registers.
+For more information please read `__regcall documentation
+<https://software.intel.com/en-us/node/693069>`_.
+
Code Generation Testing
-----------------------
@@ -258,6 +292,21 @@ External Open Source Projects Using LLVM 4.0.0
* A project...
+LDC - the LLVM-based D compiler
+-------------------------------
+
+`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
+pragmatically combines efficiency, control, and modeling power, with safety and
+programmer productivity. D supports powerful concepts like Compile-Time Function
+Execution (CTFE) and Template Meta-Programming, provides an innovative approach
+to concurrency and offers many classical paradigms.
+
+`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
+combined with LLVM as backend to produce efficient native code. LDC targets
+x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
+and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
+are underway.
+
Additional Information
======================
diff --git a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index cb4a16cb5b7b..0cc3b26e9659 100644
--- a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -31,6 +31,23 @@ class StringRef;
class LLVMContext;
class TargetMachine;
+/// Wrapper around MemoryBufferRef, owning the identifier
+class ThinLTOBuffer {
+ std::string OwnedIdentifier;
+ StringRef Buffer;
+
+public:
+ ThinLTOBuffer(StringRef Buffer, StringRef Identifier)
+ : OwnedIdentifier(Identifier), Buffer(Buffer) {}
+
+ MemoryBufferRef getMemBuffer() const {
+ return MemoryBufferRef(Buffer,
+ {OwnedIdentifier.c_str(), OwnedIdentifier.size()});
+ }
+ StringRef getBuffer() const { return Buffer; }
+ StringRef getBufferIdentifier() const { return OwnedIdentifier; }
+};
+
/// Helper to gather options relevant to the target machine creation
struct TargetMachineBuilder {
Triple TheTriple;
@@ -280,7 +297,7 @@ private:
/// Vector holding the input buffers containing the bitcode modules to
/// process.
- std::vector<MemoryBufferRef> Modules;
+ std::vector<ThinLTOBuffer> Modules;
/// Set of symbols that need to be preserved outside of the set of bitcode
/// files.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fd156fa7dd07..2c7bffe76503 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13072,9 +13072,15 @@ SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
!TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
return SDValue();
- if (InVT1 != InVT2)
+ // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
+ // lower it back into a BUILD_VECTOR. So if the inserted type is
+ // illegal, don't even try.
+ if (InVT1 != InVT2) {
+ if (!TLI.isTypeLegal(InVT2))
+ return SDValue();
VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+ }
ShuffleNumElems = NumElems * 2;
} else {
// Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 3b91e58879b4..4a9042cfb3f4 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -502,8 +502,17 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
const TargetRegisterClass *TRC =
TLI->getRegClassFor(Node->getSimpleValueType(0));
- unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
- MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned Reg;
+ MachineInstr *DefMI;
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
+ if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ Reg = R->getReg();
+ DefMI = nullptr;
+ } else {
+ Reg = getVR(Node->getOperand(0), VRBaseMap);
+ DefMI = MRI->getVRegDef(Reg);
+ }
+
unsigned SrcReg, DstReg, DefSubIdx;
if (DefMI &&
TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
@@ -519,20 +528,26 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
MRI->clearKillFlags(SrcReg);
} else {
- // VReg may not support a SubIdx sub-register, and we may need to
+ // Reg may not support a SubIdx sub-register, and we may need to
// constrain its register class or issue a COPY to a compatible register
// class.
- VReg = ConstrainForSubReg(VReg, SubIdx,
- Node->getOperand(0).getSimpleValueType(),
- Node->getDebugLoc());
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ Reg = ConstrainForSubReg(Reg, SubIdx,
+ Node->getOperand(0).getSimpleValueType(),
+ Node->getDebugLoc());
// Create the destreg if it is missing.
if (VRBase == 0)
VRBase = MRI->createVirtualRegister(TRC);
// Create the extract_subreg machine instruction.
- BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
- TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx);
+ MachineInstrBuilder CopyMI =
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ CopyMI.addReg(Reg, 0, SubIdx);
+ else
+ CopyMI.addReg(TRI->getSubReg(Reg, SubIdx));
}
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 9ca646534e2b..996c95bd5f07 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5832,6 +5832,15 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // We can't tail call inside a function with a swifterror argument. Lowering
+ // does not support this yet. It would have to move into the swifterror
+ // register before the call.
+ auto *Caller = CS.getInstruction()->getParent()->getParent();
+ if (TLI.supportSwiftError() &&
+ Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ isTailCall = false;
+
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
const Value *V = *i;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 004fa703c192..64e6c221229b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2782,14 +2782,15 @@ struct MatchScope {
/// for this.
class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
{
- SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes;
- SmallVectorImpl<MatchScope> &MatchScopes;
+ SDNode **NodeToMatch;
+ SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes;
+ SmallVectorImpl<MatchScope> &MatchScopes;
public:
- MatchStateUpdater(SelectionDAG &DAG,
- SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN,
- SmallVectorImpl<MatchScope> &MS) :
- SelectionDAG::DAGUpdateListener(DAG),
- RecordedNodes(RN), MatchScopes(MS) { }
+ MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch,
+ SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN,
+ SmallVectorImpl<MatchScope> &MS)
+ : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch),
+ RecordedNodes(RN), MatchScopes(MS) {}
void NodeDeleted(SDNode *N, SDNode *E) override {
// Some early-returns here to avoid the search if we deleted the node or
@@ -2799,6 +2800,9 @@ public:
// update listener during matching a complex patterns.
if (!E || E->isMachineOpcode())
return;
+ // Check if NodeToMatch was updated.
+ if (N == *NodeToMatch)
+ *NodeToMatch = E;
// Performing linear search here does not matter because we almost never
// run this code. You'd have to have a CSE during complex pattern
// matching.
@@ -3091,7 +3095,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// consistent.
std::unique_ptr<MatchStateUpdater> MSU;
if (ComplexPatternFuncMutatesDAG())
- MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes,
+ MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes,
MatchScopes));
if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index 104fb199da08..40537e4fa784 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -150,13 +150,13 @@ static void computePrevailingCopies(
}
static StringMap<MemoryBufferRef>
-generateModuleMap(const std::vector<MemoryBufferRef> &Modules) {
+generateModuleMap(const std::vector<ThinLTOBuffer> &Modules) {
StringMap<MemoryBufferRef> ModuleMap;
for (auto &ModuleBuffer : Modules) {
assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
ModuleMap.end() &&
"Expect unique Buffer Identifier");
- ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer;
+ ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer();
}
return ModuleMap;
}
@@ -522,13 +522,13 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder,
} // end anonymous namespace
void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
- MemoryBufferRef Buffer(Data, Identifier);
+ ThinLTOBuffer Buffer(Data, Identifier);
if (Modules.empty()) {
// First module added, so initialize the triple and some options
LLVMContext Context;
StringRef TripleStr;
- ErrorOr<std::string> TripleOrErr =
- expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer));
+ ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
+ Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
if (TripleOrErr)
TripleStr = *TripleOrErr;
Triple TheTriple(TripleStr);
@@ -538,8 +538,8 @@ void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
else {
LLVMContext Context;
StringRef TripleStr;
- ErrorOr<std::string> TripleOrErr =
- expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer));
+ ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
+ Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
if (TripleOrErr)
TripleStr = *TripleOrErr;
assert(TMBuilder.TheTriple.str() == TripleStr &&
@@ -588,7 +588,8 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
uint64_t NextModuleId = 0;
for (auto &ModuleBuffer : Modules) {
Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
- object::ModuleSummaryIndexObjectFile::create(ModuleBuffer);
+ object::ModuleSummaryIndexObjectFile::create(
+ ModuleBuffer.getMemBuffer());
if (!ObjOrErr) {
// FIXME diagnose
logAllUnhandledErrors(
@@ -852,8 +853,9 @@ void ThinLTOCodeGenerator::run() {
Context.setDiscardValueNames(LTODiscardValueNames);
// Parse module now
- auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false,
- /*IsImporting*/ false);
+ auto TheModule =
+ loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
+ /*IsImporting*/ false);
// CodeGen
auto OutputBuffer = codegen(*TheModule);
@@ -943,8 +945,8 @@ void ThinLTOCodeGenerator::run() {
std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
[&](int LeftIndex, int RightIndex) {
- auto LSize = Modules[LeftIndex].getBufferSize();
- auto RSize = Modules[RightIndex].getBufferSize();
+ auto LSize = Modules[LeftIndex].getBuffer().size();
+ auto RSize = Modules[RightIndex].getBuffer().size();
return LSize > RSize;
});
@@ -996,8 +998,9 @@ void ThinLTOCodeGenerator::run() {
}
// Parse module now
- auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false,
- /*IsImporting*/ false);
+ auto TheModule =
+ loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
+ /*IsImporting*/ false);
// Save temps: original file.
saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 9058617768dd..938779d23690 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -91,7 +91,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
+ CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
// Big endian vectors must be passed as if they were 1-element vectors so that
// their lanes are in a consistent order.
@@ -138,8 +138,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
- // A SwiftError is passed in X19.
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
+ // A SwiftError is passed in X21.
+ CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
@@ -289,7 +289,7 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
def CSR_AArch64_AAPCS_SwiftError
- : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X19)>;
+ : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
// The function used by Darwin to obtain the address of a thread-local variable
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2f67a105b4d1..849058bdfbdb 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3155,7 +3155,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
if (VA.isRegLoc()) {
- if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) {
+ if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
+ Outs[0].VT == MVT::i64) {
assert(VA.getLocVT() == MVT::i64 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 9c278a52a7ff..7a7b7fede7c8 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -26,8 +26,8 @@ def CC_ARM_APCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is passed in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is passed in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -51,8 +51,8 @@ def RetCC_ARM_APCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is returned in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is returned in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -166,8 +166,8 @@ def CC_ARM_AAPCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is passed in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is passed in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -182,8 +182,8 @@ def RetCC_ARM_AAPCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is returned in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is returned in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -206,8 +206,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is passed in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is passed in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// HFAs are passed in a contiguous block of registers, or on the stack
CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
@@ -227,8 +227,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is returned in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is returned in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -267,8 +267,8 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
-// R6 is used to pass swifterror, remove it from CSR.
-def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R6)>;
+// R8 is used to pass swifterror, remove it from CSR.
+def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 1606c1576465..97481d49ea34 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1787,7 +1787,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
- if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+ if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
+ Outs[0].VT == MVT::i32) {
assert(VA.getLocVT() == MVT::i32 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index ffd518e52968..f5e9e7dd5a93 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1013,7 +1013,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
(!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
// inalloca allocas are not treated as static, and we don't want
// dynamic alloca instrumentation for them as well.
- !AI.isUsedWithInAlloca());
+ !AI.isUsedWithInAlloca() &&
+ // swifterror allocas are register promoted by ISel
+ !AI.isSwiftError());
ProcessedAllocas[&AI] = IsInteresting;
return IsInteresting;
@@ -1088,12 +1090,19 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
}
}
- // Do not instrument acesses from different address spaces; we cannot deal
- // with them.
if (PtrOperand) {
+ // Do not instrument acesses from different address spaces; we cannot deal
+ // with them.
Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType());
if (PtrTy->getPointerAddressSpace() != 0)
return nullptr;
+
+ // Ignore swifterror addresses.
+ // swifterror memory addresses are mem2reg promoted by instruction
+ // selection. As such they cannot have regular uses like an instrumentation
+ // function and it makes no sense to track them as memory.
+ if (PtrOperand->isSwiftError())
+ return nullptr;
}
// Treat memory accesses to promotable allocas as non-interesting since they
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index d9659694da46..52035c79a4a3 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -488,6 +488,13 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
Value *Addr = IsWrite
? cast<StoreInst>(I)->getPointerOperand()
: cast<LoadInst>(I)->getPointerOperand();
+
+ // swifterror memory addresses are mem2reg promoted by instruction selection.
+ // As such they cannot have regular uses like an instrumentation function and
+ // it makes no sense to track them as memory.
+ if (Addr->isSwiftError())
+ return false;
+
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index f9a602bc268a..e346ebd6a000 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -189,11 +189,14 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
assert(OriginalBB == OldLoop->getHeader() &&
"Header should be first in RPO");
+ NewLoop = new Loop();
Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
- assert(NewLoopParent &&
- "Expected parent loop before sub-loop in RPO");
- NewLoop = new Loop;
- NewLoopParent->addChildLoop(NewLoop);
+
+ if (NewLoopParent)
+ NewLoopParent->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+
NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
return OldLoop;
} else {
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 85da3ba899a5..d3ea1564115b 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -302,17 +302,22 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
}
NewLoopsMap NewLoops;
- NewLoops[L] = NewLoop;
+ if (NewLoop)
+ NewLoops[L] = NewLoop;
+ else if (ParentLoop)
+ NewLoops[L] = ParentLoop;
+
// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
-
- if (NewLoop) {
+
+ // If we're unrolling the outermost loop, there's no remainder loop,
+ // and this block isn't in a nested loop, then the new block is not
+ // in any loop. Otherwise, add it to loopinfo.
+ if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
- } else if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewBB, *LI);
VMap[*BB] = NewBB;
if (Header == *BB) {
diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll
index 81e4b19e6eea..b09fab8d8b46 100644
--- a/test/CodeGen/AArch64/ldst-opt.ll
+++ b/test/CodeGen/AArch64/ldst-opt.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOSTRICTALIGN %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+strict-align -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=STRICTALIGN %s
; This file contains tests for the AArch64 load/store optimizer.
@@ -119,7 +120,7 @@ define void @load-pre-indexed-doubleword(%struct.doubleword* %ptr) nounwind {
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
%a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0
- %add = load i64, i64* %a, align 4
+ %add = load i64, i64* %a, align 8
br label %bar
bar:
%c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1
@@ -132,7 +133,7 @@ define void @store-pre-indexed-doubleword(%struct.doubleword* %ptr, i64 %val) no
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
%a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0
- store i64 %val, i64* %a, align 4
+ store i64 %val, i64* %a, align 8
br label %bar
bar:
%c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1
@@ -147,7 +148,7 @@ define void @load-pre-indexed-quadword(%struct.quadword* %ptr) nounwind {
; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
%a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0
- %add = load fp128, fp128* %a, align 4
+ %add = load fp128, fp128* %a, align 16
br label %bar
bar:
%c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1
@@ -160,7 +161,7 @@ define void @store-pre-indexed-quadword(%struct.quadword* %ptr, fp128 %val) noun
; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
%a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0
- store fp128 %val, fp128* %a, align 4
+ store fp128 %val, fp128* %a, align 16
br label %bar
bar:
%c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1
@@ -203,7 +204,7 @@ define void @load-pre-indexed-double(%struct.double* %ptr) nounwind {
; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
%a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0
- %add = load double, double* %a, align 4
+ %add = load double, double* %a, align 8
br label %bar
bar:
%c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1
@@ -216,7 +217,7 @@ define void @store-pre-indexed-double(%struct.double* %ptr, double %val) nounwin
; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
%a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0
- store double %val, double* %a, align 4
+ store double %val, double* %a, align 8
br label %bar
bar:
%c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1
@@ -1340,7 +1341,8 @@ end:
define void @merge_zr32(i32* %p) {
; CHECK-LABEL: merge_zr32:
; CHECK: // %entry
-; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
+; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store i32 0, i32* %p
@@ -1349,11 +1351,13 @@ entry:
ret void
}
-; Same sa merge_zr32 but the merged stores should also get paried.
+; Same as merge_zr32 but the merged stores should also get paried.
define void @merge_zr32_2(i32* %p) {
; CHECK-LABEL: merge_zr32_2:
; CHECK: // %entry
-; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
; CHECK-NEXT: ret
entry:
store i32 0, i32* %p
@@ -1370,7 +1374,11 @@ entry:
define void @merge_zr32_2_offset(i32* %p) {
; CHECK-LABEL: merge_zr32_2_offset:
; CHECK: // %entry
-; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #504]
+; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #504]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #504]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #508]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #512]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #516]
; CHECK-NEXT: ret
entry:
%p0 = getelementptr i32, i32* %p, i32 126
@@ -1390,8 +1398,12 @@ entry:
define void @no_merge_zr32_2_offset(i32* %p) {
; CHECK-LABEL: no_merge_zr32_2_offset:
; CHECK: // %entry
-; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
-; CHECK-NEXT: str q[[REG]], [x{{[0-9]+}}, #4096]
+; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
+; NOSTRICTALIGN-NEXT: str q[[REG]], [x{{[0-9]+}}, #4096]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4096]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4100]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4104]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4108]
; CHECK-NEXT: ret
entry:
%p0 = getelementptr i32, i32* %p, i32 1024
@@ -1411,8 +1423,12 @@ entry:
define void @merge_zr32_3(i32* %p) {
; CHECK-LABEL: merge_zr32_3:
; CHECK: // %entry
-; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
-; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
+; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
+; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #16]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #24]
; CHECK-NEXT: ret
entry:
store i32 0, i32* %p
@@ -1437,7 +1453,8 @@ entry:
define void @merge_zr32_2vec(<2 x i32>* %p) {
; CHECK-LABEL: merge_zr32_2vec:
; CHECK: // %entry
-; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
+; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store <2 x i32> zeroinitializer, <2 x i32>* %p
@@ -1448,8 +1465,10 @@ entry:
define void @merge_zr32_3vec(<3 x i32>* %p) {
; CHECK-LABEL: merge_zr32_3vec:
; CHECK: // %entry
-; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
-; CHECK-NEXT: str wzr, [x{{[0-9]+}}, #8]
+; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
+; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
; CHECK-NEXT: ret
entry:
store <3 x i32> zeroinitializer, <3 x i32>* %p
@@ -1460,7 +1479,9 @@ entry:
define void @merge_zr32_4vec(<4 x i32>* %p) {
; CHECK-LABEL: merge_zr32_4vec:
; CHECK: // %entry
-; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
; CHECK-NEXT: ret
entry:
store <4 x i32> zeroinitializer, <4 x i32>* %p
@@ -1471,7 +1492,8 @@ entry:
define void @merge_zr32_2vecf(<2 x float>* %p) {
; CHECK-LABEL: merge_zr32_2vecf:
; CHECK: // %entry
-; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
+; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store <2 x float> zeroinitializer, <2 x float>* %p
@@ -1482,7 +1504,9 @@ entry:
define void @merge_zr32_4vecf(<4 x float>* %p) {
; CHECK-LABEL: merge_zr32_4vecf:
; CHECK: // %entry
-; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
; CHECK-NEXT: ret
entry:
store <4 x float> zeroinitializer, <4 x float>* %p
@@ -1502,13 +1526,42 @@ entry:
ret void
}
+; Similar to merge_zr32, but for 64-bit values and with unaligned stores.
+define void @merge_zr64_unalign(<2 x i64>* %p) {
+; CHECK-LABEL: merge_zr64_unalign:
+; CHECK: // %entry
+; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; STRICTALIGN: strb wzr,
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; STRICTALIGN: strb
+; CHECK-NEXT: ret
+entry:
+ store <2 x i64> zeroinitializer, <2 x i64>* %p, align 1
+ ret void
+}
+
; Similar to merge_zr32_3, replaceZeroVectorStore should not split the
; vector store since the zero constant vector has multiple uses.
define void @merge_zr64_2(i64* %p) {
; CHECK-LABEL: merge_zr64_2:
; CHECK: // %entry
-; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
-; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
+; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
+; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #16]
; CHECK-NEXT: ret
entry:
store i64 0, i64* %p
diff --git a/test/CodeGen/AArch64/swifterror.ll b/test/CodeGen/AArch64/swifterror.ll
index b15eaa923f08..69bf3510cc5a 100644
--- a/test/CodeGen/AArch64/swifterror.ll
+++ b/test/CodeGen/AArch64/swifterror.ll
@@ -13,18 +13,18 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) {
; CHECK-APPLE: malloc
; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1
; CHECK-APPLE: strb [[ID]], [x0, #8]
-; CHECK-APPLE: mov x19, x0
-; CHECK-APPLE-NOT: x19
+; CHECK-APPLE: mov x21, x0
+; CHECK-APPLE-NOT: x21
; CHECK-O0-LABEL: foo:
; CHECK-O0: orr w{{.*}}, wzr, #0x10
; CHECK-O0: malloc
-; CHECK-O0: mov x19, x0
-; CHECK-O0-NOT: x19
+; CHECK-O0: mov x21, x0
+; CHECK-O0-NOT: x21
; CHECK-O0: orr [[ID:w[0-9]+]], wzr, #0x1
-; CHECK-O0-NOT: x19
+; CHECK-O0-NOT: x21
; CHECK-O0: strb [[ID]], [x0, #8]
-; CHECK-O0-NOT: x19
+; CHECK-O0-NOT: x21
entry:
%call = call i8* @malloc(i64 16)
%call.0 = bitcast i8* %call to %swift_error*
@@ -38,20 +38,20 @@ entry:
define float @caller(i8* %error_ref) {
; CHECK-APPLE-LABEL: caller:
; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
-; CHECK-APPLE: mov x19, xzr
+; CHECK-APPLE: mov x21, xzr
; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE: cbnz x19
+; CHECK-APPLE: cbnz x21
; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x19
+; CHECK-APPLE: mov x0, x21
; CHECK-APPLE: bl {{.*}}free
; CHECK-O0-LABEL: caller:
-; CHECK-O0: mov x19
+; CHECK-O0: mov x21
; CHECK-O0: bl {{.*}}foo
-; CHECK-O0: mov [[ID:x[0-9]+]], x19
-; CHECK-O0: cbnz x19
+; CHECK-O0: mov [[ID:x[0-9]+]], x21
+; CHECK-O0: cbnz x21
entry:
%error_ptr_ref = alloca swifterror %swift_error*
store %swift_error* null, %swift_error** %error_ptr_ref
@@ -75,22 +75,22 @@ define float @caller2(i8* %error_ref) {
; CHECK-APPLE-LABEL: caller2:
; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
; CHECK-APPLE: fmov [[CMP:s[0-9]+]], #1.0
-; CHECK-APPLE: mov x19, xzr
+; CHECK-APPLE: mov x21, xzr
; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE: cbnz x19
+; CHECK-APPLE: cbnz x21
; CHECK-APPLE: fcmp s0, [[CMP]]
; CHECK-APPLE: b.le
; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x19
+; CHECK-APPLE: mov x0, x21
; CHECK-APPLE: bl {{.*}}free
; CHECK-O0-LABEL: caller2:
-; CHECK-O0: mov x19
+; CHECK-O0: mov x21
; CHECK-O0: bl {{.*}}foo
-; CHECK-O0: mov [[ID:x[0-9]+]], x19
-; CHECK-O0: cbnz x19
+; CHECK-O0: mov [[ID:x[0-9]+]], x21
+; CHECK-O0: cbnz x21
entry:
%error_ptr_ref = alloca swifterror %swift_error*
br label %bb_loop
@@ -123,24 +123,24 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
; CHECK-APPLE: malloc
; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1
; CHECK-APPLE: strb [[ID]], [x0, #8]
-; CHECK-APPLE: mov x19, x0
-; CHECK-APPLE-NOT: x19
+; CHECK-APPLE: mov x21, x0
+; CHECK-APPLE-NOT: x21
; CHECK-APPLE: ret
; CHECK-O0-LABEL: foo_if:
-; spill x19
-; CHECK-O0: str x19, [sp, [[SLOT:#[0-9]+]]]
+; spill x21
+; CHECK-O0: str x21, [sp, [[SLOT:#[0-9]+]]]
; CHECK-O0: cbz w0
; CHECK-O0: orr w{{.*}}, wzr, #0x10
; CHECK-O0: malloc
; CHECK-O0: mov [[ID:x[0-9]+]], x0
; CHECK-O0: orr [[ID2:w[0-9]+]], wzr, #0x1
; CHECK-O0: strb [[ID2]], [x0, #8]
-; CHECK-O0: mov x19, [[ID]]
+; CHECK-O0: mov x21, [[ID]]
; CHECK-O0: ret
; reload from stack
; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT]]]
-; CHECK-O0: mov x19, [[ID3]]
+; CHECK-O0: mov x21, [[ID3]]
; CHECK-O0: ret
entry:
%cond = icmp ne i32 %cc, 0
@@ -162,19 +162,19 @@ normal:
; under a certain condition inside a loop.
define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) {
; CHECK-APPLE-LABEL: foo_loop:
-; CHECK-APPLE: mov x0, x19
+; CHECK-APPLE: mov x0, x21
; CHECK-APPLE: cbz
; CHECK-APPLE: orr w0, wzr, #0x10
; CHECK-APPLE: malloc
; CHECK-APPLE: strb w{{.*}}, [x0, #8]
; CHECK-APPLE: fcmp
; CHECK-APPLE: b.le
-; CHECK-APPLE: mov x19, x0
+; CHECK-APPLE: mov x21, x0
; CHECK-APPLE: ret
; CHECK-O0-LABEL: foo_loop:
-; spill x19
-; CHECK-O0: str x19, [sp, [[SLOT:#[0-9]+]]]
+; spill x21
+; CHECK-O0: str x21, [sp, [[SLOT:#[0-9]+]]]
; CHECK-O0: b [[BB1:[A-Za-z0-9_]*]]
; CHECK-O0: [[BB1]]:
; CHECK-O0: ldr x0, [sp, [[SLOT]]]
@@ -193,7 +193,7 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
; CHECK-O0: b.le [[BB1]]
; reload from stack
; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp]
-; CHECK-O0: mov x19, [[ID3]]
+; CHECK-O0: mov x21, [[ID3]]
; CHECK-O0: ret
entry:
br label %bb_loop
@@ -229,23 +229,23 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi
; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1
; CHECK-APPLE: strb [[ID]], [x0, #8]
; CHECK-APPLE: str w{{.*}}, [{{.*}}[[SRET]], #4]
-; CHECK-APPLE: mov x19, x0
-; CHECK-APPLE-NOT: x19
+; CHECK-APPLE: mov x21, x0
+; CHECK-APPLE-NOT: x21
; CHECK-O0-LABEL: foo_sret:
; CHECK-O0: orr w{{.*}}, wzr, #0x10
; spill x8
; CHECK-O0-DAG: str x8
-; spill x19
-; CHECK-O0-DAG: str x19
+; spill x21
+; CHECK-O0-DAG: str x21
; CHECK-O0: malloc
; CHECK-O0: orr [[ID:w[0-9]+]], wzr, #0x1
; CHECK-O0: strb [[ID]], [x0, #8]
; reload from stack
; CHECK-O0: ldr [[SRET:x[0-9]+]]
; CHECK-O0: str w{{.*}}, [{{.*}}[[SRET]], #4]
-; CHECK-O0: mov x19
-; CHECK-O0-NOT: x19
+; CHECK-O0: mov x21
+; CHECK-O0-NOT: x21
entry:
%call = call i8* @malloc(i64 16)
%call.0 = bitcast i8* %call to %swift_error*
@@ -261,22 +261,22 @@ entry:
define float @caller3(i8* %error_ref) {
; CHECK-APPLE-LABEL: caller3:
; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
-; CHECK-APPLE: mov x19, xzr
+; CHECK-APPLE: mov x21, xzr
; CHECK-APPLE: bl {{.*}}foo_sret
-; CHECK-APPLE: cbnz x19
+; CHECK-APPLE: cbnz x21
; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x19
+; CHECK-APPLE: mov x0, x21
; CHECK-APPLE: bl {{.*}}free
; CHECK-O0-LABEL: caller3:
; spill x0
; CHECK-O0: str x0
-; CHECK-O0: mov x19
+; CHECK-O0: mov x21
; CHECK-O0: bl {{.*}}foo_sret
-; CHECK-O0: mov [[ID2:x[0-9]+]], x19
-; CHECK-O0: cbnz [[ID2]]
+; CHECK-O0: mov [[ID2:x[0-9]+]], x21
+; CHECK-O0: cbnz x21
; Access part of the error object and save it to error_ref
; reload from stack
; CHECK-O0: ldrb [[CODE:w[0-9]+]]
@@ -323,8 +323,8 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
; Third vararg
; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
-; CHECK-APPLE: mov x19, x0
-; CHECK-APPLE-NOT: x19
+; CHECK-APPLE: mov x21, x0
+; CHECK-APPLE-NOT: x21
entry:
%call = call i8* @malloc(i64 16)
%call.0 = bitcast i8* %call to %swift_error*
@@ -356,13 +356,13 @@ define float @caller4(i8* %error_ref) {
; CHECK-APPLE: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK-APPLE: str {{x[0-9]+}}, [sp]
-; CHECK-APPLE: mov x19, xzr
+; CHECK-APPLE: mov x21, xzr
; CHECK-APPLE: bl {{.*}}foo_vararg
-; CHECK-APPLE: cbnz x19
+; CHECK-APPLE: cbnz x21
; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x19
+; CHECK-APPLE: mov x0, x21
; CHECK-APPLE: bl {{.*}}free
entry:
%error_ptr_ref = alloca swifterror %swift_error*
@@ -407,29 +407,29 @@ entry:
}
; CHECK-APPLE-LABEL: swifterror_clobber
-; CHECK-APPLE: mov [[REG:x[0-9]+]], x19
+; CHECK-APPLE: mov [[REG:x[0-9]+]], x21
; CHECK-APPLE: nop
-; CHECK-APPLE: mov x19, [[REG]]
+; CHECK-APPLE: mov x21, [[REG]]
define swiftcc void @swifterror_clobber(%swift_error** nocapture swifterror %err) {
- call void asm sideeffect "nop", "~{x19}"()
+ call void asm sideeffect "nop", "~{x21}"()
ret void
}
; CHECK-APPLE-LABEL: swifterror_reg_clobber
-; CHECK-APPLE: stp {{.*}}x19
+; CHECK-APPLE: stp {{.*}}x21
; CHECK-APPLE: nop
-; CHECK-APPLE: ldp {{.*}}x19
+; CHECK-APPLE: ldp {{.*}}x21
define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
- call void asm sideeffect "nop", "~{x19}"()
+ call void asm sideeffect "nop", "~{x21}"()
ret void
}
; CHECK-APPLE-LABEL: params_in_reg
; Save callee saved registers and swifterror since it will be clobbered by the first call to params_in_reg2.
-; CHECK-APPLE: stp x19, x28, [sp
+; CHECK-APPLE: stp x21, x28, [sp
; CHECK-APPLE: stp x27, x26, [sp
; CHECK-APPLE: stp x25, x24, [sp
; CHECK-APPLE: stp x23, x22, [sp
-; CHECK-APPLE: stp x21, x20, [sp
+; CHECK-APPLE: stp x20, x19, [sp
; CHECK-APPLE: stp x29, x30, [sp
; CHECK-APPLE: str x20, [sp
; Store argument registers.
@@ -439,7 +439,7 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
; CHECK-APPLE: mov x26, x4
; CHECK-APPLE: mov x27, x3
; CHECK-APPLE: mov x28, x2
-; CHECK-APPLE: mov x21, x1
+; CHECK-APPLE: mov x19, x1
; CHECK-APPLE: mov x22, x0
; Setup call.
; CHECK-APPLE: orr w0, wzr, #0x1
@@ -451,11 +451,11 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
; CHECK-APPLE: orr w6, wzr, #0x7
; CHECK-APPLE: orr w7, wzr, #0x8
; CHECK-APPLE: mov x20, xzr
-; CHECK-APPLE: mov x19, xzr
+; CHECK-APPLE: mov x21, xzr
; CHECK-APPLE: bl _params_in_reg2
; Restore original arguments for next call.
; CHECK-APPLE: mov x0, x22
-; CHECK-APPLE: mov x1, x21
+; CHECK-APPLE: mov x1, x19
; CHECK-APPLE: mov x2, x28
; CHECK-APPLE: mov x3, x27
; CHECK-APPLE: mov x4, x26
@@ -463,22 +463,22 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
; CHECK-APPLE: mov x6, x24
; CHECK-APPLE: mov x7, x23
; Restore original swiftself argument and swifterror %err.
-; CHECK-APPLE: ldp x20, x19, [sp
+; CHECK-APPLE: ldp x20, x21, [sp
; CHECK-APPLE: bl _params_in_reg2
-; Restore calle save registers but don't clober swifterror x19.
-; CHECK-APPLE-NOT: x19
+; Restore calle save registers but don't clober swifterror x21.
+; CHECK-APPLE-NOT: x21
; CHECK-APPLE: ldp x29, x30, [sp
-; CHECK-APPLE-NOT: x19
-; CHECK-APPLE: ldp x21, x20, [sp
-; CHECK-APPLE-NOT: x19
+; CHECK-APPLE-NOT: x21
+; CHECK-APPLE: ldp x20, x19, [sp
+; CHECK-APPLE-NOT: x21
; CHECK-APPLE: ldp x23, x22, [sp
-; CHECK-APPLE-NOT: x19
+; CHECK-APPLE-NOT: x21
; CHECK-APPLE: ldp x25, x24, [sp
-; CHECK-APPLE-NOT: x19
+; CHECK-APPLE-NOT: x21
; CHECK-APPLE: ldp x27, x26, [sp
-; CHECK-APPLE-NOT: x19
+; CHECK-APPLE-NOT: x21
; CHECK-APPLE: ldr x28, [sp
-; CHECK-APPLE-NOT: x19
+; CHECK-APPLE-NOT: x21
; CHECK-APPLE: ret
define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) {
%error_ptr_ref = alloca swifterror %swift_error*, align 8
@@ -495,17 +495,17 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8*
; CHECK-APPLE: stp x27, x26, [sp
; CHECK-APPLE: stp x25, x24, [sp
; CHECK-APPLE: stp x23, x22, [sp
-; CHECK-APPLE: stp x21, x20, [sp
+; CHECK-APPLE: stp x20, x19, [sp
; CHECK-APPLE: stp x29, x30, [sp
; Save original arguments.
-; CHECK-APPLE: mov x23, x19
+; CHECK-APPLE: mov x23, x21
; CHECK-APPLE: str x7, [sp, #16]
; CHECK-APPLE: mov x24, x6
; CHECK-APPLE: mov x25, x5
; CHECK-APPLE: mov x26, x4
; CHECK-APPLE: mov x27, x3
; CHECK-APPLE: mov x28, x2
-; CHECK-APPLE: mov x21, x1
+; CHECK-APPLE: mov x19, x1
; CHECK-APPLE: mov x22, x0
; Setup call arguments.
; CHECK-APPLE: orr w0, wzr, #0x1
@@ -517,23 +517,23 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8*
; CHECK-APPLE: orr w6, wzr, #0x7
; CHECK-APPLE: orr w7, wzr, #0x8
; CHECK-APPLE: mov x20, xzr
-; CHECK-APPLE: mov x19, xzr
+; CHECK-APPLE: mov x21, xzr
; CHECK-APPLE: bl _params_in_reg2
; Store swifterror %error_ptr_ref.
-; CHECK-APPLE: str x19, [sp, #8]
+; CHECK-APPLE: str x21, [sp, #8]
; Setup call arguments from original arguments.
; CHECK-APPLE: mov x0, x22
-; CHECK-APPLE: mov x1, x21
+; CHECK-APPLE: mov x1, x19
; CHECK-APPLE: mov x2, x28
; CHECK-APPLE: mov x3, x27
; CHECK-APPLE: mov x4, x26
; CHECK-APPLE: mov x5, x25
; CHECK-APPLE: mov x6, x24
; CHECK-APPLE: ldp x7, x20, [sp, #16]
-; CHECK-APPLE: mov x19, x23
+; CHECK-APPLE: mov x21, x23
; CHECK-APPLE: bl _params_and_return_in_reg2
; Store return values.
-; CHECK-APPLE: mov x21, x0
+; CHECK-APPLE: mov x19, x0
; CHECK-APPLE: mov x22, x1
; CHECK-APPLE: mov x24, x2
; CHECK-APPLE: mov x25, x3
@@ -542,7 +542,7 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8*
; CHECK-APPLE: mov x28, x6
; CHECK-APPLE: mov x23, x7
; Save swifterror %err.
-; CHECK-APPLE: str x19, [sp, #24]
+; CHECK-APPLE: str x21, [sp, #24]
; Setup call.
; CHECK-APPLE: orr w0, wzr, #0x1
; CHECK-APPLE: orr w1, wzr, #0x2
@@ -554,10 +554,10 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8*
; CHECK-APPLE: orr w7, wzr, #0x8
; CHECK-APPLE: mov x20, xzr
; ... setup call with swiferror %error_ptr_ref.
-; CHECK-APPLE: ldr x19, [sp, #8]
+; CHECK-APPLE: ldr x21, [sp, #8]
; CHECK-APPLE: bl _params_in_reg2
; Restore return values for return from this function.
-; CHECK-APPLE: mov x0, x21
+; CHECK-APPLE: mov x0, x19
; CHECK-APPLE: mov x1, x22
; CHECK-APPLE: mov x2, x24
; CHECK-APPLE: mov x3, x25
@@ -566,9 +566,9 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8*
; CHECK-APPLE: mov x6, x28
; CHECK-APPLE: mov x7, x23
; Restore swifterror %err and callee save registers.
-; CHECK-APPLE: ldp x19, x28, [sp, #24
+; CHECK-APPLE: ldp x21, x28, [sp, #24
; CHECK-APPLE: ldp x29, x30, [sp
-; CHECK-APPLE: ldp x21, x20, [sp
+; CHECK-APPLE: ldp x20, x19, [sp
; CHECK-APPLE: ldp x23, x22, [sp
; CHECK-APPLE: ldp x25, x24, [sp
; CHECK-APPLE: ldp x27, x26, [sp
@@ -583,3 +583,17 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_
}
declare swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err)
+
+declare void @acallee(i8*)
+
+; Make sure we don't tail call if the caller returns a swifterror value. We
+; would have to move into the swifterror register before the tail call.
+; CHECK-APPLE: tailcall_from_swifterror:
+; CHECK-APPLE-NOT: b _acallee
+; CHECK-APPLE: bl _acallee
+
+define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+entry:
+ tail call void @acallee(i8* null)
+ ret void
+}
diff --git a/test/CodeGen/AArch64/swiftself.ll b/test/CodeGen/AArch64/swiftself.ll
index a60aed6b0f2b..33a49198430e 100644
--- a/test/CodeGen/AArch64/swiftself.ll
+++ b/test/CodeGen/AArch64/swiftself.ll
@@ -65,3 +65,21 @@ define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind {
%res = tail call i8* @swiftself_param(i8* swiftself %addr1)
ret i8* %res
}
+
+; We cannot pretend that 'x0' is alive across the thisreturn_attribute call as
+; we normally would. We marked the first parameter with swiftself which means it
+; will no longer be passed in x0.
+declare swiftcc i8* @thisreturn_attribute(i8* returned swiftself)
+; OPT-LABEL: swiftself_nothisreturn:
+; OPT-DAG: ldr x20, [x20]
+; OPT-DAG: mov [[CSREG:x[1-9].*]], x8
+; OPT: bl {{_?}}thisreturn_attribute
+; OPT: str x0, {{\[}}[[CSREG]]
+; OPT: ret
+define hidden swiftcc void @swiftself_nothisreturn(i8** noalias nocapture sret, i8** noalias nocapture readonly swiftself) {
+entry:
+ %2 = load i8*, i8** %1, align 8
+ %3 = tail call swiftcc i8* @thisreturn_attribute(i8* swiftself %2)
+ store i8* %3, i8** %0, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM/swifterror.ll b/test/CodeGen/ARM/swifterror.ll
index 7551291207ed..78764202f627 100644
--- a/test/CodeGen/ARM/swifterror.ll
+++ b/test/CodeGen/ARM/swifterror.ll
@@ -13,7 +13,7 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) {
; CHECK-APPLE: mov r0, #16
; CHECK-APPLE: malloc
; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], #1
-; CHECK-APPLE-DAG: mov r6, r{{.*}}
+; CHECK-APPLE-DAG: mov r8, r{{.*}}
; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8]
; CHECK-O0-LABEL: foo:
@@ -22,7 +22,7 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) {
; CHECK-O0: mov [[ID2:r[0-9]+]], r0
; CHECK-O0: mov [[ID:r[0-9]+]], #1
; CHECK-O0: strb [[ID]], [r0, #8]
-; CHECK-O0: mov r6, [[ID2]]
+; CHECK-O0: mov r8, [[ID2]]
entry:
%call = call i8* @malloc(i64 16)
%call.0 = bitcast i8* %call to %swift_error*
@@ -36,21 +36,21 @@ entry:
define float @caller(i8* %error_ref) {
; CHECK-APPLE-LABEL: caller:
; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], r0
-; CHECK-APPLE-DAG: mov r6, #0
+; CHECK-APPLE-DAG: mov r8, #0
; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE: cmp r6, #0
+; CHECK-APPLE: cmp r8, #0
; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r6, #8]
+; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8]
; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov r0, r6
+; CHECK-APPLE: mov r0, r8
; CHECK-APPLE: bl {{.*}}free
; CHECK-O0-LABEL: caller:
; spill r0
-; CHECK-O0-DAG: mov r6, #0
+; CHECK-O0-DAG: mov r8, #0
; CHECK-O0-DAG: str r0, [sp, [[SLOT:#[0-9]+]]
; CHECK-O0: bl {{.*}}foo
-; CHECK-O0: mov [[TMP:r[0-9]+]], r6
+; CHECK-O0: mov [[TMP:r[0-9]+]], r8
; CHECK-O0: str [[TMP]], [sp]
; CHECK-O0: bne
; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8]
@@ -81,22 +81,22 @@ handler:
define float @caller2(i8* %error_ref) {
; CHECK-APPLE-LABEL: caller2:
; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], r0
-; CHECK-APPLE-DAG: mov r6, #0
+; CHECK-APPLE-DAG: mov r8, #0
; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE: cmp r6, #0
+; CHECK-APPLE: cmp r8, #0
; CHECK-APPLE: bne
; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:r[0-9]+]], [r6, #8]
+; CHECK-APPLE: ldrb [[CODE:r[0-9]+]], [r8, #8]
; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov r0, r6
+; CHECK-APPLE: mov r0, r8
; CHECK-APPLE: bl {{.*}}free
; CHECK-O0-LABEL: caller2:
; spill r0
; CHECK-O0-DAG: str r0,
-; CHECK-O0-DAG: mov r6, #0
+; CHECK-O0-DAG: mov r8, #0
; CHECK-O0: bl {{.*}}foo
-; CHECK-O0: mov r{{.*}}, r6
+; CHECK-O0: mov r{{.*}}, r8
; CHECK-O0: str r0, [sp]
; CHECK-O0: bne
; CHECK-O0: ble
@@ -138,22 +138,22 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
; CHECK-APPLE: mov r0, #16
; CHECK-APPLE: malloc
; CHECK-APPLE: mov [[ID:r[0-9]+]], #1
-; CHECK-APPLE-DAG: mov r6, r{{.*}}
+; CHECK-APPLE-DAG: mov r8, r{{.*}}
; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8]
; CHECK-O0-LABEL: foo_if:
; CHECK-O0: cmp r0, #0
; spill to stack
-; CHECK-O0: str r6
+; CHECK-O0: str r8
; CHECK-O0: beq
; CHECK-O0: mov r0, #16
; CHECK-O0: malloc
; CHECK-O0: mov [[ID:r[0-9]+]], r0
; CHECK-O0: mov [[ID2:[a-z0-9]+]], #1
; CHECK-O0: strb [[ID2]], [r0, #8]
-; CHECK-O0: mov r6, [[ID]]
+; CHECK-O0: mov r8, [[ID]]
; reload from stack
-; CHECK-O0: ldr r6
+; CHECK-O0: ldr r8
entry:
%cond = icmp ne i32 %cc, 0
br i1 %cond, label %gen_error, label %normal
@@ -176,17 +176,17 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
; CHECK-APPLE-LABEL: foo_loop:
; CHECK-APPLE: mov [[CODE:r[0-9]+]], r0
; swifterror is kept in a register
-; CHECK-APPLE: mov [[ID:r[0-9]+]], r6
+; CHECK-APPLE: mov [[ID:r[0-9]+]], r8
; CHECK-APPLE: cmp [[CODE]], #0
; CHECK-APPLE: beq
; CHECK-APPLE: mov r0, #16
; CHECK-APPLE: malloc
; CHECK-APPLE: strb r{{.*}}, [{{.*}}[[ID]], #8]
; CHECK-APPLE: ble
-; CHECK-APPLE: mov r6, [[ID]]
+; CHECK-APPLE: mov r8, [[ID]]
; CHECK-O0-LABEL: foo_loop:
-; CHECK-O0: mov r{{.*}}, r6
+; CHECK-O0: mov r{{.*}}, r8
; CHECK-O0: cmp r{{.*}}, #0
; CHECK-O0: beq
; CHECK-O0-DAG: movw r{{.*}}, #1
@@ -200,7 +200,7 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
; CHECK-O0: vcmpe
; CHECK-O0: ble
; reload from stack
-; CHECK-O0: ldr r6
+; CHECK-O0: ldr r8
entry:
br label %bb_loop
@@ -231,7 +231,7 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi
; CHECK-APPLE: mov r0, #16
; CHECK-APPLE: malloc
; CHECK-APPLE: mov [[REG:r[0-9]+]], #1
-; CHECK-APPLE-DAG: mov r6, r0
+; CHECK-APPLE-DAG: mov r8, r0
; CHECK-APPLE-DAG: strb [[REG]], [r0, #8]
; CHECK-APPLE-DAG: str r{{.*}}, [{{.*}}[[SRET]], #4]
@@ -247,7 +247,7 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi
; CHECK-O0: ldr
; CHECK-O0: ldr
; CHECK-O0: str r{{.*}}, [{{.*}}, #4]
-; CHECK-O0: mov r6
+; CHECK-O0: mov r8
entry:
%call = call i8* @malloc(i64 16)
%call.0 = bitcast i8* %call to %swift_error*
@@ -263,22 +263,22 @@ entry:
define float @caller3(i8* %error_ref) {
; CHECK-APPLE-LABEL: caller3:
; CHECK-APPLE: mov [[ID:r[0-9]+]], r0
-; CHECK-APPLE: mov r6, #0
+; CHECK-APPLE: mov r8, #0
; CHECK-APPLE: bl {{.*}}foo_sret
-; CHECK-APPLE: cmp r6, #0
+; CHECK-APPLE: cmp r8, #0
; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r6, #8]
+; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8]
; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov r0, r6
+; CHECK-APPLE: mov r0, r8
; CHECK-APPLE: bl {{.*}}free
; CHECK-O0-LABEL: caller3:
-; CHECK-O0-DAG: mov r6, #0
+; CHECK-O0-DAG: mov r8, #0
; CHECK-O0-DAG: mov r0
; CHECK-O0-DAG: mov r1
; CHECK-O0: bl {{.*}}foo_sret
-; CHECK-O0: mov [[ID2:r[0-9]+]], r6
-; CHECK-O0: cmp r6
+; CHECK-O0: mov [[ID2:r[0-9]+]], r8
+; CHECK-O0: cmp r8
; CHECK-O0: str [[ID2]], [sp[[SLOT:.*]]]
; CHECK-O0: bne
; Access part of the error object and save it to error_ref
@@ -316,7 +316,7 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
; CHECK-APPLE: mov [[REG:r[0-9]+]], r0
; CHECK-APPLE: mov [[ID:r[0-9]+]], #1
; CHECK-APPLE-DAG: strb [[ID]], [{{.*}}[[REG]], #8]
-; CHECK-APPLE-DAG: mov r6, [[REG]]
+; CHECK-APPLE-DAG: mov r8, [[REG]]
entry:
%call = call i8* @malloc(i64 16)
@@ -345,13 +345,13 @@ entry:
define float @caller4(i8* %error_ref) {
; CHECK-APPLE-LABEL: caller4:
; CHECK-APPLE: mov [[ID:r[0-9]+]], r0
-; CHECK-APPLE: mov r6, #0
+; CHECK-APPLE: mov r8, #0
; CHECK-APPLE: bl {{.*}}foo_vararg
-; CHECK-APPLE: cmp r6, #0
+; CHECK-APPLE: cmp r8, #0
; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r6, #8]
+; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8]
; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov r0, r6
+; CHECK-APPLE: mov r0, r8
; CHECK-APPLE: bl {{.*}}free
entry:
%error_ptr_ref = alloca swifterror %swift_error*
@@ -396,51 +396,51 @@ entry:
}
; CHECK-APPLE-LABEL: swifterror_clobber
-; CHECK-APPLE: mov [[REG:r[0-9]+]], r6
+; CHECK-APPLE: mov [[REG:r[0-9]+]], r8
; CHECK-APPLE: nop
-; CHECK-APPLE: mov r6, [[REG]]
+; CHECK-APPLE: mov r8, [[REG]]
define swiftcc void @swifterror_clobber(%swift_error** nocapture swifterror %err) {
- call void asm sideeffect "nop", "~{r6}"()
+ call void asm sideeffect "nop", "~{r8}"()
ret void
}
; CHECK-APPLE-LABEL: swifterror_reg_clobber
-; CHECK-APPLE: push {{.*}}r6
+; CHECK-APPLE: push {{.*}}r8
; CHECK-APPLE: nop
-; CHECK-APPLE: pop {{.*}}r6
+; CHECK-APPLE: pop {{.*}}r8
define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
- call void asm sideeffect "nop", "~{r6}"()
+ call void asm sideeffect "nop", "~{r8}"()
ret void
}
; CHECK-ARMV7-LABEL: _params_in_reg
; Store callee saved registers excluding swifterror.
-; CHECK-ARMV7: push {r4, r5, r7, r8, r10, r11, lr}
-; Store swiftself (r10) and swifterror (r6).
-; CHECK-ARMV7-DAG: str r6, [s[[STK1:.*]]]
+; CHECK-ARMV7: push {r4, r5, r6, r7, r10, r11, lr}
+; Store swiftself (r10) and swifterror (r8).
+; CHECK-ARMV7-DAG: str r8, [s[[STK1:.*]]]
; CHECK-ARMV7-DAG: str r10, [s[[STK2:.*]]]
; Store arguments.
-; CHECK-ARMV7: mov r4, r3
-; CHECK-ARMV7: mov r5, r2
-; CHECK-ARMV7: mov r8, r1
-; CHECK-ARMV7: mov r11, r0
+; CHECK-ARMV7: mov r6, r3
+; CHECK-ARMV7: mov r4, r2
+; CHECK-ARMV7: mov r11, r1
+; CHECK-ARMV7: mov r5, r0
; Setup call.
; CHECK-ARMV7: mov r0, #1
; CHECK-ARMV7: mov r1, #2
; CHECK-ARMV7: mov r2, #3
; CHECK-ARMV7: mov r3, #4
; CHECK-ARMV7: mov r10, #0
-; CHECK-ARMV7: mov r6, #0
+; CHECK-ARMV7: mov r8, #0
; CHECK-ARMV7: bl _params_in_reg2
; Restore original arguments.
; CHECK-ARMV7-DAG: ldr r10, [s[[STK2]]]
-; CHECK-ARMV7-DAG: ldr r6, [s[[STK1]]]
-; CHECK-ARMV7: mov r0, r11
-; CHECK-ARMV7: mov r1, r8
-; CHECK-ARMV7: mov r2, r5
-; CHECK-ARMV7: mov r3, r4
+; CHECK-ARMV7-DAG: ldr r8, [s[[STK1]]]
+; CHECK-ARMV7: mov r0, r5
+; CHECK-ARMV7: mov r1, r11
+; CHECK-ARMV7: mov r2, r4
+; CHECK-ARMV7: mov r3, r6
; CHECK-ARMV7: bl _params_in_reg2
-; CHECK-ARMV7: pop {r4, r5, r7, r8, r10, r11, pc}
+; CHECK-ARMV7: pop {r4, r5, r6, r7, r10, r11, pc}
define swiftcc void @params_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {
%error_ptr_ref = alloca swifterror %swift_error*, align 8
store %swift_error* null, %swift_error** %error_ptr_ref
@@ -451,42 +451,42 @@ define swiftcc void @params_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_err
declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err)
; CHECK-ARMV7-LABEL: params_and_return_in_reg
-; CHECK-ARMV7: push {r4, r5, r7, r8, r10, r11, lr}
+; CHECK-ARMV7: push {r4, r5, r6, r7, r10, r11, lr}
; Store swifterror and swiftself
-; CHECK-ARMV7: mov r4, r6
+; CHECK-ARMV7: mov r6, r8
; CHECK-ARMV7: str r10, [s[[STK1:.*]]]
; Store arguments.
; CHECK-ARMV7: str r3, [s[[STK2:.*]]]
-; CHECK-ARMV7: mov r5, r2
-; CHECK-ARMV7: mov r8, r1
-; CHECK-ARMV7: mov r11, r0
+; CHECK-ARMV7: mov r4, r2
+; CHECK-ARMV7: mov r11, r1
+; CHECK-ARMV7: mov r5, r0
; Setup call.
; CHECK-ARMV7: mov r0, #1
; CHECK-ARMV7: mov r1, #2
; CHECK-ARMV7: mov r2, #3
; CHECK-ARMV7: mov r3, #4
; CHECK-ARMV7: mov r10, #0
-; CHECK-ARMV7: mov r6, #0
+; CHECK-ARMV7: mov r8, #0
; CHECK-ARMV7: bl _params_in_reg2
; Restore original arguments.
; CHECK-ARMV7: ldr r3, [s[[STK2]]]
; CHECK-ARMV7: ldr r10, [s[[STK1]]]
; Store %error_ptr_ref;
-; CHECK-ARMV7: str r6, [s[[STK3:.*]]]
+; CHECK-ARMV7: str r8, [s[[STK3:.*]]]
; Restore original arguments.
-; CHECK-ARMV7: mov r0, r11
-; CHECK-ARMV7: mov r1, r8
-; CHECK-ARMV7: mov r2, r5
-; CHECK-ARMV7: mov r6, r4
+; CHECK-ARMV7: mov r0, r5
+; CHECK-ARMV7: mov r1, r11
+; CHECK-ARMV7: mov r2, r4
+; CHECK-ARMV7: mov r8, r6
; CHECK-ARMV7: bl _params_and_return_in_reg2
; Store swifterror return %err;
-; CHECK-ARMV7: str r6, [s[[STK1]]]
+; CHECK-ARMV7: str r8, [s[[STK1]]]
; Load swifterror value %error_ptr_ref.
-; CHECK-ARMV7: ldr r6, [s[[STK3]]]
+; CHECK-ARMV7: ldr r8, [s[[STK3]]]
; Save return values.
-; CHECK-ARMV7: mov r5, r0
-; CHECK-ARMV7: mov r4, r1
-; CHECK-ARMV7: mov r8, r2
+; CHECK-ARMV7: mov r4, r0
+; CHECK-ARMV7: mov r5, r1
+; CHECK-ARMV7: mov r6, r2
; CHECK-ARMV7: mov r11, r3
; Setup call.
; CHECK-ARMV7: mov r0, #1
@@ -496,13 +496,13 @@ declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_e
; CHECK-ARMV7: mov r10, #0
; CHECK-ARMV7: bl _params_in_reg2
; Load swifterror %err;
-; CHECK-ARMV7: ldr r6, [s[[STK1]]]
+; CHECK-ARMV7: ldr r8, [s[[STK1]]]
; Restore return values for returning.
-; CHECK-ARMV7: mov r0, r5
-; CHECK-ARMV7: mov r1, r4
-; CHECK-ARMV7: mov r2, r8
+; CHECK-ARMV7: mov r0, r4
+; CHECK-ARMV7: mov r1, r5
+; CHECK-ARMV7: mov r2, r6
; CHECK-ARMV7: mov r3, r11
-; CHECK-ARMV7: pop {r4, r5, r7, r8, r10, r11, pc}
+; CHECK-ARMV7: pop {r4, r5, r6, r7, r10, r11, pc}
define swiftcc { i32, i32, i32, i32} @params_and_return_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {
%error_ptr_ref = alloca swifterror %swift_error*, align 8
store %swift_error* null, %swift_error** %error_ptr_ref
@@ -513,3 +513,18 @@ define swiftcc { i32, i32, i32, i32} @params_and_return_in_reg(i32, i32, i32, i3
}
declare swiftcc { i32, i32, i32, i32 } @params_and_return_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err)
+
+
+declare void @acallee(i8*)
+
+; Make sure we don't tail call if the caller returns a swifterror value. We
+; would have to move into the swifterror register before the tail call.
+; CHECK-APPLE: tailcall_from_swifterror:
+; CHECK-APPLE-NOT: b _acallee
+; CHECK-APPLE: bl _acallee
+
+define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+entry:
+ tail call void @acallee(i8* null)
+ ret void
+}
diff --git a/test/CodeGen/ARM/swiftself.ll b/test/CodeGen/ARM/swiftself.ll
index b7a04ca4060e..1e06b34c7052 100644
--- a/test/CodeGen/ARM/swiftself.ll
+++ b/test/CodeGen/ARM/swiftself.ll
@@ -63,3 +63,20 @@ define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind "no-fram
%res = tail call i8* @swiftself_param(i8* swiftself %addr1)
ret i8* %res
}
+
+; We cannot pretend that 'r0' is alive across the thisreturn_attribute call as
+; we normally would. We marked the first parameter with swiftself which means it
+; will no longer be passed in r0.
+declare swiftcc i8* @thisreturn_attribute(i8* returned swiftself)
+; OPT-LABEL: swiftself_nothisreturn:
+; OPT-DAG: mov [[CSREG:r[1-9].*]], r0
+; OPT-DAG: ldr r10, [r10]
+; OPT: bl {{_?}}thisreturn_attribute
+; OPT: str r0, {{\[}}[[CSREG]]
+define hidden swiftcc void @swiftself_nothisreturn(i8** noalias nocapture sret, i8** noalias nocapture readonly swiftself) {
+entry:
+ %2 = load i8*, i8** %1, align 8
+ %3 = tail call swiftcc i8* @thisreturn_attribute(i8* swiftself %2)
+ store i8* %3, i8** %0, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/dag-update-nodetomatch.ll b/test/CodeGen/X86/dag-update-nodetomatch.ll
new file mode 100644
index 000000000000..45b6d020ce45
--- /dev/null
+++ b/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -0,0 +1,241 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+%struct.i = type { i32, i24 }
+%struct.m = type { %struct.i }
+
+@a = local_unnamed_addr global i32 0, align 4
+@b = local_unnamed_addr global i16 0, align 2
+@c = local_unnamed_addr global i16 0, align 2
+@e = local_unnamed_addr global i16 0, align 2
+@l = local_unnamed_addr global %struct.i zeroinitializer, align 4
+@k = local_unnamed_addr global %struct.m zeroinitializer, align 4
+
+@x0 = local_unnamed_addr global double 0.000000e+00, align 8
+@x1 = local_unnamed_addr global i32 0, align 4
+@x2 = local_unnamed_addr global i32 0, align 4
+@x3 = local_unnamed_addr global i32 0, align 4
+@x4 = local_unnamed_addr global i32 0, align 4
+@x5 = local_unnamed_addr global double* null, align 8
+
+; Check that compiler does not crash.
+; Test for PR30775
+define void @_Z1nv() local_unnamed_addr {
+; CHECK-LABEL: _Z1nv:
+entry:
+ %bf.load = load i32, i32* bitcast (i24* getelementptr inbounds (%struct.m, %struct.m* @k, i64 0, i32 0, i32 1) to i32*), align 4
+ %0 = load i16, i16* @c, align 2
+ %conv = sext i16 %0 to i32
+ %1 = load i16, i16* @b, align 2
+ %conv1 = sext i16 %1 to i32
+ %2 = load i32, i32* @a, align 4
+ %tobool = icmp ne i32 %2, 0
+ %bf.load3 = load i32, i32* getelementptr inbounds (%struct.i, %struct.i* @l, i64 0, i32 0), align 4
+ %bf.shl = shl i32 %bf.load3, 7
+ %bf.ashr = ashr exact i32 %bf.shl, 7
+ %bf.clear = shl i32 %bf.load, 1
+ %factor = and i32 %bf.clear, 131070
+ %add13 = add nsw i32 %factor, %conv
+ %add15 = add nsw i32 %add13, %conv1
+ %bf.ashr.op = sub nsw i32 0, %bf.ashr
+ %add28 = select i1 %tobool, i32 %bf.ashr.op, i32 0
+ %tobool29 = icmp eq i32 %add15, %add28
+ %phitmp = icmp eq i32 %bf.ashr, 0
+ %.phitmp = or i1 %phitmp, %tobool29
+ %conv37 = zext i1 %.phitmp to i16
+ store i16 %conv37, i16* @e, align 2
+ %bf.clear39 = and i32 %bf.load, 65535
+ %factor53 = shl nuw nsw i32 %bf.clear39, 1
+ %add46 = add nsw i32 %factor53, %conv
+ %add48 = add nsw i32 %add46, %conv1
+ %add48.lobit = lshr i32 %add48, 31
+ %add48.lobit.not = xor i32 %add48.lobit, 1
+ %add51 = add nuw nsw i32 %add48.lobit.not, %bf.clear39
+ %shr = ashr i32 %2, %add51
+ %conv52 = trunc i32 %shr to i16
+ store i16 %conv52, i16* @b, align 2
+ ret void
+}
+
+; Test for PR31536
+define void @_Z2x6v() local_unnamed_addr {
+; CHECK-LABEL: _Z2x6v:
+entry:
+ %0 = load i32, i32* @x1, align 4
+ %and = and i32 %0, 511
+ %add = add nuw nsw i32 %and, 1
+ store i32 %add, i32* @x4, align 4
+ %.pr = load i32, i32* @x3, align 4
+ %tobool8 = icmp eq i32 %.pr, 0
+ br i1 %tobool8, label %for.end5, label %for.cond1thread-pre-split.lr.ph
+
+for.cond1thread-pre-split.lr.ph: ; preds = %entry
+ %idx.ext13 = zext i32 %add to i64
+ %x5.promoted = load double*, double** @x5, align 8
+ %x5.promoted9 = bitcast double* %x5.promoted to i8*
+ %1 = xor i32 %.pr, -1
+ %2 = zext i32 %1 to i64
+ %3 = shl nuw nsw i64 %2, 3
+ %4 = add nuw nsw i64 %3, 8
+ %5 = mul nuw nsw i64 %4, %idx.ext13
+ %uglygep = getelementptr i8, i8* %x5.promoted9, i64 %5
+ %.pr6.pre = load i32, i32* @x2, align 4
+ %6 = shl nuw nsw i32 %and, 3
+ %addconv = add nuw nsw i32 %6, 8
+ %7 = zext i32 %addconv to i64
+ %scevgep15 = getelementptr double, double* %x5.promoted, i64 1
+ %scevgep1516 = bitcast double* %scevgep15 to i8*
+ br label %for.cond1thread-pre-split
+
+for.cond1thread-pre-split: ; preds = %for.cond1thread-pre-split.lr.ph, %for.inc3
+ %indvar = phi i64 [ 0, %for.cond1thread-pre-split.lr.ph ], [ %indvar.next, %for.inc3 ]
+ %.pr6 = phi i32 [ %.pr6.pre, %for.cond1thread-pre-split.lr.ph ], [ %.pr611, %for.inc3 ]
+ %8 = phi double* [ %x5.promoted, %for.cond1thread-pre-split.lr.ph ], [ %add.ptr, %for.inc3 ]
+ %9 = phi i32 [ %.pr, %for.cond1thread-pre-split.lr.ph ], [ %inc4, %for.inc3 ]
+ %10 = mul i64 %7, %indvar
+ %uglygep14 = getelementptr i8, i8* %x5.promoted9, i64 %10
+ %uglygep17 = getelementptr i8, i8* %scevgep1516, i64 %10
+ %cmp7 = icmp slt i32 %.pr6, 0
+ br i1 %cmp7, label %for.body2.preheader, label %for.inc3
+
+for.body2.preheader: ; preds = %for.cond1thread-pre-split
+ %11 = sext i32 %.pr6 to i64
+ %12 = sext i32 %.pr6 to i64
+ %13 = icmp sgt i64 %12, -1
+ %smax = select i1 %13, i64 %12, i64 -1
+ %14 = add nsw i64 %smax, 1
+ %15 = sub nsw i64 %14, %12
+ %min.iters.check = icmp ult i64 %15, 4
+ br i1 %min.iters.check, label %for.body2.preheader21, label %min.iters.checked
+
+min.iters.checked: ; preds = %for.body2.preheader
+ %n.vec = and i64 %15, -4
+ %cmp.zero = icmp eq i64 %n.vec, 0
+ br i1 %cmp.zero, label %for.body2.preheader21, label %vector.memcheck
+
+vector.memcheck: ; preds = %min.iters.checked
+ %16 = shl nsw i64 %11, 3
+ %scevgep = getelementptr i8, i8* %uglygep14, i64 %16
+ %17 = icmp sgt i64 %11, -1
+ %smax18 = select i1 %17, i64 %11, i64 -1
+ %18 = shl nsw i64 %smax18, 3
+ %scevgep19 = getelementptr i8, i8* %uglygep17, i64 %18
+ %bound0 = icmp ult i8* %scevgep, bitcast (double* @x0 to i8*)
+ %bound1 = icmp ugt i8* %scevgep19, bitcast (double* @x0 to i8*)
+ %memcheck.conflict = and i1 %bound0, %bound1
+ %ind.end = add nsw i64 %11, %n.vec
+ br i1 %memcheck.conflict, label %for.body2.preheader21, label %vector.body.preheader
+
+vector.body.preheader: ; preds = %vector.memcheck
+ %19 = add nsw i64 %n.vec, -4
+ %20 = lshr exact i64 %19, 2
+ %21 = and i64 %20, 1
+ %lcmp.mod = icmp eq i64 %21, 0
+ br i1 %lcmp.mod, label %vector.body.prol.preheader, label %vector.body.prol.loopexit.unr-lcssa
+
+vector.body.prol.preheader: ; preds = %vector.body.preheader
+ br label %vector.body.prol
+
+vector.body.prol: ; preds = %vector.body.prol.preheader
+ %22 = load i64, i64* bitcast (double* @x0 to i64*), align 8
+ %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+ %24 = shufflevector <2 x i64> %23, <2 x i64> undef, <2 x i32> zeroinitializer
+ %25 = insertelement <2 x i64> undef, i64 %22, i32 0
+ %26 = shufflevector <2 x i64> %25, <2 x i64> undef, <2 x i32> zeroinitializer
+ %27 = getelementptr inbounds double, double* %8, i64 %11
+ %28 = bitcast double* %27 to <2 x i64>*
+ store <2 x i64> %24, <2 x i64>* %28, align 8
+ %29 = getelementptr double, double* %27, i64 2
+ %30 = bitcast double* %29 to <2 x i64>*
+ store <2 x i64> %26, <2 x i64>* %30, align 8
+ br label %vector.body.prol.loopexit.unr-lcssa
+
+vector.body.prol.loopexit.unr-lcssa: ; preds = %vector.body.preheader, %vector.body.prol
+ %index.unr.ph = phi i64 [ 4, %vector.body.prol ], [ 0, %vector.body.preheader ]
+ br label %vector.body.prol.loopexit
+
+vector.body.prol.loopexit: ; preds = %vector.body.prol.loopexit.unr-lcssa
+ %31 = icmp eq i64 %20, 0
+ br i1 %31, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new: ; preds = %vector.body.prol.loopexit
+ %32 = load i64, i64* bitcast (double* @x0 to i64*), align 8
+ %33 = insertelement <2 x i64> undef, i64 %32, i32 0
+ %34 = shufflevector <2 x i64> %33, <2 x i64> undef, <2 x i32> zeroinitializer
+ %35 = insertelement <2 x i64> undef, i64 %32, i32 0
+ %36 = shufflevector <2 x i64> %35, <2 x i64> undef, <2 x i32> zeroinitializer
+ %37 = load i64, i64* bitcast (double* @x0 to i64*), align 8
+ %38 = insertelement <2 x i64> undef, i64 %37, i32 0
+ %39 = shufflevector <2 x i64> %38, <2 x i64> undef, <2 x i32> zeroinitializer
+ %40 = insertelement <2 x i64> undef, i64 %37, i32 0
+ %41 = shufflevector <2 x i64> %40, <2 x i64> undef, <2 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.body.preheader.new
+ %index = phi i64 [ %index.unr.ph, %vector.body.preheader.new ], [ %index.next.1, %vector.body ]
+ %42 = add i64 %11, %index
+ %43 = getelementptr inbounds double, double* %8, i64 %42
+ %44 = bitcast double* %43 to <2 x i64>*
+ store <2 x i64> %34, <2 x i64>* %44, align 8
+ %45 = getelementptr double, double* %43, i64 2
+ %46 = bitcast double* %45 to <2 x i64>*
+ store <2 x i64> %36, <2 x i64>* %46, align 8
+ %index.next = add i64 %index, 4
+ %47 = add i64 %11, %index.next
+ %48 = getelementptr inbounds double, double* %8, i64 %47
+ %49 = bitcast double* %48 to <2 x i64>*
+ store <2 x i64> %39, <2 x i64>* %49, align 8
+ %50 = getelementptr double, double* %48, i64 2
+ %51 = bitcast double* %50 to <2 x i64>*
+ store <2 x i64> %41, <2 x i64>* %51, align 8
+ %index.next.1 = add i64 %index, 8
+ %52 = icmp eq i64 %index.next.1, %n.vec
+ br i1 %52, label %middle.block.unr-lcssa, label %vector.body
+
+middle.block.unr-lcssa: ; preds = %vector.body
+ br label %middle.block
+
+middle.block: ; preds = %vector.body.prol.loopexit, %middle.block.unr-lcssa
+ %cmp.n = icmp eq i64 %15, %n.vec
+ br i1 %cmp.n, label %for.cond1.for.inc3_crit_edge, label %for.body2.preheader21
+
+for.body2.preheader21: ; preds = %middle.block, %vector.memcheck, %min.iters.checked, %for.body2.preheader
+ %indvars.iv.ph = phi i64 [ %11, %vector.memcheck ], [ %11, %min.iters.checked ], [ %11, %for.body2.preheader ], [ %ind.end, %middle.block ]
+ br label %for.body2
+
+for.body2: ; preds = %for.body2.preheader21, %for.body2
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body2 ], [ %indvars.iv.ph, %for.body2.preheader21 ]
+ %53 = load i64, i64* bitcast (double* @x0 to i64*), align 8
+ %arrayidx = getelementptr inbounds double, double* %8, i64 %indvars.iv
+ %54 = bitcast double* %arrayidx to i64*
+ store i64 %53, i64* %54, align 8
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp, label %for.body2, label %for.cond1.for.inc3_crit_edge.loopexit
+
+for.cond1.for.inc3_crit_edge.loopexit: ; preds = %for.body2
+ br label %for.cond1.for.inc3_crit_edge
+
+for.cond1.for.inc3_crit_edge: ; preds = %for.cond1.for.inc3_crit_edge.loopexit, %middle.block
+ %indvars.iv.next.lcssa = phi i64 [ %ind.end, %middle.block ], [ %indvars.iv.next, %for.cond1.for.inc3_crit_edge.loopexit ]
+ %55 = trunc i64 %indvars.iv.next.lcssa to i32
+ store i32 %55, i32* @x2, align 4
+ br label %for.inc3
+
+for.inc3: ; preds = %for.cond1.for.inc3_crit_edge, %for.cond1thread-pre-split
+ %.pr611 = phi i32 [ %55, %for.cond1.for.inc3_crit_edge ], [ %.pr6, %for.cond1thread-pre-split ]
+ %inc4 = add nsw i32 %9, 1
+ %add.ptr = getelementptr inbounds double, double* %8, i64 %idx.ext13
+ %tobool = icmp eq i32 %inc4, 0
+ %indvar.next = add i64 %indvar, 1
+ br i1 %tobool, label %for.cond.for.end5_crit_edge, label %for.cond1thread-pre-split
+
+for.cond.for.end5_crit_edge: ; preds = %for.inc3
+ store i8* %uglygep, i8** bitcast (double** @x5 to i8**), align 8
+ store i32 0, i32* @x3, align 4
+ br label %for.end5
+
+for.end5: ; preds = %for.cond.for.end5_crit_edge, %entry
+ ret void
+}
+
diff --git a/test/CodeGen/X86/pr31956.ll b/test/CodeGen/X86/pr31956.ll
new file mode 100644
index 000000000000..e9293048f4e5
--- /dev/null
+++ b/test/CodeGen/X86/pr31956.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+avx < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-scei-ps4"
+
+@G1 = common global <2 x float> zeroinitializer, align 8
+@G2 = common global <8 x float> zeroinitializer, align 32
+
+define <4 x float> @foo() {
+; CHECK-LABEL: foo:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %V = load <2 x float>, <2 x float>* @G1, align 8
+ %shuffle = shufflevector <2 x float> %V, <2 x float> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
+ %L = load <8 x float>, <8 x float>* @G2, align 32
+ %shuffle1 = shufflevector <8 x float> %shuffle, <8 x float> %L, <4 x i32> <i32 12, i32 10, i32 14, i32 4>
+ ret <4 x float> %shuffle1
+}
diff --git a/test/CodeGen/X86/swifterror.ll b/test/CodeGen/X86/swifterror.ll
index cd4150597225..86e0221c2015 100644
--- a/test/CodeGen/X86/swifterror.ll
+++ b/test/CodeGen/X86/swifterror.ll
@@ -670,3 +670,18 @@ define swiftcc { i64, i64, i64, i64} @params_and_return_in_reg(i64, i64, i64, i6
}
declare swiftcc { i64, i64, i64, i64 } @params_and_return_in_reg2(i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err)
+
+
+declare void @acallee(i8*)
+
+; Make sure we don't tail call if the caller returns a swifterror value. We
+; would have to move into the swifterror register before the tail call.
+; CHECK-APPLE: tailcall_from_swifterror:
+; CHECK-APPLE-NOT: jmp _acallee
+; CHECK-APPLE: callq _acallee
+
+define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+entry:
+ tail call void @acallee(i8* null)
+ ret void
+}
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
index 9c4d416a1eff..9827e7a6792b 100644
--- a/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -170,6 +170,32 @@ define void @memintr_test(i8* %a, i8* %b) nounwind uwtable sanitize_address {
; CHECK: __asan_memcpy
; CHECK: ret void
+; CHECK-LABEL: @test_swifterror
+; CHECK-NOT: __asan_report_load
+; CHECK: ret void
+define void @test_swifterror(i8** swifterror) sanitize_address {
+ %swifterror_ptr_value = load i8*, i8** %0
+ ret void
+}
+
+; CHECK-LABEL: @test_swifterror_2
+; CHECK-NOT: __asan_report_store
+; CHECK: ret void
+define void @test_swifterror_2(i8** swifterror) sanitize_address {
+ store i8* null, i8** %0
+ ret void
+}
+
+; CHECK-LABEL: @test_swifterror_3
+; CHECK-NOT: __asan_report_store
+; CHECK: ret void
+define void @test_swifterror_3() sanitize_address {
+ %swifterror_addr = alloca swifterror i8*
+ store i8* null, i8** %swifterror_addr
+ call void @test_swifterror_2(i8** swifterror %swifterror_addr)
+ ret void
+}
+
; CHECK: define internal void @asan.module_ctor()
; CHECK: call void @__asan_init()
diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
index 7e049c548f22..61ab98dc9997 100644
--- a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
+++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -54,5 +54,29 @@ entry:
; CHECK: ret void
}
+; CHECK-LABEL: @SwiftError
+; CHECK-NOT: __tsan_read
+; CHECK-NOT: __tsan_write
+; CHECK: ret
+define void @SwiftError(i8** swifterror) sanitize_thread {
+ %swifterror_ptr_value = load i8*, i8** %0
+ store i8* null, i8** %0
+ %swifterror_addr = alloca swifterror i8*
+ %swifterror_ptr_value_2 = load i8*, i8** %swifterror_addr
+ store i8* null, i8** %swifterror_addr
+ ret void
+}
+
+; CHECK-LABEL: @SwiftErrorCall
+; CHECK-NOT: __tsan_read
+; CHECK-NOT: __tsan_write
+; CHECK: ret
+define void @SwiftErrorCall(i8** swifterror) sanitize_thread {
+ %swifterror_addr = alloca swifterror i8*
+ store i8* null, i8** %0
+ call void @SwiftError(i8** %0)
+ ret void
+}
+
; CHECK: define internal void @tsan.module_ctor()
; CHECK: call void @__tsan_init()
diff --git a/test/Transforms/LoopUnroll/runtime-li.ll b/test/Transforms/LoopUnroll/runtime-li.ll
new file mode 100644
index 000000000000..5494c8e9da7d
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-li.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -loop-unroll -unroll-runtime -unroll-count=2 -verify-loop-info -pass-remarks=loop-unroll < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Verify that runtime-unrolling a top-level loop that has nested loops does not
+; make the unroller produce invalid loop-info.
+; CHECK: remark: {{.*}}: unrolled loop by a factor of 2 with run-time trip count
+; CHECK: @widget
+; CHECK: ret void
+define void @widget(double* %arg, double* %arg1, double* %p, i64* %q1, i64* %q2) local_unnamed_addr {
+entry:
+ br label %header.outer
+
+header.outer: ; preds = %latch.outer, %entry
+ %tmp = phi double* [ %tmp8, %latch.outer ], [ %arg, %entry ]
+ br label %header.inner
+
+header.inner: ; preds = %latch.inner, %header.outer
+ br i1 undef, label %latch.inner, label %latch.outer
+
+latch.inner: ; preds = %header.inner
+ %tmp5 = load i64, i64* %q1, align 8
+ store i64 %tmp5, i64* %q2, align 8
+ %tmp6 = icmp eq double* %p, %arg
+ br label %header.inner
+
+latch.outer: ; preds = %header.inner
+ store double 0.0, double* %p, align 8
+ %tmp8 = getelementptr inbounds double, double* %tmp, i64 1
+ %tmp9 = icmp eq double* %tmp8, %arg1
+ br i1 %tmp9, label %exit, label %header.outer
+
+exit: ; preds = %latch.outer
+ ret void
+}