author     Dimitry Andric <dim@FreeBSD.org>  2017-01-22 16:52:30 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-01-22 16:52:30 +0000
commit     7c71d32ab52480cb7bfd9f951450060263a5b9e7 (patch)
tree       c9e92208269d0251cd61fb3e34aad15ea21d7fbc
parent     581a6d8501ff5614297da837b81ed3b6956361ea (diff)
Vendor import of llvm release_40 branch r292732 (tag: vendor/llvm/llvm-release_40-r292732)

Notes:
    svn path=/vendor/llvm/dist/; revision=312625
    svn path=/vendor/llvm/llvm-release_40-r292732/; revision=312626; tag=vendor/llvm/llvm-release_40-r292732
-rwxr-xr-x  cmake/modules/AddLLVM.cmake                                      8
-rw-r--r--  docs/ReleaseNotes.rst                                           37
-rw-r--r--  docs/index.rst                                                   5
-rw-r--r--  include/llvm/Analysis/AssumptionCache.h                          5
-rw-r--r--  lib/Analysis/AssumptionCache.cpp                                27
-rw-r--r--  lib/Analysis/ModuleSummaryAnalysis.cpp                           1
-rw-r--r--  lib/Bitcode/Reader/MetadataLoader.cpp                           21
-rw-r--r--  lib/LTO/ThinLTOCodeGenerator.cpp                                34
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp                              32
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp                                  3
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp                     6
-rw-r--r--  lib/Transforms/Scalar/NewGVN.cpp                               129
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp                       7
-rw-r--r--  test/CodeGen/X86/atomic-eflags-reuse.ll                         64
-rw-r--r--  test/CodeGen/X86/slow-pmulld.ll                                  3
-rw-r--r--  test/ThinLTO/X86/lazyload_metadata.ll                            6
-rw-r--r--  test/Transforms/LoopStrengthReduce/pr31627.ll                   58
-rw-r--r--  test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll   56
-rw-r--r--  test/Transforms/NewGVN/pr31613.ll                              135
19 files changed, 462 insertions(+), 175 deletions(-)
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index fbe790b05b1a..b3c7746c480a 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -462,11 +462,9 @@ function(llvm_add_library name)
if(UNIX AND NOT APPLE AND NOT ARG_SONAME)
set_target_properties(${name}
PROPERTIES
- # Concatenate the version numbers since ldconfig expects exactly
- # one component indicating the ABI version, while LLVM uses
- # major+minor for that.
- SOVERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}
- VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX})
+ # Since 4.0.0, the ABI version is indicated by the major version
+ SOVERSION ${LLVM_VERSION_MAJOR}
+ VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX})
endif()
endif()
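
For concreteness (library and version numbers assumed for illustration, not taken from this build): under the old scheme, LLVM 4.0.1 would have produced SONAME libLLVMSupport.so.40 with real file libLLVMSupport.so.40.1, while the new scheme yields SONAME libLLVMSupport.so.4 and real file libLLVMSupport.so.4.0.1, giving ldconfig the single ABI-version component it expects.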
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index b92527dbb296..bc5aca521179 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -67,13 +67,46 @@ Non-comprehensive list of changes in this release
Makes programs 10x faster by doing Special New Thing.
+ Improvements to ThinLTO (-flto=thin)
+ ------------------------------------
+ * Integration with profile data (PGO). When available, profile data
+ enables more accurate function importing decisions, as well as
+ cross-module indirect call promotion.
+ * Significant build-time and binary-size improvements when compiling with
+ debug info (-g).
+
Changes to the LLVM IR
----------------------
-Changes to the ARM Backend
+Changes to the ARM Targets
--------------------------
- During this release ...
+**During this release the AArch64 target has:**
+
+* Gained support for ILP32 relocations.
+* Gained support for XRay.
+* Made even more progress on GlobalISel. There is still some work left before
+ it is production-ready though.
+* Refined the support for Qualcomm's Falkor and Samsung's Exynos CPUs.
+* Learned a few new tricks for lowering multiplications by constants, folding
+ spilled/refilled copies etc.
+
+**During this release the ARM target has:**
+
+* Gained support for ROPI (read-only position independence) and RWPI
+ (read-write position independence), which can be used to remove the need for
+ a dynamic linker.
+* Gained support for execute-only code, which is placed in pages without read
+ permissions.
+* Gained a machine scheduler for Cortex-R52.
+* Gained support for XRay.
+* Gained Thumb1 implementations for several compiler-rt builtins. It also
+ has some support for building the builtins for HF targets.
+* Started using the generic bitreverse intrinsic instead of rbit.
+* Gained very basic support for GlobalISel.
+
+A lot of work has also been done in LLD for ARM, which now supports more
+relocations and TLS.
Changes to the MIPS Target
diff --git a/docs/index.rst b/docs/index.rst
index 341a9c16325b..83fc73387945 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,11 +1,6 @@
Overview
========
-.. warning::
-
- If you are using a released version of LLVM, see `the download page
- <http://llvm.org/releases/>`_ to find your documentation.
-
The LLVM compiler infrastructure supports a wide range of projects, from
industrial strength compilers to specialized JIT applications to small
research projects.
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index b50545a0484b..79287ed76f2e 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -68,7 +68,10 @@ class AssumptionCache {
AffectedValuesMap AffectedValues;
/// Get the vector of assumptions which affect a value from the cache.
- SmallVector<WeakVH, 1> &getAffectedValues(Value *V);
+ SmallVector<WeakVH, 1> &getOrInsertAffectedValues(Value *V);
+
+ /// Copy affected values in the cache for OV to be affected values for NV.
+ void copyAffectedValuesInCache(Value *OV, Value *NV);
/// \brief Flag tracking whether we have scanned the function yet.
///
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index aa55d79b761e..5851594700a4 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -24,7 +24,7 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-SmallVector<WeakVH, 1> &AssumptionCache::getAffectedValues(Value *V) {
+SmallVector<WeakVH, 1> &AssumptionCache::getOrInsertAffectedValues(Value *V) {
// Try using find_as first to avoid creating extra value handles just for the
// purpose of doing the lookup.
auto AVI = AffectedValues.find_as(V);
@@ -98,7 +98,7 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
}
for (auto &AV : Affected) {
- auto &AVV = getAffectedValues(AV);
+ auto &AVV = getOrInsertAffectedValues(AV);
if (std::find(AVV.begin(), AVV.end(), CI) == AVV.end())
AVV.push_back(CI);
}
@@ -111,20 +111,27 @@ void AssumptionCache::AffectedValueCallbackVH::deleted() {
// 'this' now dangles!
}
+void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) {
+ auto &NAVV = getOrInsertAffectedValues(NV);
+ auto AVI = AffectedValues.find(OV);
+ if (AVI == AffectedValues.end())
+ return;
+
+ for (auto &A : AVI->second)
+ if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end())
+ NAVV.push_back(A);
+}
+
void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) {
if (!isa<Instruction>(NV) && !isa<Argument>(NV))
return;
// Any assumptions that affected this value now affect the new value.
- auto &NAVV = AC->getAffectedValues(NV);
- auto AVI = AC->AffectedValues.find(getValPtr());
- if (AVI == AC->AffectedValues.end())
- return;
-
- for (auto &A : AVI->second)
- if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end())
- NAVV.push_back(A);
+ AC->copyAffectedValuesInCache(getValPtr(), NV);
+ // 'this' now might dangle! If the AffectedValues map was resized to add an
+ // entry for NV then this object might have been destroyed in favor of some
+ // copy in the grown map.
}
void AssumptionCache::scanFunction() {
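
The dangling-'this' hazard documented by the new comment is general to callback objects that live inside a growable container; a minimal sketch outside LLVM (hypothetical Handle type, with std::vector standing in for the AffectedValues DenseMap):

    #include <iostream>
    #include <vector>

    struct Handle {
      int Id;
      std::vector<Handle> *Owner;

      void onEvent() {
        // push_back may reallocate the vector and move every element;
        // once it returns, 'this' may point into freed storage.
        Owner->push_back(Handle{Id + 1, Owner});
        // Any further use of 'this' here (even reading Id) would be
        // undefined behavior, which is why the real callback delegates
        // all map access to the cache and then returns immediately.
      }
    };

    int main() {
      std::vector<Handle> Handles;
      Handles.push_back(Handle{0, &Handles});
      Handles[0].onEvent(); // safe only because onEvent never touches 'this' after growing
      std::cout << Handles.size() << " handles live\n";
    }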
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 6387bb36166e..f5ba637e58e2 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -405,6 +405,7 @@ char ModuleSummaryIndexWrapperPass::ID = 0;
INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
"Module Summary Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
"Module Summary Analysis", false, true)
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index 4a5d18e2db75..b05ab4b1da85 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -768,13 +768,12 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
unsigned ID, PlaceholderQueue &Placeholders) {
assert(ID < (MDStringRef.size()) + GlobalMetadataBitPosIndex.size());
assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString");
-#ifndef NDEBUG
// Lookup first if the metadata hasn't already been loaded.
if (auto *MD = MetadataList.lookup(ID)) {
auto *N = dyn_cast_or_null<MDNode>(MD);
- assert(N && N->isTemporary() && "Lazy loading an already loaded metadata");
+ if (!N->isTemporary())
+ return;
}
-#endif
SmallVector<uint64_t, 64> Record;
StringRef Blob;
IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]);
@@ -827,8 +826,22 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
auto getMD = [&](unsigned ID) -> Metadata * {
if (ID < MDStringRef.size())
return lazyLoadOneMDString(ID);
- if (!IsDistinct)
+ if (!IsDistinct) {
+ if (auto *MD = MetadataList.lookup(ID))
+ return MD;
+ // If lazy-loading is enabled, we try recursively to load the operand
+ // instead of creating a temporary.
+ if (ID < (MDStringRef.size() + GlobalMetadataBitPosIndex.size())) {
+ // Create a temporary for the node that is referencing the operand we
+ // will lazy-load. It is needed before recursing in case there are
+ // uniquing cycles.
+ MetadataList.getMetadataFwdRef(NextMetadataNo);
+ lazyLoadOneMetadata(ID, Placeholders);
+ return MetadataList.lookup(ID);
+ }
+ // Return a temporary.
return MetadataList.getMetadataFwdRef(ID);
+ }
if (auto *MD = MetadataList.getMetadataIfResolved(ID))
return MD;
return &Placeholders.getPlaceholderOp(ID);
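
The recursion strategy above (consult the cache, then register a forward reference before descending so uniquing cycles terminate) can be sketched independently of the bitcode reader; all types and names below are invented for illustration:

    #include <functional>
    #include <memory>
    #include <unordered_map>
    #include <vector>

    struct Node {
      bool Temporary = true;
      std::vector<Node *> Ops;
    };

    class LazyLoader {
      std::unordered_map<unsigned, std::unique_ptr<Node>> Cache;
      std::function<std::vector<unsigned>(unsigned)> ReadOperandIDs; // stands in for bitstream access

    public:
      explicit LazyLoader(std::function<std::vector<unsigned>(unsigned)> Read)
          : ReadOperandIDs(std::move(Read)) {}

      Node *get(unsigned ID) {
        auto It = Cache.find(ID);
        if (It != Cache.end())
          return It->second.get();   // already loaded, or currently in flight
        // Insert a temporary *before* recursing: an operand cycle back to
        // ID finds this placeholder instead of recursing forever.
        Node *N = (Cache[ID] = std::make_unique<Node>()).get();
        for (unsigned OpID : ReadOperandIDs(ID))
          N->Ops.push_back(get(OpID)); // recursively load each operand
        N->Temporary = false;          // now fully resolved
        return N;
      }
    };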
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index a14b86179d6e..104fb199da08 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -829,11 +829,22 @@ static std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
// Main entry point for the ThinLTO processing
void ThinLTOCodeGenerator::run() {
+ // Prepare the resulting object vector
+ assert(ProducedBinaries.empty() && "The generator should not be reused");
+ if (SavedObjectsDirectoryPath.empty())
+ ProducedBinaries.resize(Modules.size());
+ else {
+ sys::fs::create_directories(SavedObjectsDirectoryPath);
+ bool IsDir;
+ sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
+ if (!IsDir)
+ report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
+ ProducedBinaryFiles.resize(Modules.size());
+ }
+
if (CodeGenOnly) {
// Perform only parallel codegen and return.
ThreadPool Pool;
- assert(ProducedBinaries.empty() && "The generator should not be reused");
- ProducedBinaries.resize(Modules.size());
int count = 0;
for (auto &ModuleBuffer : Modules) {
Pool.async([&](int count) {
@@ -845,7 +856,12 @@ void ThinLTOCodeGenerator::run() {
/*IsImporting*/ false);
// CodeGen
- ProducedBinaries[count] = codegen(*TheModule);
+ auto OutputBuffer = codegen(*TheModule);
+ if (SavedObjectsDirectoryPath.empty())
+ ProducedBinaries[count] = std::move(OutputBuffer);
+ else
+ ProducedBinaryFiles[count] = writeGeneratedObject(
+ count, "", SavedObjectsDirectoryPath, *OutputBuffer);
}, count++);
}
@@ -866,18 +882,6 @@ void ThinLTOCodeGenerator::run() {
WriteIndexToFile(*Index, OS);
}
- // Prepare the resulting object vector
- assert(ProducedBinaries.empty() && "The generator should not be reused");
- if (SavedObjectsDirectoryPath.empty())
- ProducedBinaries.resize(Modules.size());
- else {
- sys::fs::create_directories(SavedObjectsDirectoryPath);
- bool IsDir;
- sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
- if (!IsDir)
- report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
- ProducedBinaryFiles.resize(Modules.size());
- }
// Prepare the module map.
auto ModuleMap = generateModuleMap(Modules);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 787dff99367e..2f13b722eb3b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -29455,19 +29455,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Combine brcond/cmov/setcc/.. based on comparing the result of
-/// atomic_load_add to use EFLAGS produced by the addition
-/// directly if possible. For example:
-///
-/// (setcc (cmp (atomic_load_add x, -C) C), COND_E)
-/// becomes:
-/// (setcc (LADD x, -C), COND_E)
-///
-/// and
+/// Combine:
/// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
-/// becomes:
+/// to:
/// (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
-///
+/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
/// Note that this is only legal for some op/cc combinations.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
SelectionDAG &DAG) {
@@ -29482,7 +29474,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
if (!Cmp.hasOneUse())
return SDValue();
- // This applies to variations of the common case:
+ // This only applies to variations of the common case:
// (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
// (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
// (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
@@ -29501,9 +29493,8 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
return SDValue();
auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
- if (!CmpRHSC)
+ if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
return SDValue();
- APInt Comparand = CmpRHSC->getAPIntValue();
const unsigned Opc = CmpLHS.getOpcode();
@@ -29519,19 +29510,16 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
if (Opc == ISD::ATOMIC_LOAD_SUB)
Addend = -Addend;
- if (Comparand == -Addend) {
- // No change to CC.
- } else if (CC == X86::COND_S && Comparand == 0 && Addend == 1) {
+ if (CC == X86::COND_S && Addend == 1)
CC = X86::COND_LE;
- } else if (CC == X86::COND_NS && Comparand == 0 && Addend == 1) {
+ else if (CC == X86::COND_NS && Addend == 1)
CC = X86::COND_G;
- } else if (CC == X86::COND_G && Comparand == 0 && Addend == -1) {
+ else if (CC == X86::COND_G && Addend == -1)
CC = X86::COND_GE;
- } else if (CC == X86::COND_LE && Comparand == 0 && Addend == -1) {
+ else if (CC == X86::COND_LE && Addend == -1)
CC = X86::COND_L;
- } else {
+ else
return SDValue();
- }
SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
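
At the source level, the narrowed combine targets patterns like the following (a sketch; the function name is invented). Since old < 0 is equivalent to old + 1 <= 0, the comparison can be answered from the flags of the locked add itself, turning COND_S on the fetched value into COND_LE on the incremented one with no separate cmp:

    #include <atomic>

    // Returns whether the counter was negative before the increment. On
    // x86-64 this shape can now lower to 'lock inc' plus 'setle' rather
    // than 'lock xadd' plus 'test' plus 'sets'.
    bool wasNegative(std::atomic<long> &Counter) {
      return Counter.fetch_add(1, std::memory_order_seq_cst) < 0;
    }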
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 727ff70c3ff6..586bb7bd7b1a 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -232,9 +232,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
isTargetKFreeBSD() || In64BitMode)
stackAlignment = 16;
-
- assert((!isPMULLDSlow() || hasSSE41()) &&
- "Feature Slow PMULLD can only be set on a subtarget with SSE4.1");
}
void X86Subtarget::initializeEnvironment() {
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index a1561fc0a6c2..01728ae680de 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3163,6 +3163,9 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
// Don't bother if the instruction is in a BB which ends in an EHPad.
if (UseBB->getTerminator()->isEHPad())
continue;
+ // Don't bother rewriting PHIs in catchswitch blocks.
+ if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
+ continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
if (SE.isSCEVable(UserInst->getType())) {
@@ -4672,7 +4675,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
// is the canonical backedge for this loop, which complicates post-inc
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
- !isa<IndirectBrInst>(BB->getTerminator())) {
+ !isa<IndirectBrInst>(BB->getTerminator()) &&
+ !isa<CatchSwitchInst>(BB->getTerminator())) {
BasicBlock *Parent = PN->getParent();
Loop *PNLoop = LI.getLoopFor(Parent);
if (!PNLoop || Parent != PNLoop->getHeader()) {
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index e1b6741f31b4..6043e04bb8c5 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -81,6 +81,10 @@ STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same");
STATISTIC(NumGVNMaxIterations,
"Maximum Number of iterations it took to converge GVN");
+STATISTIC(NumGVNLeaderChanges, "Number of leader changes");
+STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
+STATISTIC(NumGVNAvoidedSortedLeaderChanges,
+ "Number of avoided sorted leader changes");
//===----------------------------------------------------------------------===//
// GVN Pass
@@ -139,6 +143,10 @@ struct CongruenceClass {
// This is used so we can detect store equivalence changes properly.
int StoreCount = 0;
+ // The most dominating leader after our current leader, because the member set
+ // is not sorted and is expensive to keep sorted all the time.
+ std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U};
+
explicit CongruenceClass(unsigned ID) : ID(ID) {}
CongruenceClass(unsigned ID, Value *Leader, const Expression *E)
: ID(ID), RepLeader(Leader), DefiningExpr(E) {}
@@ -320,8 +328,8 @@ private:
// Templated to allow them to work both on BB's and BB-edges.
template <class T>
Value *lookupOperandLeader(Value *, const User *, const T &) const;
- void performCongruenceFinding(Value *, const Expression *);
- void moveValueToNewCongruenceClass(Value *, CongruenceClass *,
+ void performCongruenceFinding(Instruction *, const Expression *);
+ void moveValueToNewCongruenceClass(Instruction *, CongruenceClass *,
CongruenceClass *);
// Reachability handling.
void updateReachableEdge(BasicBlock *, BasicBlock *);
@@ -1056,20 +1064,43 @@ void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) {
// Move a value, currently in OldClass, to be part of NewClass
// Update OldClass for the move (including changing leaders, etc)
-void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass,
+void NewGVN::moveValueToNewCongruenceClass(Instruction *I,
+ CongruenceClass *OldClass,
CongruenceClass *NewClass) {
- DEBUG(dbgs() << "New congruence class for " << V << " is " << NewClass->ID
+ DEBUG(dbgs() << "New congruence class for " << I << " is " << NewClass->ID
<< "\n");
- OldClass->Members.erase(V);
- NewClass->Members.insert(V);
- if (isa<StoreInst>(V)) {
+
+ if (I == OldClass->NextLeader.first)
+ OldClass->NextLeader = {nullptr, ~0U};
+
+ // The new instruction and new class leader may either be siblings in the
+ // dominator tree, or the new class leader should dominate the new member
+ // instruction. We simply check that the member instruction does not properly
+ // dominate the new class leader.
+ assert(
+ !isa<Instruction>(NewClass->RepLeader) || !NewClass->RepLeader ||
+ I == NewClass->RepLeader ||
+ !DT->properlyDominates(
+ I->getParent(),
+ cast<Instruction>(NewClass->RepLeader)->getParent()) &&
+ "New class for instruction should not be dominated by instruction");
+
+ if (NewClass->RepLeader != I) {
+ auto DFSNum = InstrDFS.lookup(I);
+ if (DFSNum < NewClass->NextLeader.second)
+ NewClass->NextLeader = {I, DFSNum};
+ }
+
+ OldClass->Members.erase(I);
+ NewClass->Members.insert(I);
+ if (isa<StoreInst>(I)) {
--OldClass->StoreCount;
assert(OldClass->StoreCount >= 0);
++NewClass->StoreCount;
assert(NewClass->StoreCount > 0);
}
- ValueToClass[V] = NewClass;
+ ValueToClass[I] = NewClass;
// See if we destroyed the class or need to swap leaders.
if (OldClass->Members.empty() && OldClass != InitialClass) {
if (OldClass->DefiningExpr) {
@@ -1078,25 +1109,48 @@ void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass,
<< " from table\n");
ExpressionToClass.erase(OldClass->DefiningExpr);
}
- } else if (OldClass->RepLeader == V) {
+ } else if (OldClass->RepLeader == I) {
// When the leader changes, the value numbering of
// everything may change due to symbolization changes, so we need to
// reprocess.
- OldClass->RepLeader = *(OldClass->Members.begin());
+ DEBUG(dbgs() << "Leader change!\n");
+ ++NumGVNLeaderChanges;
+ // We don't need to sort members if there is only 1, and we don't care about
+ // sorting the initial class because everything either gets out of it or is
+ // unreachable.
+ if (OldClass->Members.size() == 1 || OldClass == InitialClass) {
+ OldClass->RepLeader = *(OldClass->Members.begin());
+ } else if (OldClass->NextLeader.first) {
+ ++NumGVNAvoidedSortedLeaderChanges;
+ OldClass->RepLeader = OldClass->NextLeader.first;
+ OldClass->NextLeader = {nullptr, ~0U};
+ } else {
+ ++NumGVNSortedLeaderChanges;
+ // TODO: If this ends up to slow, we can maintain a dual structure for
+ // member testing/insertion, or keep things mostly sorted, and sort only
+ // here, or ....
+ std::pair<Value *, unsigned> MinDFS = {nullptr, ~0U};
+ for (const auto X : OldClass->Members) {
+ auto DFSNum = InstrDFS.lookup(X);
+ if (DFSNum < MinDFS.second)
+ MinDFS = {X, DFSNum};
+ }
+ OldClass->RepLeader = MinDFS.first;
+ }
markLeaderChangeTouched(OldClass);
}
}
// Perform congruence finding on a given value numbering expression.
-void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
- ValueToExpression[V] = E;
+void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
+ ValueToExpression[I] = E;
// This is guaranteed to return something, since it will at least find
// INITIAL.
- CongruenceClass *VClass = ValueToClass[V];
- assert(VClass && "Should have found a vclass");
+ CongruenceClass *IClass = ValueToClass[I];
+ assert(IClass && "Should have found a IClass");
// Dead classes should have been eliminated from the mapping.
- assert(!VClass->Dead && "Found a dead class");
+ assert(!IClass->Dead && "Found a dead class");
CongruenceClass *EClass;
if (const auto *VE = dyn_cast<VariableExpression>(E)) {
@@ -1118,13 +1172,13 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
NewClass->RepLeader =
lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
} else {
- NewClass->RepLeader = V;
+ NewClass->RepLeader = I;
}
assert(!isa<VariableExpression>(E) &&
"VariableExpression should have been handled already");
EClass = NewClass;
- DEBUG(dbgs() << "Created new congruence class for " << *V
+ DEBUG(dbgs() << "Created new congruence class for " << *I
<< " using expression " << *E << " at " << NewClass->ID
<< " and leader " << *(NewClass->RepLeader) << "\n");
DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n");
@@ -1140,36 +1194,31 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
assert(!EClass->Dead && "We accidentally looked up a dead class");
}
}
- bool ClassChanged = VClass != EClass;
- bool LeaderChanged = LeaderChanges.erase(V);
+ bool ClassChanged = IClass != EClass;
+ bool LeaderChanged = LeaderChanges.erase(I);
if (ClassChanged || LeaderChanged) {
DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E
<< "\n");
if (ClassChanged)
-
- moveValueToNewCongruenceClass(V, VClass, EClass);
-
-
- markUsersTouched(V);
- if (auto *I = dyn_cast<Instruction>(V)) {
- if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
- // If this is a MemoryDef, we need to update the equivalence table. If
- // we determined the expression is congruent to a different memory
- // state, use that different memory state. If we determined it didn't,
- // we update that as well. Right now, we only support store
- // expressions.
- if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
- EClass->Members.size() != 1) {
- auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
- setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
- } else {
- setMemoryAccessEquivTo(MA, nullptr);
- }
- markMemoryUsersTouched(MA);
+ moveValueToNewCongruenceClass(I, IClass, EClass);
+ markUsersTouched(I);
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
+ // If this is a MemoryDef, we need to update the equivalence table. If
+ // we determined the expression is congruent to a different memory
+ // state, use that different memory state. If we determined it didn't,
+ // we update that as well. Right now, we only support store
+ // expressions.
+ if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
+ EClass->Members.size() != 1) {
+ auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
+ setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
+ } else {
+ setMemoryAccessEquivTo(MA, nullptr);
}
+ markMemoryUsersTouched(MA);
}
- } else if (StoreInst *SI = dyn_cast<StoreInst>(V)) {
+ } else if (auto *SI = dyn_cast<StoreInst>(I)) {
// There is, sadly, one complicating thing for stores. Stores do not
// produce values, only consume them. However, in order to make loads and
// stores value number the same, we ignore the value operand of the store.
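
The NextLeader bookkeeping above amounts to caching a runner-up minimum so that most leader changes avoid rescanning the member set; a toy model of the idea (invented names, integers standing in for instructions, DFS order faked by value):

    #include <unordered_set>
    #include <utility>

    struct ToyClass {
      std::unordered_set<int> Members;
      int Leader = -1;
      std::pair<int, unsigned> NextLeader{-1, ~0u}; // best candidate seen since last reset

      static unsigned dfsNum(int V) { return static_cast<unsigned>(V); } // stand-in ordering

      void addMember(int V) {
        Members.insert(V);
        if (Leader == -1)
          Leader = V;
        else if (V != Leader && dfsNum(V) < NextLeader.second)
          NextLeader = {V, dfsNum(V)};
      }

      void removeMember(int V) {
        Members.erase(V);
        if (V == NextLeader.first)
          NextLeader = {-1, ~0u};      // cached candidate left the class; invalidate
        if (V != Leader)
          return;
        if (NextLeader.first != -1) {  // fast path: promote the cached runner-up
          Leader = NextLeader.first;
        } else {                       // slow path: full scan for the minimum DFS number
          std::pair<int, unsigned> Min{-1, ~0u};
          for (int M : Members)
            if (dfsNum(M) < Min.second)
              Min = {M, dfsNum(M)};
          Leader = Min.first;
        }
        NextLeader = {-1, ~0u};
      }
    };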
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1b1f86f8efdc..dac7032fa08f 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5602,6 +5602,13 @@ void LoopVectorizationLegality::collectLoopUniforms() {
// is consecutive-like, the pointer operand should remain uniform.
else if (hasConsecutiveLikePtrOperand(&I))
ConsecutiveLikePtrs.insert(Ptr);
+
+ // Otherwise, if the memory instruction will be vectorized and its
+ // pointer operand is non-consecutive-like, the memory instruction should
+ // be a gather or scatter operation. Its pointer operand will be
+ // non-uniform.
+ else
+ PossibleNonUniformPtrs.insert(Ptr);
}
// Add to the Worklist all consecutive and consecutive-like pointers that
diff --git a/test/CodeGen/X86/atomic-eflags-reuse.ll b/test/CodeGen/X86/atomic-eflags-reuse.ll
index 9902325fd148..9521a2afefcd 100644
--- a/test/CodeGen/X86/atomic-eflags-reuse.ll
+++ b/test/CodeGen/X86/atomic-eflags-reuse.ll
@@ -192,68 +192,4 @@ entry:
ret i8 %s2
}
-define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
-; CHECK-LABEL: test_sub_1_setcc_eq:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: lock decq (%rdi)
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: retq
-entry:
- %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
- %tmp1 = icmp eq i64 %tmp0, 1
- %tmp2 = zext i1 %tmp1 to i8
- ret i8 %tmp2
-}
-
-define i8 @test_add_5_setcc_ne(i64* %p) #0 {
-; CHECK-LABEL: test_add_5_setcc_ne:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: lock addq $5, (%rdi)
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: retq
-entry:
- %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
- %tmp1 = icmp ne i64 %tmp0, -5
- %tmp2 = zext i1 %tmp1 to i8
- ret i8 %tmp2
-}
-
-define i8 @test_add_5_setcc_ne_comparand_mismatch(i64* %p) #0 {
-; CHECK-LABEL: test_add_5_setcc_ne_comparand_mismatch:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movl $5, %eax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: retq
-entry:
- %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
- %tmp1 = icmp ne i64 %tmp0, 0
- %tmp2 = zext i1 %tmp1 to i8
- ret i8 %tmp2
-}
-
-declare void @g()
-define zeroext i1 @test_sub_1_setcc_jcc(i64* %p) local_unnamed_addr #0 {
-; TODO: It's possible to use "lock dec" here, but both uses of the cmp need to
-; be updated.
-; CHECK-LABEL: test_sub_1_setcc_jcc:
-; CHECK: # BB#0: # %entry
-; CHECK: movq $-1, %rax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: cmpq $1, %rax
-; CHECK-NEXT: sete %bl
-; CHECK-NEXT: jne
-entry:
- %add = atomicrmw volatile add i64* %p, i64 -1 seq_cst
- %cmp = icmp ne i64 %add, 1
- %not = xor i1 %cmp, true
- br i1 %cmp, label %else, label %then
-then:
- tail call void @g()
- br label %else
-else:
- ret i1 %not
-}
-
attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/slow-pmulld.ll b/test/CodeGen/X86/slow-pmulld.ll
index ff6682090a26..1de19d2334d4 100644
--- a/test/CodeGen/X86/slow-pmulld.ll
+++ b/test/CodeGen/X86/slow-pmulld.ll
@@ -4,6 +4,9 @@
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE4-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE4-64
+; Make sure that the slow-pmulld feature can be used without SSE4.1.
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont -mattr=-sse4.1
+
define <4 x i32> @foo(<4 x i8> %A) {
; CHECK32-LABEL: foo:
; CHECK32: # BB#0:
diff --git a/test/ThinLTO/X86/lazyload_metadata.ll b/test/ThinLTO/X86/lazyload_metadata.ll
index 3c4345831aa3..7bd3e641bc77 100644
--- a/test/ThinLTO/X86/lazyload_metadata.ll
+++ b/test/ThinLTO/X86/lazyload_metadata.ll
@@ -17,7 +17,7 @@
; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \
; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY
; NOTLAZY: 58 bitcode-reader - Number of Metadata records loaded
-; NOTLAZY: 8 bitcode-reader - Number of MDStrings loaded
+; NOTLAZY: 6 bitcode-reader - Number of MDStrings loaded
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -48,7 +48,7 @@ define void @globalfunc3(i32 %arg) {
!3 = !{!"3"}
!4 = !{!"4"}
!5 = !{!"5"}
-!6 = !{!"6"}
+!6 = !{!9}
!7 = !{!"7"}
!8 = !{!"8"}
-!9 = !{!"9"}
+!9 = !{!6}
diff --git a/test/Transforms/LoopStrengthReduce/pr31627.ll b/test/Transforms/LoopStrengthReduce/pr31627.ll
new file mode 100644
index 000000000000..4bd4fc273d7b
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr31627.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+define void @fn3() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %call = invoke i32 @fn2()
+ to label %for.cond.preheader unwind label %catch.dispatch2
+
+for.cond.preheader: ; preds = %entry
+ br label %for.cond
+
+for.cond: ; preds = %for.cond.preheader, %for.cond
+ %b.0 = phi i32 [ %inc, %for.cond ], [ %call, %for.cond.preheader ]
+ %inc = add nsw i32 %b.0, 1
+ invoke void @fn1(i32 %inc)
+ to label %for.cond unwind label %catch.dispatch
+
+; CHECK: %[[add:.*]] = add i32 %call, 1
+; CHECK: br label %for.cond
+
+; CHECK: for.cond: ; preds = %for.cond, %for.cond.preheader
+; CHECK: %[[lsr_iv:.*]] = phi i32 [ %lsr.iv.next, %for.cond ], [ %[[add]], %for.cond.preheader ]
+; CHECK: %[[lsr_iv_next:.*]] = add i32 %lsr.iv, 1
+; CHECK: invoke void @fn1(i32 %[[lsr_iv]])
+
+
+catch.dispatch: ; preds = %for.cond
+ %0 = catchswitch within none [label %catch] unwind label %catch.dispatch2
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+ invoke void @_CxxThrowException(i8* null, i8* null) #2 [ "funclet"(token %1) ]
+ to label %unreachable unwind label %catch.dispatch2
+
+catch.dispatch2: ; preds = %catch.dispatch, %catch, %entry
+ %a.0 = phi i32 [ undef, %entry ], [ %call, %catch ], [ %call, %catch.dispatch ]
+ %2 = catchswitch within none [label %catch3] unwind to caller
+
+catch3: ; preds = %catch.dispatch2
+ %3 = catchpad within %2 [i8* null, i32 64, i8* null]
+ call void @fn1(i32 %a.0) [ "funclet"(token %3) ]
+ catchret from %3 to label %try.cont4
+
+try.cont4: ; preds = %catch3
+ ret void
+
+unreachable: ; preds = %catch
+ unreachable
+}
+
+declare i32 @fn2()
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @fn1(i32)
+
+declare void @_CxxThrowException(i8*, i8*)
diff --git a/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
new file mode 100644
index 000000000000..32bfcd2275ac
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -0,0 +1,56 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -instcombine -S -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: PR31671
+;
+; Check a pointer in which one of its uses is consecutive-like and another of
+; its uses is non-consecutive-like. In the test case below, %tmp3 is the
+; pointer operand of an interleaved load, making it consecutive-like. However,
+; it is also the pointer operand of a non-interleaved store that will become a
+; scatter operation. %tmp3 (and the induction variable) should not be marked
+; uniform-after-vectorization.
+;
+; CHECK: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i
+; CHECK-NOT: LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
+; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5
+; CHECK: vector.body:
+; CHECK: %vec.ind = phi <16 x i64>
+; CHECK: %[[T0:.+]] = extractelement <16 x i64> %vec.ind, i32 0
+; CHECK: %[[T1:.+]] = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %[[T0]]
+; CHECK: %[[T2:.+]] = bitcast float* %[[T1]] to <80 x float>*
+; CHECK: load <80 x float>, <80 x float>* %[[T2]], align 4
+; CHECK: %[[T3:.+]] = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %[[T0]]
+; CHECK: %[[T4:.+]] = bitcast float* %[[T3]] to <80 x float>*
+; CHECK: load <80 x float>, <80 x float>* %[[T4]], align 4
+; CHECK: %VectorGep = getelementptr inbounds %data, %data* %d, i64 0, i32 0, <16 x i64> %vec.ind
+; CHECK: call void @llvm.masked.scatter.v16f32({{.*}}, <16 x float*> %VectorGep, {{.*}})
+; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
+
+%data = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float] }
+
+define void @PR31671(float %x, %data* %d) #0 {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i
+ %tmp1 = load float, float* %tmp0, align 4
+ %tmp2 = fmul float %x, %tmp1
+ %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
+ %tmp4 = load float, float* %tmp3, align 4
+ %tmp5 = fadd float %tmp4, %tmp2
+ store float %tmp5, float* %tmp3, align 4
+ %i.next = add nuw nsw i64 %i, 5
+ %cond = icmp slt i64 %i.next, 32000
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+attributes #0 = { "target-cpu"="knl" }
diff --git a/test/Transforms/NewGVN/pr31613.ll b/test/Transforms/NewGVN/pr31613.ll
new file mode 100644
index 000000000000..d3a41830c789
--- /dev/null
+++ b/test/Transforms/NewGVN/pr31613.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+;; Both of these tests are tests of phi nodes that end up all equivalent to each other
+;; Without proper leader ordering, we will end up cycling the leader between all of them and never converge.
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 1, [[BB18:%.*]] ]
+; CHECK-NEXT: br label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB4:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: br i1 undef, label [[BB18]], label [[BB7:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: br label [[BB9:%.*]]
+; CHECK: bb9:
+; CHECK-NEXT: br i1 undef, label [[BB2]], label [[BB11:%.*]]
+; CHECK: bb11:
+; CHECK-NEXT: br i1 undef, label [[BB16:%.*]], label [[BB14:%.*]]
+; CHECK: bb14:
+; CHECK-NEXT: br label [[BB4]]
+; CHECK: bb16:
+; CHECK-NEXT: br label [[BB7]]
+; CHECK: bb18:
+; CHECK-NEXT: br label [[BB1]]
+;
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb18, %bb
+ %tmp = phi i32 [ 0, %bb ], [ 1, %bb18 ]
+ br label %bb2
+
+bb2: ; preds = %bb9, %bb1
+ %tmp3 = phi i32 [ %tmp, %bb1 ], [ %tmp8, %bb9 ]
+ br label %bb4
+
+bb4: ; preds = %bb14, %bb2
+ %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp15, %bb14 ]
+ br i1 undef, label %bb18, label %bb7
+
+bb7: ; preds = %bb16, %bb4
+ %tmp8 = phi i32 [ %tmp17, %bb16 ], [ %tmp5, %bb4 ]
+ br label %bb9
+
+bb9: ; preds = %bb7
+ br i1 undef, label %bb2, label %bb11
+
+bb11: ; preds = %bb9
+ br i1 undef, label %bb16, label %bb14
+
+bb14: ; preds = %bb11
+ %tmp15 = phi i32 [ %tmp8, %bb11 ]
+ br label %bb4
+
+bb16: ; preds = %bb11
+ %tmp17 = phi i32 [ %tmp8, %bb11 ]
+ br label %bb7
+
+bb18: ; preds = %bb4
+ br label %bb1
+}
+
+%struct.a = type {}
+%struct.b = type {}
+
+declare void @c.d.p(i64, i8*)
+
+define void @e() {
+; CHECK-LABEL: @e(
+; CHECK-NEXT: [[F:%.*]] = alloca i32
+; CHECK-NEXT: store i32 undef, i32* [[F]], !g !0
+; CHECK-NEXT: br label [[H:%.*]]
+; CHECK: h:
+; CHECK-NEXT: call void @c.d.p(i64 8, i8* undef)
+; CHECK-NEXT: [[I:%.*]] = load i32, i32* [[F]]
+; CHECK-NEXT: [[J:%.*]] = load i32, i32* null
+; CHECK-NEXT: [[K:%.*]] = icmp eq i32 [[I]], [[J]]
+; CHECK-NEXT: br i1 [[K]], label [[L:%.*]], label [[Q:%.*]]
+; CHECK: l:
+; CHECK-NEXT: br label [[R:%.*]]
+; CHECK: q:
+; CHECK-NEXT: [[M:%.*]] = load %struct.a*, %struct.a** null
+; CHECK-NEXT: br label [[R]]
+; CHECK: r:
+; CHECK-NEXT: switch i32 undef, label [[N:%.*]] [
+; CHECK-NEXT: i32 0, label [[S:%.*]]
+; CHECK-NEXT: ]
+; CHECK: s:
+; CHECK-NEXT: store i32 undef, i32* [[F]], !g !0
+; CHECK-NEXT: br label [[H]]
+; CHECK: n:
+; CHECK-NEXT: [[O:%.*]] = load %struct.a*, %struct.a** null
+; CHECK-NEXT: ret void
+;
+ %f = alloca i32
+ store i32 undef, i32* %f, !g !0
+ br label %h
+
+h: ; preds = %s, %0
+ call void @c.d.p(i64 8, i8* undef)
+ %i = load i32, i32* %f
+ %j = load i32, i32* null
+ %k = icmp eq i32 %i, %j
+ br i1 %k, label %l, label %q
+
+l: ; preds = %h
+ br label %r
+
+q: ; preds = %h
+ %m = load %struct.a*, %struct.a** null
+ %1 = bitcast %struct.a* %m to %struct.b*
+ br label %r
+
+r: ; preds = %q, %l
+ switch i32 undef, label %n [
+ i32 0, label %s
+ ]
+
+s: ; preds = %r
+ store i32 undef, i32* %f, !g !0
+ br label %h
+
+n: ; preds = %r
+ %o = load %struct.a*, %struct.a** null
+ %2 = bitcast %struct.a* %o to %struct.b*
+ ret void
+}
+
+!0 = !{}