aboutsummaryrefslogtreecommitdiffstats
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--test/CodeGen/AArch64/fcopysign.ll25
-rw-r--r--test/CodeGen/AMDGPU/kernel-args.ll19
-rw-r--r--test/CodeGen/AMDGPU/mad_uint24.ll93
-rw-r--r--test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll9
-rw-r--r--test/CodeGen/NVPTX/load-store.ll88
-rw-r--r--test/CodeGen/X86/masked_memop.ll59
-rw-r--r--test/Instrumentation/BoundsChecking/many-traps-2.ll65
-rw-r--r--test/MC/ELF/extra-section-flags.s12
-rw-r--r--test/Transforms/InstSimplify/AndOrXor.ll43
-rw-r--r--test/Transforms/InstSimplify/floating-point-compare.ll6
-rw-r--r--test/Transforms/NewGVN/pair_jumpthread.ll2
11 files changed, 347 insertions, 74 deletions
diff --git a/test/CodeGen/AArch64/fcopysign.ll b/test/CodeGen/AArch64/fcopysign.ll
index 6bda33cf76ef..a625a9eb2a6c 100644
--- a/test/CodeGen/AArch64/fcopysign.ll
+++ b/test/CodeGen/AArch64/fcopysign.ll
@@ -5,10 +5,12 @@ target triple = "aarch64--"
declare fp128 @llvm.copysign.f128(fp128, fp128)
-@val = global double zeroinitializer, align 8
+@val_float = global float zeroinitializer, align 4
+@val_double = global double zeroinitializer, align 8
+@val_fp128 = global fp128 zeroinitializer, align 16
; CHECK-LABEL: copysign0
-; CHECK: ldr [[REG:x[0-9]+]], [x8, :lo12:val]
+; CHECK: ldr [[REG:x[0-9]+]], [x8, :lo12:val_double]
; CHECK: and [[ANDREG:x[0-9]+]], [[REG]], #0x8000000000000000
; CHECK: lsr x[[LSRREGNUM:[0-9]+]], [[ANDREG]], #56
; CHECK: bfxil w[[LSRREGNUM]], w{{[0-9]+}}, #0, #7
@@ -16,8 +18,25 @@ declare fp128 @llvm.copysign.f128(fp128, fp128)
; CHECK: ldr q{{[0-9]+}},
define fp128 @copysign0() {
entry:
- %v = load double, double* @val, align 8
+ %v = load double, double* @val_double, align 8
%conv = fpext double %v to fp128
%call = tail call fp128 @llvm.copysign.f128(fp128 0xL00000000000000007FFF000000000000, fp128 %conv) #2
ret fp128 %call
}
+
+; CHECK-LABEL: copysign1
+; CHECK-DAG: ldr [[REG:q[0-9]+]], [x8, :lo12:val_fp128]
+; CHECK-DAG: ldr [[REG:w[0-9]+]], [x8, :lo12:val_float]
+; CHECK: and [[ANDREG:w[0-9]+]], [[REG]], #0x80000000
+; CHECK: lsr w[[LSRREGNUM:[0-9]+]], [[ANDREG]], #24
+; CHECK: bfxil w[[LSRREGNUM]], w{{[0-9]+}}, #0, #7
+; CHECK: strb w[[LSRREGNUM]],
+; CHECK: ldr q{{[0-9]+}},
+define fp128@copysign1() {
+entry:
+ %v0 = load fp128, fp128* @val_fp128, align 16
+ %v1 = load float, float* @val_float, align 4
+ %conv = fpext float %v1 to fp128
+ %call = tail call fp128 @llvm.copysign.f128(fp128 %v0, fp128 %conv)
+ ret fp128 %call
+}
diff --git a/test/CodeGen/AMDGPU/kernel-args.ll b/test/CodeGen/AMDGPU/kernel-args.ll
index 9d1f582f4a88..11067522f857 100644
--- a/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/test/CodeGen/AMDGPU/kernel-args.ll
@@ -16,13 +16,8 @@
; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
-; EG: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: MOV * T1.X, KC0[2].Z,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-
-; CM: LSHR * T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: MOV * T1.X, KC0[2].Z,
+; EGCM: VTX_READ_8{{.*}} #3
+; EGCM: KC0[2].Y
define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
%ext = zext i8 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -92,14 +87,8 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign
; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}}
; HSA-VI: flat_store_dword
-
-; EG: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: MOV * T1.X, KC0[2].Z,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-
-; CM: LSHR * T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: MOV * T1.X, KC0[2].Z,
+; EGCM: VTX_READ_16
+; EGCM: KC0[2].Y
define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
%ext = zext i16 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/AMDGPU/mad_uint24.ll b/test/CodeGen/AMDGPU/mad_uint24.ll
index 2c4f7d324a96..3c3371bf9166 100644
--- a/test/CodeGen/AMDGPU/mad_uint24.ll
+++ b/test/CodeGen/AMDGPU/mad_uint24.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC
-; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN
+; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -138,3 +138,90 @@ bb18: ; preds = %bb4
store i32 %tmp16, i32 addrspace(1)* %arg
ret void
}
+
+; FUNC-LABEL: {{^}}i8_mad_sat_16:
+; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
+; The result must be sign-extended
+; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
+; EG: 8
+; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
+; GCN: v_med3_i32 v{{[0-9]}}, [[EXT]],
+define amdgpu_kernel void @i8_mad_sat_16(i8 addrspace(1)* %out, i8 addrspace(1)* %in0, i8 addrspace(1)* %in1, i8 addrspace(1)* %in2, i64 addrspace(5)* %idx) {
+entry:
+ %retval.0.i = load i64, i64 addrspace(5)* %idx
+ %arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 %retval.0.i
+ %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 %retval.0.i
+ %arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %in2, i64 %retval.0.i
+ %l1 = load i8, i8 addrspace(1)* %arrayidx, align 1
+ %l2 = load i8, i8 addrspace(1)* %arrayidx2, align 1
+ %l3 = load i8, i8 addrspace(1)* %arrayidx4, align 1
+ %conv1.i = sext i8 %l1 to i16
+ %conv3.i = sext i8 %l2 to i16
+ %conv5.i = sext i8 %l3 to i16
+ %mul.i.i.i = mul nsw i16 %conv3.i, %conv1.i
+ %add.i.i = add i16 %mul.i.i.i, %conv5.i
+ %c4 = icmp sgt i16 %add.i.i, -128
+ %cond.i.i = select i1 %c4, i16 %add.i.i, i16 -128
+ %c5 = icmp slt i16 %cond.i.i, 127
+ %cond13.i.i = select i1 %c5, i16 %cond.i.i, i16 127
+ %conv8.i = trunc i16 %cond13.i.i to i8
+ %arrayidx7 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 %retval.0.i
+ store i8 %conv8.i, i8 addrspace(1)* %arrayidx7, align 1
+ ret void
+}
+
+; FUNC-LABEL: {{^}}i8_mad_32:
+; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
+; The result must be sign-extended
+; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
+; EG: 8
+; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
+define amdgpu_kernel void @i8_mad_32(i32 addrspace(1)* %out, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i8 addrspace(1)* %c, i64 addrspace(5)* %idx) {
+entry:
+ %retval.0.i = load i64, i64 addrspace(5)* %idx
+ %arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 %retval.0.i
+ %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %b, i64 %retval.0.i
+ %arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %c, i64 %retval.0.i
+ %la = load i8, i8 addrspace(1)* %arrayidx, align 1
+ %lb = load i8, i8 addrspace(1)* %arrayidx2, align 1
+ %lc = load i8, i8 addrspace(1)* %arrayidx4, align 1
+ %exta = sext i8 %la to i16
+ %extb = sext i8 %lb to i16
+ %extc = sext i8 %lc to i16
+ %mul = mul i16 %exta, %extb
+ %mad = add i16 %mul, %extc
+ %mad_ext = sext i16 %mad to i32
+ store i32 %mad_ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}i8_mad_64:
+; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
+; The result must be sign-extended
+; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
+; EG: 8
+; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
+define amdgpu_kernel void @i8_mad_64(i64 addrspace(1)* %out, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i8 addrspace(1)* %c, i64 addrspace(5)* %idx) {
+entry:
+ %retval.0.i = load i64, i64 addrspace(5)* %idx
+ %arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 %retval.0.i
+ %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %b, i64 %retval.0.i
+ %arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %c, i64 %retval.0.i
+ %la = load i8, i8 addrspace(1)* %arrayidx, align 1
+ %lb = load i8, i8 addrspace(1)* %arrayidx2, align 1
+ %lc = load i8, i8 addrspace(1)* %arrayidx4, align 1
+ %exta = sext i8 %la to i16
+ %extb = sext i8 %lb to i16
+ %extc = sext i8 %lc to i16
+ %mul = mul i16 %exta, %extb
+ %mad = add i16 %mul, %extc
+ %mad_ext = sext i16 %mad to i64
+ store i64 %mad_ext, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll b/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
index a7adc2ae996e..03f0539e19be 100644
--- a/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
+++ b/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
@@ -60,8 +60,11 @@ entry:
}
; FUNC-LABEL: {{^}}test_implicit:
-; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56
-; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 56
+; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56 == KC0[3].Z
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]], [[PTR:T[0-9]+.[XYZW]]]
+; EG-NOT: VTX_READ
+; EG-DAG: MOV {{\*?}} [[VAL]], KC0[3].Z
+; EG-DAG: LSHR {{\*? *}}[[PTR]], KC0[2].Y, literal
define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
%implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
%header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
@@ -73,7 +76,7 @@ define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
; FUNC-LABEL: {{^}}test_implicit_dyn:
; 36 prepended implicit bytes + 8(out pointer + in) = 44
-; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44
+; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44, #3
define amdgpu_kernel void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 {
%implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
%header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
diff --git a/test/CodeGen/NVPTX/load-store.ll b/test/CodeGen/NVPTX/load-store.ll
new file mode 100644
index 000000000000..03b0109dea20
--- /dev/null
+++ b/test/CodeGen/NVPTX/load-store.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+; CHECK-LABEL: plain
+define void @plain(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr {
+ ; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load i8, i8* %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i8 %a.add, i8* %a
+
+ ; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load i16, i16* %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i16 %b.add, i16* %b
+
+ ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load i32, i32* %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store i32 %c.add, i32* %c
+
+ ; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load i64, i64* %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store i64 %d.add, i64* %d
+
+ ret void
+}
+
+; CHECK-LABEL: volatile
+define void @volatile(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr {
+ ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load volatile i8, i8* %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i8 %a.add, i8* %a
+
+ ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load volatile i16, i16* %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i16 %b.add, i16* %b
+
+ ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load volatile i32, i32* %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile i32 %c.add, i32* %c
+
+ ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load volatile i64, i64* %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store volatile i64 %d.add, i64* %d
+
+ ret void
+}
+
+; CHECK-LABEL: monotonic
+define void @monotonic(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr {
+ ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, i8* %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, i8* %a monotonic, align 1
+
+ ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, i16* %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, i16* %b monotonic, align 2
+
+ ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, i32* %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, i32* %c monotonic, align 4
+
+ ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, i64* %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, i64* %d monotonic, align 8
+
+ ret void
+}
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
index 812d9f50fe3c..36da9386fb06 100644
--- a/test/CodeGen/X86/masked_memop.ll
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -1310,6 +1310,65 @@ define void @trunc_mask(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <4 x
ret void
}
+; This needs to be widened to v4i32.
+; This used to assert in type legalization. PR38436
+; FIXME: The codegen for AVX512 should use KSHIFT to zero the upper bits of the mask.
+define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) {
+; AVX1-LABEL: widen_masked_store:
+; AVX1: ## %bb.0:
+; AVX1-NEXT: vmovd %edx, %xmm1
+; AVX1-NEXT: vmovd %esi, %xmm2
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX1-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: widen_masked_store:
+; AVX2: ## %bb.0:
+; AVX2-NEXT: vmovd %edx, %xmm1
+; AVX2-NEXT: vmovd %esi, %xmm2
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpmaskmovd %xmm0, %xmm1, (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: widen_masked_store:
+; AVX512F: ## %bb.0:
+; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
+; AVX512F-NEXT: kshiftlw $12, %k0, %k0
+; AVX512F-NEXT: kshiftrw $12, %k0, %k1
+; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; SKX-LABEL: widen_masked_store:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpslld $31, %xmm1, %xmm1
+; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vmovdqa32 %xmm1, %xmm1 {%k1} {z}
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
+; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1}
+; SKX-NEXT: retq
+ call void @llvm.masked.store.v3i32(<3 x i32> %v, <3 x i32>* %p, i32 16, <3 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.store.v3i32(<3 x i32>, <3 x i32>*, i32, <3 x i1>)
+
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
diff --git a/test/Instrumentation/BoundsChecking/many-traps-2.ll b/test/Instrumentation/BoundsChecking/many-traps-2.ll
new file mode 100644
index 000000000000..a6e99586af21
--- /dev/null
+++ b/test/Instrumentation/BoundsChecking/many-traps-2.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -bounds-checking -S | FileCheck %s
+@array = internal global [1819 x i16] zeroinitializer, section ".bss,bss"
+@offsets = external dso_local global [10 x i16]
+
+; CHECK-LABEL: @test
+define dso_local void @test() {
+bb1:
+ br label %bb19
+
+bb20:
+ %_tmp819 = load i16, i16* null
+; CHECK: br {{.*}} %trap
+ %_tmp820 = sub nsw i16 9, %_tmp819
+ %_tmp821 = sext i16 %_tmp820 to i64
+ %_tmp822 = getelementptr [10 x i16], [10 x i16]* @offsets, i16 0, i64 %_tmp821
+ %_tmp823 = load i16, i16* %_tmp822
+ br label %bb33
+
+bb34:
+ %_tmp907 = zext i16 %i__7.107.0 to i64
+ %_tmp908 = getelementptr [1819 x i16], [1819 x i16]* @array, i16 0, i64 %_tmp907
+ store i16 0, i16* %_tmp908
+; CHECK: br {{.*}} %trap
+ %_tmp910 = add i16 %i__7.107.0, 1
+ br label %bb33
+
+bb33:
+ %i__7.107.0 = phi i16 [ undef, %bb20 ], [ %_tmp910, %bb34 ]
+ %_tmp913 = add i16 %_tmp823, 191
+ %_tmp914 = icmp ult i16 %i__7.107.0, %_tmp913
+ br i1 %_tmp914, label %bb34, label %bb19
+
+bb19:
+ %_tmp976 = icmp slt i16 0, 10
+ br i1 %_tmp976, label %bb20, label %bb39
+
+bb39:
+ ret void
+}
+
+@e = dso_local local_unnamed_addr global [1 x i16] zeroinitializer, align 1
+
+; CHECK-LABEL: @test2
+define dso_local void @test2() local_unnamed_addr {
+entry:
+ br label %while.cond1.preheader
+
+while.cond1.preheader:
+ %0 = phi i16 [ undef, %entry ], [ %inc, %while.end ]
+ %1 = load i16, i16* undef, align 1
+; CHECK: br {{.*}} %trap
+ br label %while.end
+
+while.end:
+ %inc = add nsw i16 %0, 1
+ %arrayidx = getelementptr inbounds [1 x i16], [1 x i16]* @e, i16 0, i16
+ %0
+ %2 = load i16, i16* %arrayidx, align 1
+; CHECK: or i1
+; CHECK-NEXT: br {{.*}} %trap
+ br i1 false, label %while.end6, label %while.cond1.preheader
+
+while.end6:
+ ret void
+}
diff --git a/test/MC/ELF/extra-section-flags.s b/test/MC/ELF/extra-section-flags.s
deleted file mode 100644
index bde7e1abf8ef..000000000000
--- a/test/MC/ELF/extra-section-flags.s
+++ /dev/null
@@ -1,12 +0,0 @@
-# RUN: llvm-mc -triple x86_64-unknown-unknown -filetype=obj %s -o /dev/null 2>&1 | FileCheck %s
-
-.section .rodata, "ax"
-# CHECK: warning: setting incorrect section attributes for .rodata
-nop
-
-.section .rodata, "a"
-nop
-.section .rodata.cst4, "aM",@progbits,8
-nop
-# CHECK-NOT: warning:
-
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
index ed68f1121278..8054eb045364 100644
--- a/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -967,12 +967,8 @@ define i32 @reversed_not(i32 %a) {
define i64 @shl_or_and1(i32 %a, i1 %b) {
; CHECK-LABEL: @shl_or_and1(
-; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1
-; CHECK-NEXT: ret i64 [[TMP5]]
+; CHECK-NEXT: ret i64 [[TMP2]]
;
%tmp1 = zext i32 %a to i64
%tmp2 = zext i1 %b to i64
@@ -985,11 +981,8 @@ define i64 @shl_or_and1(i32 %a, i1 %b) {
define i64 @shl_or_and2(i32 %a, i1 %b) {
; CHECK-LABEL: @shl_or_and2(
; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4294967296
-; CHECK-NEXT: ret i64 [[TMP5]]
+; CHECK-NEXT: ret i64 [[TMP3]]
;
%tmp1 = zext i1 %b to i64
%tmp2 = zext i32 %a to i64
@@ -999,15 +992,11 @@ define i64 @shl_or_and2(i32 %a, i1 %b) {
ret i64 %tmp5
}
-; concatinate two 32-bit integers and extract lower 32-bit
+; concatenate two 32-bit integers and extract lower 32-bit
define i64 @shl_or_and3(i32 %a, i32 %b) {
; CHECK-LABEL: @shl_or_and3(
-; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4294967295
-; CHECK-NEXT: ret i64 [[TMP5]]
+; CHECK-NEXT: ret i64 [[TMP2]]
;
%tmp1 = zext i32 %a to i64
%tmp2 = zext i32 %b to i64
@@ -1017,15 +1006,12 @@ define i64 @shl_or_and3(i32 %a, i32 %b) {
ret i64 %tmp5
}
-; concatinate two 16-bit integers and extract higher 16-bit
+; concatenate two 16-bit integers and extract higher 16-bit
define i32 @shl_or_and4(i16 %a, i16 %b) {
; CHECK-LABEL: @shl_or_and4(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], -65536
-; CHECK-NEXT: ret i32 [[TMP5]]
+; CHECK-NEXT: ret i32 [[TMP3]]
;
%tmp1 = zext i16 %a to i32
%tmp2 = zext i16 %b to i32
@@ -1037,12 +1023,8 @@ define i32 @shl_or_and4(i16 %a, i16 %b) {
define i128 @shl_or_and5(i64 %a, i1 %b) {
; CHECK-LABEL: @shl_or_and5(
-; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[A:%.*]] to i128
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i128
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i128 [[TMP1]], 64
-; CHECK-NEXT: [[TMP4:%.*]] = or i128 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and i128 [[TMP4]], 1
-; CHECK-NEXT: ret i128 [[TMP5]]
+; CHECK-NEXT: ret i128 [[TMP2]]
;
%tmp1 = zext i64 %a to i128
%tmp2 = zext i1 %b to i128
@@ -1108,12 +1090,8 @@ define i32 @shl_or_and8(i16 %a, i16 %b) {
define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) {
; CHECK-LABEL: @shl_or_and1v(
-; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 1, i64 1>
-; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%tmp1 = zext <2 x i32> %a to <2 x i64>
%tmp2 = zext <2 x i1> %b to <2 x i64>
@@ -1126,11 +1104,8 @@ define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) {
define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) {
; CHECK-LABEL: @shl_or_and2v(
; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
-; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 4294967296, i64 4294967296>
-; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%tmp1 = zext <2 x i1> %b to <2 x i64>
%tmp2 = zext <2 x i32> %a to <2 x i64>
diff --git a/test/Transforms/InstSimplify/floating-point-compare.ll b/test/Transforms/InstSimplify/floating-point-compare.ll
index b1444bb5a1e2..bc5c58a698e3 100644
--- a/test/Transforms/InstSimplify/floating-point-compare.ll
+++ b/test/Transforms/InstSimplify/floating-point-compare.ll
@@ -266,13 +266,15 @@ define i1 @orderedLessZeroMinNum(float, float) {
ret i1 %uge
}
-; FIXME: This is wrong.
; PR37776: https://bugs.llvm.org/show_bug.cgi?id=37776
; exp() may return nan, leaving %1 as the unknown result, so we can't simplify.
define i1 @orderedLessZeroMaxNum(float, float) {
; CHECK-LABEL: @orderedLessZeroMaxNum(
-; CHECK-NEXT: ret i1 true
+; CHECK-NEXT: [[A:%.*]] = call float @llvm.exp.f32(float [[TMP0:%.*]])
+; CHECK-NEXT: [[B:%.*]] = call float @llvm.maxnum.f32(float [[A]], float [[TMP1:%.*]])
+; CHECK-NEXT: [[UGE:%.*]] = fcmp uge float [[B]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[UGE]]
;
%a = call float @llvm.exp.f32(float %0)
%b = call float @llvm.maxnum.f32(float %a, float %1)
diff --git a/test/Transforms/NewGVN/pair_jumpthread.ll b/test/Transforms/NewGVN/pair_jumpthread.ll
index 9e55cda82eb1..65d94e1f6353 100644
--- a/test/Transforms/NewGVN/pair_jumpthread.ll
+++ b/test/Transforms/NewGVN/pair_jumpthread.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -newgvn -S | FileCheck %s
; RUN: opt < %s -newgvn -jump-threading -S | FileCheck --check-prefix=CHECK-JT %s
-; This test is expected to fail until the transformation is committed.
-; XFAIL: *
define signext i32 @testBI(i32 signext %v) {
; Test with std::pair<bool, int>