diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/AArch64/fcopysign.ll | 25 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/kernel-args.ll | 19 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/mad_uint24.ll | 93 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll | 9 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/load-store.ll | 88 | ||||
-rw-r--r-- | test/CodeGen/X86/masked_memop.ll | 59 | ||||
-rw-r--r-- | test/Instrumentation/BoundsChecking/many-traps-2.ll | 65 | ||||
-rw-r--r-- | test/MC/ELF/extra-section-flags.s | 12 | ||||
-rw-r--r-- | test/Transforms/InstSimplify/AndOrXor.ll | 43 | ||||
-rw-r--r-- | test/Transforms/InstSimplify/floating-point-compare.ll | 6 | ||||
-rw-r--r-- | test/Transforms/NewGVN/pair_jumpthread.ll | 2 |
11 files changed, 347 insertions, 74 deletions
diff --git a/test/CodeGen/AArch64/fcopysign.ll b/test/CodeGen/AArch64/fcopysign.ll index 6bda33cf76ef..a625a9eb2a6c 100644 --- a/test/CodeGen/AArch64/fcopysign.ll +++ b/test/CodeGen/AArch64/fcopysign.ll @@ -5,10 +5,12 @@ target triple = "aarch64--" declare fp128 @llvm.copysign.f128(fp128, fp128) -@val = global double zeroinitializer, align 8 +@val_float = global float zeroinitializer, align 4 +@val_double = global double zeroinitializer, align 8 +@val_fp128 = global fp128 zeroinitializer, align 16 ; CHECK-LABEL: copysign0 -; CHECK: ldr [[REG:x[0-9]+]], [x8, :lo12:val] +; CHECK: ldr [[REG:x[0-9]+]], [x8, :lo12:val_double] ; CHECK: and [[ANDREG:x[0-9]+]], [[REG]], #0x8000000000000000 ; CHECK: lsr x[[LSRREGNUM:[0-9]+]], [[ANDREG]], #56 ; CHECK: bfxil w[[LSRREGNUM]], w{{[0-9]+}}, #0, #7 @@ -16,8 +18,25 @@ declare fp128 @llvm.copysign.f128(fp128, fp128) ; CHECK: ldr q{{[0-9]+}}, define fp128 @copysign0() { entry: - %v = load double, double* @val, align 8 + %v = load double, double* @val_double, align 8 %conv = fpext double %v to fp128 %call = tail call fp128 @llvm.copysign.f128(fp128 0xL00000000000000007FFF000000000000, fp128 %conv) #2 ret fp128 %call } + +; CHECK-LABEL: copysign1 +; CHECK-DAG: ldr [[REG:q[0-9]+]], [x8, :lo12:val_fp128] +; CHECK-DAG: ldr [[REG:w[0-9]+]], [x8, :lo12:val_float] +; CHECK: and [[ANDREG:w[0-9]+]], [[REG]], #0x80000000 +; CHECK: lsr w[[LSRREGNUM:[0-9]+]], [[ANDREG]], #24 +; CHECK: bfxil w[[LSRREGNUM]], w{{[0-9]+}}, #0, #7 +; CHECK: strb w[[LSRREGNUM]], +; CHECK: ldr q{{[0-9]+}}, +define fp128@copysign1() { +entry: + %v0 = load fp128, fp128* @val_fp128, align 16 + %v1 = load float, float* @val_float, align 4 + %conv = fpext float %v1 to fp128 + %call = tail call fp128 @llvm.copysign.f128(fp128 %v0, fp128 %conv) + ret fp128 %call +} diff --git a/test/CodeGen/AMDGPU/kernel-args.ll b/test/CodeGen/AMDGPU/kernel-args.ll index 9d1f582f4a88..11067522f857 100644 --- a/test/CodeGen/AMDGPU/kernel-args.ll +++ b/test/CodeGen/AMDGPU/kernel-args.ll @@ -16,13 +16,8 @@ ; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff -; EG: LSHR T0.X, KC0[2].Y, literal.x, -; EG-NEXT: MOV * T1.X, KC0[2].Z, -; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - -; CM: LSHR * T0.X, KC0[2].Y, literal.x, -; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; CM-NEXT: MOV * T1.X, KC0[2].Z, +; EGCM: VTX_READ_8{{.*}} #3 +; EGCM: KC0[2].Y define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { %ext = zext i8 %in to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 @@ -92,14 +87,8 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign ; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}} ; HSA-VI: flat_store_dword - -; EG: LSHR T0.X, KC0[2].Y, literal.x, -; EG-NEXT: MOV * T1.X, KC0[2].Z, -; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - -; CM: LSHR * T0.X, KC0[2].Y, literal.x, -; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) -; CM-NEXT: MOV * T1.X, KC0[2].Z, +; EGCM: VTX_READ_16 +; EGCM: KC0[2].Y define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { %ext = zext i16 %in to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 diff --git a/test/CodeGen/AMDGPU/mad_uint24.ll b/test/CodeGen/AMDGPU/mad_uint24.ll index 2c4f7d324a96..3c3371bf9166 100644 --- a/test/CodeGen/AMDGPU/mad_uint24.ll +++ b/test/CodeGen/AMDGPU/mad_uint24.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC -; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN +; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN +; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN declare i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -138,3 +138,90 @@ bb18: ; preds = %bb4 store i32 %tmp16, i32 addrspace(1)* %arg ret void } + +; FUNC-LABEL: {{^}}i8_mad_sat_16: +; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] +; The result must be sign-extended +; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x +; EG: 8 +; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16 +; GCN: v_med3_i32 v{{[0-9]}}, [[EXT]], +define amdgpu_kernel void @i8_mad_sat_16(i8 addrspace(1)* %out, i8 addrspace(1)* %in0, i8 addrspace(1)* %in1, i8 addrspace(1)* %in2, i64 addrspace(5)* %idx) { +entry: + %retval.0.i = load i64, i64 addrspace(5)* %idx + %arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 %retval.0.i + %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 %retval.0.i + %arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %in2, i64 %retval.0.i + %l1 = load i8, i8 addrspace(1)* %arrayidx, align 1 + %l2 = load i8, i8 addrspace(1)* %arrayidx2, align 1 + %l3 = load i8, i8 addrspace(1)* %arrayidx4, align 1 + %conv1.i = sext i8 %l1 to i16 + %conv3.i = sext i8 %l2 to i16 + %conv5.i = sext i8 %l3 to i16 + %mul.i.i.i = mul nsw i16 %conv3.i, %conv1.i + %add.i.i = add i16 %mul.i.i.i, %conv5.i + %c4 = icmp sgt i16 %add.i.i, -128 + %cond.i.i = select i1 %c4, i16 %add.i.i, i16 -128 + %c5 = icmp slt i16 %cond.i.i, 127 + %cond13.i.i = select i1 %c5, i16 %cond.i.i, i16 127 + %conv8.i = trunc i16 %cond13.i.i to i8 + %arrayidx7 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 %retval.0.i + store i8 %conv8.i, i8 addrspace(1)* %arrayidx7, align 1 + ret void +} + +; FUNC-LABEL: {{^}}i8_mad_32: +; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] +; The result must be sign-extended +; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x +; EG: 8 +; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16 +define amdgpu_kernel void @i8_mad_32(i32 addrspace(1)* %out, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i8 addrspace(1)* %c, i64 addrspace(5)* %idx) { +entry: + %retval.0.i = load i64, i64 addrspace(5)* %idx + %arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 %retval.0.i + %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %b, i64 %retval.0.i + %arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %c, i64 %retval.0.i + %la = load i8, i8 addrspace(1)* %arrayidx, align 1 + %lb = load i8, i8 addrspace(1)* %arrayidx2, align 1 + %lc = load i8, i8 addrspace(1)* %arrayidx4, align 1 + %exta = sext i8 %la to i16 + %extb = sext i8 %lb to i16 + %extc = sext i8 %lc to i16 + %mul = mul i16 %exta, %extb + %mad = add i16 %mul, %extc + %mad_ext = sext i16 %mad to i32 + store i32 %mad_ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}i8_mad_64: +; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] +; The result must be sign-extended +; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x +; EG: 8 +; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16 +define amdgpu_kernel void @i8_mad_64(i64 addrspace(1)* %out, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i8 addrspace(1)* %c, i64 addrspace(5)* %idx) { +entry: + %retval.0.i = load i64, i64 addrspace(5)* %idx + %arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 %retval.0.i + %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %b, i64 %retval.0.i + %arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %c, i64 %retval.0.i + %la = load i8, i8 addrspace(1)* %arrayidx, align 1 + %lb = load i8, i8 addrspace(1)* %arrayidx2, align 1 + %lc = load i8, i8 addrspace(1)* %arrayidx4, align 1 + %exta = sext i8 %la to i16 + %extb = sext i8 %lb to i16 + %extc = sext i8 %lc to i16 + %mul = mul i16 %exta, %extb + %mad = add i16 %mul, %extc + %mad_ext = sext i16 %mad to i64 + store i64 %mad_ext, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll b/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll index a7adc2ae996e..03f0539e19be 100644 --- a/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll +++ b/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll @@ -60,8 +60,11 @@ entry: } ; FUNC-LABEL: {{^}}test_implicit: -; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56 -; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 56 +; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56 == KC0[3].Z +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]], [[PTR:T[0-9]+.[XYZW]]] +; EG-NOT: VTX_READ +; EG-DAG: MOV {{\*?}} [[VAL]], KC0[3].Z +; EG-DAG: LSHR {{\*? *}}[[PTR]], KC0[2].Y, literal define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 { %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr() %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)* @@ -73,7 +76,7 @@ define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 { ; FUNC-LABEL: {{^}}test_implicit_dyn: ; 36 prepended implicit bytes + 8(out pointer + in) = 44 -; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44 +; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44, #3 define amdgpu_kernel void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 { %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr() %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)* diff --git a/test/CodeGen/NVPTX/load-store.ll b/test/CodeGen/NVPTX/load-store.ll new file mode 100644 index 000000000000..03b0109dea20 --- /dev/null +++ b/test/CodeGen/NVPTX/load-store.ll @@ -0,0 +1,88 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +; CHECK-LABEL: plain +define void @plain(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr { + ; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] + %a.load = load i8, i8* %a + %a.add = add i8 %a.load, 1 + ; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} + store i8 %a.add, i8* %a + + ; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] + %b.load = load i16, i16* %b + %b.add = add i16 %b.load, 1 + ; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} + store i16 %b.add, i16* %b + + ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] + %c.load = load i32, i32* %c + %c.add = add i32 %c.load, 1 + ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} + store i32 %c.add, i32* %c + + ; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] + %d.load = load i64, i64* %d + %d.add = add i64 %d.load, 1 + ; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} + store i64 %d.add, i64* %d + + ret void +} + +; CHECK-LABEL: volatile +define void @volatile(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr { + ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] + %a.load = load volatile i8, i8* %a + %a.add = add i8 %a.load, 1 + ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} + store volatile i8 %a.add, i8* %a + + ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] + %b.load = load volatile i16, i16* %b + %b.add = add i16 %b.load, 1 + ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} + store volatile i16 %b.add, i16* %b + + ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] + %c.load = load volatile i32, i32* %c + %c.add = add i32 %c.load, 1 + ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} + store volatile i32 %c.add, i32* %c + + ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] + %d.load = load volatile i64, i64* %d + %d.add = add i64 %d.load, 1 + ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} + store volatile i64 %d.add, i64* %d + + ret void +} + +; CHECK-LABEL: monotonic +define void @monotonic(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr { + ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] + %a.load = load atomic i8, i8* %a monotonic, align 1 + %a.add = add i8 %a.load, 1 + ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}} + store atomic i8 %a.add, i8* %a monotonic, align 1 + + ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] + %b.load = load atomic i16, i16* %b monotonic, align 2 + %b.add = add i16 %b.load, 1 + ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}} + store atomic i16 %b.add, i16* %b monotonic, align 2 + + ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}] + %c.load = load atomic i32, i32* %c monotonic, align 4 + %c.add = add i32 %c.load, 1 + ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}} + store atomic i32 %c.add, i32* %c monotonic, align 4 + + ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}] + %d.load = load atomic i64, i64* %d monotonic, align 8 + %d.add = add i64 %d.load, 1 + ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}} + store atomic i64 %d.add, i64* %d monotonic, align 8 + + ret void +} diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll index 812d9f50fe3c..36da9386fb06 100644 --- a/test/CodeGen/X86/masked_memop.ll +++ b/test/CodeGen/X86/masked_memop.ll @@ -1310,6 +1310,65 @@ define void @trunc_mask(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <4 x ret void } +; This needs to be widened to v4i32. +; This used to assert in type legalization. PR38436 +; FIXME: The codegen for AVX512 should use KSHIFT to zero the upper bits of the mask. +define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) { +; AVX1-LABEL: widen_masked_store: +; AVX1: ## %bb.0: +; AVX1-NEXT: vmovd %edx, %xmm1 +; AVX1-NEXT: vmovd %esi, %xmm2 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX1-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1 +; AVX1-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: widen_masked_store: +; AVX2: ## %bb.0: +; AVX2-NEXT: vmovd %edx, %xmm1 +; AVX2-NEXT: vmovd %esi, %xmm2 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; AVX2-NEXT: vmovd %ecx, %xmm2 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX2-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1 +; AVX2-NEXT: vpmaskmovd %xmm0, %xmm1, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: widen_masked_store: +; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3] +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; SKX-LABEL: widen_masked_store: +; SKX: ## %bb.0: +; SKX-NEXT: vpslld $31, %xmm1, %xmm1 +; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vmovdqa32 %xmm1, %xmm1 {%k1} {z} +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3] +; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} +; SKX-NEXT: retq + call void @llvm.masked.store.v3i32(<3 x i32> %v, <3 x i32>* %p, i32 16, <3 x i1> %mask) + ret void +} +declare void @llvm.masked.store.v3i32(<3 x i32>, <3 x i32>*, i32, <3 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>) declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>) diff --git a/test/Instrumentation/BoundsChecking/many-traps-2.ll b/test/Instrumentation/BoundsChecking/many-traps-2.ll new file mode 100644 index 000000000000..a6e99586af21 --- /dev/null +++ b/test/Instrumentation/BoundsChecking/many-traps-2.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -bounds-checking -S | FileCheck %s +@array = internal global [1819 x i16] zeroinitializer, section ".bss,bss" +@offsets = external dso_local global [10 x i16] + +; CHECK-LABEL: @test +define dso_local void @test() { +bb1: + br label %bb19 + +bb20: + %_tmp819 = load i16, i16* null +; CHECK: br {{.*}} %trap + %_tmp820 = sub nsw i16 9, %_tmp819 + %_tmp821 = sext i16 %_tmp820 to i64 + %_tmp822 = getelementptr [10 x i16], [10 x i16]* @offsets, i16 0, i64 %_tmp821 + %_tmp823 = load i16, i16* %_tmp822 + br label %bb33 + +bb34: + %_tmp907 = zext i16 %i__7.107.0 to i64 + %_tmp908 = getelementptr [1819 x i16], [1819 x i16]* @array, i16 0, i64 %_tmp907 + store i16 0, i16* %_tmp908 +; CHECK: br {{.*}} %trap + %_tmp910 = add i16 %i__7.107.0, 1 + br label %bb33 + +bb33: + %i__7.107.0 = phi i16 [ undef, %bb20 ], [ %_tmp910, %bb34 ] + %_tmp913 = add i16 %_tmp823, 191 + %_tmp914 = icmp ult i16 %i__7.107.0, %_tmp913 + br i1 %_tmp914, label %bb34, label %bb19 + +bb19: + %_tmp976 = icmp slt i16 0, 10 + br i1 %_tmp976, label %bb20, label %bb39 + +bb39: + ret void +} + +@e = dso_local local_unnamed_addr global [1 x i16] zeroinitializer, align 1 + +; CHECK-LABEL: @test2 +define dso_local void @test2() local_unnamed_addr { +entry: + br label %while.cond1.preheader + +while.cond1.preheader: + %0 = phi i16 [ undef, %entry ], [ %inc, %while.end ] + %1 = load i16, i16* undef, align 1 +; CHECK: br {{.*}} %trap + br label %while.end + +while.end: + %inc = add nsw i16 %0, 1 + %arrayidx = getelementptr inbounds [1 x i16], [1 x i16]* @e, i16 0, i16 + %0 + %2 = load i16, i16* %arrayidx, align 1 +; CHECK: or i1 +; CHECK-NEXT: br {{.*}} %trap + br i1 false, label %while.end6, label %while.cond1.preheader + +while.end6: + ret void +} diff --git a/test/MC/ELF/extra-section-flags.s b/test/MC/ELF/extra-section-flags.s deleted file mode 100644 index bde7e1abf8ef..000000000000 --- a/test/MC/ELF/extra-section-flags.s +++ /dev/null @@ -1,12 +0,0 @@ -# RUN: llvm-mc -triple x86_64-unknown-unknown -filetype=obj %s -o /dev/null 2>&1 | FileCheck %s - -.section .rodata, "ax" -# CHECK: warning: setting incorrect section attributes for .rodata -nop - -.section .rodata, "a" -nop -.section .rodata.cst4, "aM",@progbits,8 -nop -# CHECK-NOT: warning: - diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll index ed68f1121278..8054eb045364 100644 --- a/test/Transforms/InstSimplify/AndOrXor.ll +++ b/test/Transforms/InstSimplify/AndOrXor.ll @@ -967,12 +967,8 @@ define i32 @reversed_not(i32 %a) { define i64 @shl_or_and1(i32 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and1( -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1 -; CHECK-NEXT: ret i64 [[TMP5]] +; CHECK-NEXT: ret i64 [[TMP2]] ; %tmp1 = zext i32 %a to i64 %tmp2 = zext i1 %b to i64 @@ -985,11 +981,8 @@ define i64 @shl_or_and1(i32 %a, i1 %b) { define i64 @shl_or_and2(i32 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and2( ; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4294967296 -; CHECK-NEXT: ret i64 [[TMP5]] +; CHECK-NEXT: ret i64 [[TMP3]] ; %tmp1 = zext i1 %b to i64 %tmp2 = zext i32 %a to i64 @@ -999,15 +992,11 @@ define i64 @shl_or_and2(i32 %a, i1 %b) { ret i64 %tmp5 } -; concatinate two 32-bit integers and extract lower 32-bit +; concatenate two 32-bit integers and extract lower 32-bit define i64 @shl_or_and3(i32 %a, i32 %b) { ; CHECK-LABEL: @shl_or_and3( -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4294967295 -; CHECK-NEXT: ret i64 [[TMP5]] +; CHECK-NEXT: ret i64 [[TMP2]] ; %tmp1 = zext i32 %a to i64 %tmp2 = zext i32 %b to i64 @@ -1017,15 +1006,12 @@ define i64 @shl_or_and3(i32 %a, i32 %b) { ret i64 %tmp5 } -; concatinate two 16-bit integers and extract higher 16-bit +; concatenate two 16-bit integers and extract higher 16-bit define i32 @shl_or_and4(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and4( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], -65536 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: ret i32 [[TMP3]] ; %tmp1 = zext i16 %a to i32 %tmp2 = zext i16 %b to i32 @@ -1037,12 +1023,8 @@ define i32 @shl_or_and4(i16 %a, i16 %b) { define i128 @shl_or_and5(i64 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and5( -; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[A:%.*]] to i128 ; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i128 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i128 [[TMP1]], 64 -; CHECK-NEXT: [[TMP4:%.*]] = or i128 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i128 [[TMP4]], 1 -; CHECK-NEXT: ret i128 [[TMP5]] +; CHECK-NEXT: ret i128 [[TMP2]] ; %tmp1 = zext i64 %a to i128 %tmp2 = zext i1 %b to i128 @@ -1108,12 +1090,8 @@ define i32 @shl_or_and8(i16 %a, i16 %b) { define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) { ; CHECK-LABEL: @shl_or_and1v( -; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32> -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 1, i64 1> -; CHECK-NEXT: ret <2 x i64> [[TMP5]] +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %tmp1 = zext <2 x i32> %a to <2 x i64> %tmp2 = zext <2 x i1> %b to <2 x i64> @@ -1126,11 +1104,8 @@ define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) { define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) { ; CHECK-LABEL: @shl_or_and2v( ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32> -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 4294967296, i64 4294967296> -; CHECK-NEXT: ret <2 x i64> [[TMP5]] +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %tmp1 = zext <2 x i1> %b to <2 x i64> %tmp2 = zext <2 x i32> %a to <2 x i64> diff --git a/test/Transforms/InstSimplify/floating-point-compare.ll b/test/Transforms/InstSimplify/floating-point-compare.ll index b1444bb5a1e2..bc5c58a698e3 100644 --- a/test/Transforms/InstSimplify/floating-point-compare.ll +++ b/test/Transforms/InstSimplify/floating-point-compare.ll @@ -266,13 +266,15 @@ define i1 @orderedLessZeroMinNum(float, float) { ret i1 %uge } -; FIXME: This is wrong. ; PR37776: https://bugs.llvm.org/show_bug.cgi?id=37776 ; exp() may return nan, leaving %1 as the unknown result, so we can't simplify. define i1 @orderedLessZeroMaxNum(float, float) { ; CHECK-LABEL: @orderedLessZeroMaxNum( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[A:%.*]] = call float @llvm.exp.f32(float [[TMP0:%.*]]) +; CHECK-NEXT: [[B:%.*]] = call float @llvm.maxnum.f32(float [[A]], float [[TMP1:%.*]]) +; CHECK-NEXT: [[UGE:%.*]] = fcmp uge float [[B]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[UGE]] ; %a = call float @llvm.exp.f32(float %0) %b = call float @llvm.maxnum.f32(float %a, float %1) diff --git a/test/Transforms/NewGVN/pair_jumpthread.ll b/test/Transforms/NewGVN/pair_jumpthread.ll index 9e55cda82eb1..65d94e1f6353 100644 --- a/test/Transforms/NewGVN/pair_jumpthread.ll +++ b/test/Transforms/NewGVN/pair_jumpthread.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -newgvn -S | FileCheck %s ; RUN: opt < %s -newgvn -jump-threading -S | FileCheck --check-prefix=CHECK-JT %s -; This test is expected to fail until the transformation is committed. -; XFAIL: * define signext i32 @testBI(i32 signext %v) { ; Test with std::pair<bool, int> |