@@ -4765,16 +4765,16 @@ define void @scaleidx_scatter_outofrange(<8 x float> %value, ptr %base, <8 x i32
47654765}
47664766declare void @llvm.masked.scatter.v8f32.v8p0 (<8 x float >, <8 x ptr >, i32 immarg, <8 x i1 >)
47674767
4768- define <16 x i32 > @pr163023 (ptr %a0 , <16 x i32 > %a1 ) {
4769- ; X64-LABEL: pr163023 :
4768+ define <16 x i32 > @pr163023_sext (ptr %a0 , <16 x i32 > %a1 ) {
4769+ ; X64-LABEL: pr163023_sext :
47704770; X64: # %bb.0:
47714771; X64-NEXT: kxnorw %k0, %k0, %k1
47724772; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
47734773; X64-NEXT: vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
47744774; X64-NEXT: vmovdqa64 %zmm1, %zmm0
47754775; X64-NEXT: retq
47764776;
4777- ; X86-LABEL: pr163023 :
4777+ ; X86-LABEL: pr163023_sext :
47784778; X86: # %bb.0:
47794779; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
47804780; X86-NEXT: kxnorw %k0, %k0, %k1
@@ -4788,7 +4788,40 @@ define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) {
47884788 %ofs = sext <16 x i32 > %a1 to <16 x i64 >
47894789 %addr = add nuw <16 x i64 > %addr.splat , %ofs
47904790 %ptr = inttoptr <16 x i64 > %addr to <16 x ptr >
4791- %gather = tail call fastcc <16 x i32 > @llvm.masked.gather.v16i32.v16p0 (<16 x ptr > %ptr , i32 4 , <16 x i1 > splat (i1 true ), <16 x i32 > poison)
4791+ %gather = call <16 x i32 > @llvm.masked.gather.v16i32.v16p0 (<16 x ptr > %ptr , i32 4 , <16 x i1 > splat (i1 true ), <16 x i32 > poison)
4792+ ret <16 x i32 > %gather
4793+ }
4794+
; pr163023_zext: gather 16 x i32 through pointers built as
;   inttoptr(splat(ptrtoint %a0) + zext(%a1 : <16 x i32>) to <16 x i64>)
; with an all-true mask, a poison passthrough and alignment 4.
; Per the checks below, the X64 run widens the offsets with vpmovzxdq and
; issues two vpgatherqd (64-bit indices), while the X86 run uses a single
; vpgatherdd directly on the 32-bit offsets.
; NOTE(review): presumably the X64 run must not fold the zext into a 32-bit
; gather index because that index would be sign-extended - confirm against
; the PR163023 discussion.
4795+ define <16 x i32 > @pr163023_zext (ptr %a0 , <16 x i32 > %a1 ) {
4796+ ; X64-LABEL: pr163023_zext:
4797+ ; X64: # %bb.0:
4798+ ; X64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4799+ ; X64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
4800+ ; X64-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4801+ ; X64-NEXT: kxnorw %k0, %k0, %k1
4802+ ; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
4803+ ; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3
4804+ ; X64-NEXT: kxnorw %k0, %k0, %k2
4805+ ; X64-NEXT: vpgatherqd (%rdi,%zmm0), %ymm3 {%k2}
4806+ ; X64-NEXT: vpgatherqd (%rdi,%zmm1), %ymm2 {%k1}
4807+ ; X64-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0
4808+ ; X64-NEXT: retq
4809+ ;
4810+ ; X86-LABEL: pr163023_zext:
4811+ ; X86: # %bb.0:
4812+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4813+ ; X86-NEXT: kxnorw %k0, %k0, %k1
4814+ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
4815+ ; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1}
4816+ ; X86-NEXT: vmovdqa64 %zmm1, %zmm0
4817+ ; X86-NEXT: retl
; Splat the base address across all 16 lanes as i64.
4818+ %addr.p = ptrtoint ptr %a0 to i64
4819+ %addr.v = insertelement <1 x i64 > poison, i64 %addr.p , i64 0
4820+ %addr.splat = shufflevector <1 x i64 > %addr.v , <1 x i64 > poison, <16 x i32 > zeroinitializer
; Offsets are zero-extended (not sign-extended) from i32 to i64 before the add.
4821+ %ofs = zext <16 x i32 > %a1 to <16 x i64 >
4822+ %addr = add nuw <16 x i64 > %addr.splat , %ofs
4823+ %ptr = inttoptr <16 x i64 > %addr to <16 x ptr >
4824+ %gather = call <16 x i32 > @llvm.masked.gather.v16i32.v16p0 (<16 x ptr > %ptr , i32 4 , <16 x i1 > splat (i1 true ), <16 x i32 > poison)
47924825 ret <16 x i32 > %gather
47934826}
47944827
0 commit comments