@@ -3444,3 +3444,140 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
34443444}
34453445
34463446declare i8 @llvm.x86.avx512.mask.ucmp.q.512 (<8 x i64 >, <8 x i64 >, i32 , i8 ) nounwind readnone
3447+
declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)

; Broadcast a <4 x float> into all four 128-bit lanes of a <16 x float>.
; Exercises the unmasked (mask = -1), merge-masked (%x2 passthru), and
; zero-masked forms of the intrinsic; the results are summed so all three
; calls stay live through DCE.
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}
3470+
; Same f32x4 broadcast but with the 128-bit source loaded from memory, so the
; compiler can fold the load into a single masked vbroadcastf32x4.
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512_load(<4 x float>* %x0ptr, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %x0 = load <4 x float>, <4 x float>* %x0ptr
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
  ret <16 x float> %res
}
3481+
declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)

; Broadcast a <4 x double> into both 256-bit halves of a <8 x double>.
; Covers unmasked (mask = -1), merge-masked, and zero-masked forms; the
; three results are summed so none can be dead-code-eliminated.
define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vaddpd %zmm1, %zmm2, %zmm1
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
  %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res1, %res2
  %res5 = fadd <8 x double> %res3, %res4
  ret <8 x double> %res5
}
3503+
; Same f64x4 broadcast but with the 256-bit source loaded from memory, so the
; compiler can fold the load into a single masked vbroadcastf64x4.
define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512_load(<4 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq

  %x0 = load <4 x double>, <4 x double>* %x0ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
  ret <8 x double> %res
}
3515+
declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)

; Integer counterpart of the f32x4 test: broadcast a <4 x i32> into all four
; 128-bit lanes of a <16 x i32>. Covers unmasked (-1), merge-masked, and
; zero-masked forms; results are added so all three calls stay live.
define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}
3538+
; Same i32x4 broadcast but with the 128-bit source loaded from memory, so the
; compiler can fold the load into a single masked vbroadcasti32x4.
define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512_load(<4 x i32>* %x0ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq

  %x0 = load <4 x i32>, <4 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
  ret <16 x i32> %res
}
3550+
declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)

; Integer counterpart of the f64x4 test: broadcast a <4 x i64> into both
; 256-bit halves of a <8 x i64>. Covers unmasked (-1), merge-masked, and
; zero-masked forms; results are added so all three calls stay live.
define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vpaddq %zmm1, %zmm2, %zmm1
; CHECK-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
  %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res1, %res2
  %res5 = add <8 x i64> %res3, %res4
  ret <8 x i64> %res5
}
3572+
; Same i64x4 broadcast but with the 256-bit source loaded from memory, so the
; compiler can fold the load into a single masked vbroadcasti64x4.
define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512_load(<4 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq

  %x0 = load <4 x i64>, <4 x i64>* %x0ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
  ret <8 x i64> %res
}
; NOTE(review): "0 commit comments" is a GitHub commit-page footer captured by
; the web scrape, not part of the test file; commented out to keep the file parseable.