Skip to content

Commit dfe9b14

Browse files
committed
Improve handling of insert_subvector of bitcast values
Fix insert_subvector / extract_subvector merges of bitcast values. Reviewers: efriedma, craig.topper, RKSimon Subscribers: RKSimon, llvm-commits Differential Revision: https://reviews.llvm.org/D34571 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310711 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent b872fbb commit dfe9b14

File tree

3 files changed

+44
-21
lines changed

3 files changed

+44
-21
lines changed

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15890,12 +15890,47 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
1589015890
if (N1.isUndef())
1589115891
return N0;
1589215892

15893+
// For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
15894+
// us to pull BITCASTs from input to output.
15895+
if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
15896+
if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
15897+
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
15898+
1589315899
// If this is an insert of an extracted vector into an undef vector, we can
1589415900
// just use the input to the extract.
1589515901
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
1589615902
N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
1589715903
return N1.getOperand(0);
1589815904

15905+
// If we are inserting a bitcast value into an undef, with the same
15906+
// number of elements, just use the bitcast input of the extract.
15907+
// i.e. INSERT_SUBVECTOR UNDEF (BITCAST (EXTRACT_SUBVECTOR X N2)) N2 ->
15908+
// BITCAST X
15909+
if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
15910+
N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15911+
N1.getOperand(0).getOperand(1) == N2 &&
15912+
N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
15913+
VT.getVectorNumElements()) {
15914+
return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
15915+
}
15916+
15917+
// If both N0 and N1 are bitcast values on which insert_subvector
15918+
// would make sense, pull the bitcast through.
15919+
// i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
15920+
// BITCAST (INSERT_SUBVECTOR N0 N1 N2)
15921+
if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
15922+
SDValue CN0 = N0.getOperand(0);
15923+
SDValue CN1 = N1.getOperand(0);
15924+
if (CN0.getValueType().getVectorElementType() ==
15925+
CN1.getValueType().getVectorElementType() &&
15926+
CN0.getValueType().getVectorNumElements() ==
15927+
VT.getVectorNumElements()) {
15928+
SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
15929+
CN0.getValueType(), CN0, CN1, N2);
15930+
return DAG.getBitcast(VT, NewINSERT);
15931+
}
15932+
}
15933+
1589915934
// Combine INSERT_SUBVECTORs where we are inserting to the same index.
1590015935
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
1590115936
// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )

test/CodeGen/X86/MergeConsecutiveStores.ll

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -492,15 +492,10 @@ define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
492492
store float %vecext7, float* %arrayidx7, align 4
493493
ret void
494494

495-
; CHECK: vextractf128 $1, %ymm0, %xmm1
496-
; CHECK: vinsertf128 $1, %xmm1, %ymm0, %ymm0
495+
; CHECK-LABEL: merge_vec_element_store
496+
; CHECK: vmovups %ymm0, (%rdi)
497+
; CHECK: vzeroupper
497498
; CHECK: retq
498-
499-
; This is what should be generated:
500-
; FIXME-LABEL: merge_vec_element_store
501-
; FIXME: vmovups
502-
; FIXME-NEXT: vzeroupper
503-
; FIXME-NEXT: retq
504499
}
505500

506501
; PR21711 - Merge vector stores into wider vector stores.
@@ -520,18 +515,11 @@ define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x flo
520515
store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
521516
ret void
522517

523-
; These vblendpd are obviously redundant.
524-
; CHECK: vblendpd $12, %ymm0, %ymm0, %ymm0 # ymm0 = ymm0[0,1,2,3]
525-
; CHECK: vmovupd %ymm0, 48(%rdi)
526-
; CHECK: vblendpd $12, %ymm1, %ymm1, %ymm0 # ymm0 = ymm1[0,1,2,3]
527-
; CHECK: vmovupd %ymm0, 80(%rdi)
528-
529-
; This is what should be generated:
530-
; FIXME-LABEL: merge_vec_extract_stores
531-
; FIXME: vmovups %ymm0, 48(%rdi)
532-
; FIXME-NEXT: vmovups %ymm1, 80(%rdi)
533-
; FIXME-NEXT: vzeroupper
534-
; FIXME-NEXT: retq
518+
; CHECK-LABEL: merge_vec_extract_stores
519+
; CHECK: vmovups %ymm0, 48(%rdi)
520+
; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
521+
; CHECK-NEXT: vzeroupper
522+
; CHECK-NEXT: retq
535523
}
536524

537525
; Merging vector stores when sourced from vector loads.

test/CodeGen/X86/vector-shuffle-256-v4.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -807,10 +807,10 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
807807
define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
808808
; AVX1-LABEL: shuffle_v4i64_0412:
809809
; AVX1: # BB#0:
810+
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
810811
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
811812
; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
812813
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
813-
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
814814
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
815815
; AVX1-NEXT: retq
816816
;

0 commit comments

Comments
 (0)