Skip to content

Commit 86c4bd6

Browse files
committed
[DAG] Relax type restriction for store merge
Summary: Allow stores of bitcastable types to be merged by peeking through BITCAST nodes and recasting the stored values (constant and vector-extract nodes) as necessary. Reviewers: jyknight, hfinkel, efriedma, RKSimon, spatel Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D34569 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310655 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 6f0ee4d commit 86c4bd6

File tree

2 files changed

+66
-29
lines changed

2 files changed

+66
-29
lines changed

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 64 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,8 @@ namespace {
466466
/// This is a helper function for MergeConsecutiveStores. When the
467467
/// source elements of the consecutive stores are all constants or
468468
/// all extracted vector elements, try to merge them into one
469-
/// larger store. \return True if a merged store was created.
469+
/// larger store, introducing bitcasts if necessary. \return True
470+
/// if a merged store was created.
470471
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
471472
EVT MemVT, unsigned NumStores,
472473
bool IsConstantSrc, bool UseVector,
@@ -12474,22 +12475,59 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
1247412475
for (unsigned I = 0; I != NumStores; ++I) {
1247512476
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
1247612477
SDValue Val = St->getValue();
12477-
if (MemVT.getScalarType().isInteger())
12478-
if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val))
12479-
Val = DAG.getConstant(
12480-
(uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
12481-
SDLoc(CFP), MemVT);
12478+
// If constant is of the wrong type, convert it now.
12479+
if (MemVT != Val.getValueType()) {
12480+
Val = peekThroughBitcast(Val);
12481+
// Deal with constants of wrong size.
12482+
if (ElementSizeBytes * 8 != Val.getValueSizeInBits()) {
12483+
EVT IntMemVT =
12484+
EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
12485+
if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val))
12486+
Val = DAG.getConstant(
12487+
CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(
12488+
8 * ElementSizeBytes),
12489+
SDLoc(CFP), IntMemVT);
12490+
else if (auto *C = dyn_cast<ConstantSDNode>(Val))
12491+
Val = DAG.getConstant(
12492+
C->getAPIntValue().zextOrTrunc(8 * ElementSizeBytes),
12493+
SDLoc(C), IntMemVT);
12494+
}
12495+
// Make sure the correctly sized value is also of the correct type.
12496+
Val = DAG.getBitcast(MemVT, Val);
12497+
}
1248212498
BuildVector.push_back(Val);
1248312499
}
12484-
StoredVal = DAG.getBuildVector(StoreTy, DL, BuildVector);
12500+
StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
12501+
: ISD::BUILD_VECTOR,
12502+
DL, StoreTy, BuildVector);
1248512503
} else {
1248612504
SmallVector<SDValue, 8> Ops;
1248712505
for (unsigned i = 0; i < NumStores; ++i) {
1248812506
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12489-
SDValue Val = St->getValue();
12490-
// All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
12491-
if (Val.getValueType() != MemVT)
12492-
return false;
12507+
SDValue Val = peekThroughBitcast(St->getValue());
12508+
// All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
12509+
// type MemVT. If the underlying value is not the correct
12510+
// type, but it is an extraction of an appropriate vector we
12511+
// can recast Val to be of the correct type. This may require
12512+
// converting between EXTRACT_VECTOR_ELT and
12513+
// EXTRACT_SUBVECTOR.
12514+
if ((MemVT != Val.getValueType()) &&
12515+
(Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12516+
Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
12517+
SDValue Vec = Val.getOperand(0);
12518+
EVT MemVTScalarTy = MemVT.getScalarType();
12519+
// We may need to add a bitcast here to get types to line up.
12520+
if (MemVTScalarTy != Vec.getValueType()) {
12521+
unsigned Elts = Vec.getValueType().getSizeInBits() /
12522+
MemVTScalarTy.getSizeInBits();
12523+
EVT NewVecTy =
12524+
EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
12525+
Vec = DAG.getBitcast(NewVecTy, Vec);
12526+
}
12527+
auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
12528+
: ISD::EXTRACT_VECTOR_ELT;
12529+
Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
12530+
}
1249312531
Ops.push_back(Val);
1249412532
}
1249512533

@@ -12532,7 +12570,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
1253212570

1253312571
// make sure we use trunc store if it's necessary to be legal.
1253412572
SDValue NewStore;
12535-
if (UseVector || !UseTrunc) {
12573+
if (!UseTrunc) {
1253612574
NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
1253712575
FirstInChain->getPointerInfo(),
1253812576
FirstInChain->getAlignment());
@@ -12573,7 +12611,7 @@ void DAGCombiner::getStoreMergeCandidates(
1257312611
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
1257412612
EVT MemVT = St->getMemoryVT();
1257512613

12576-
SDValue Val = St->getValue();
12614+
SDValue Val = peekThroughBitcast(St->getValue());
1257712615
// We must have a base and an offset.
1257812616
if (!BasePtr.getBase().getNode())
1257912617
return;
@@ -12601,10 +12639,12 @@ void DAGCombiner::getStoreMergeCandidates(
1260112639
int64_t &Offset) -> bool {
1260212640
if (Other->isVolatile() || Other->isIndexed())
1260312641
return false;
12604-
SDValue Val = Other->getValue();
12642+
SDValue Val = peekThroughBitcast(Other->getValue());
12643+
// Allow merging constants of different types as integers.
12644+
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
12645+
: Other->getMemoryVT() != MemVT;
1260512646
if (IsLoadSrc) {
12606-
// Loads must match type.
12607-
if (Other->getMemoryVT() != MemVT)
12647+
if (NoTypeMatch)
1260812648
return false;
1260912649
// The Load's Base Ptr must also match
1261012650
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
@@ -12617,16 +12657,16 @@ void DAGCombiner::getStoreMergeCandidates(
1261712657
return false;
1261812658
}
1261912659
if (IsConstantSrc) {
12620-
// Allow merging constants of different types as integers.
12621-
if (MemVT.isInteger() ? !MemVT.bitsEq(Other->getMemoryVT())
12622-
: Other->getMemoryVT() != MemVT)
12660+
if (NoTypeMatch)
1262312661
return false;
1262412662
if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
1262512663
return false;
1262612664
}
1262712665
if (IsExtractVecSrc) {
12628-
// Must match type.
12629-
if (Other->getMemoryVT() != MemVT)
12666+
// Do not merge truncated stores here.
12667+
if (Other->isTruncatingStore())
12668+
return false;
12669+
if (!MemVT.bitsEq(Val.getValueType()))
1263012670
return false;
1263112671
if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
1263212672
Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
@@ -12723,7 +12763,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
1272312763

1272412764
// Perform an early exit check. Do not bother looking at stored values that
1272512765
// are not constants, loads, or extracted vector elements.
12726-
SDValue StoredVal = St->getValue();
12766+
SDValue StoredVal = peekThroughBitcast(St->getValue());
1272712767
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
1272812768
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
1272912769
isa<ConstantFPSDNode>(StoredVal);
@@ -12911,7 +12951,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
1291112951
unsigned NumStoresToMerge = 1;
1291212952
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
1291312953
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12914-
SDValue StVal = St->getValue();
12954+
SDValue StVal = peekThroughBitcast(St->getValue());
1291512955
// This restriction could be loosened.
1291612956
// Bail out if any stored values are not elements extracted from a
1291712957
// vector. It should be possible to handle mixed sources, but load
@@ -12977,7 +13017,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
1297713017
BaseIndexOffset LdBasePtr;
1297813018
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
1297913019
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12980-
SDValue Val = St->getValue();
13020+
SDValue Val = peekThroughBitcast(St->getValue());
1298113021
LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
1298213022
if (!Ld)
1298313023
break;

test/CodeGen/X86/MergeConsecutiveStores.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x flo
522522
; CHECK-NEXT: retq
523523
}
524524

525-
; Merging vector stores when sourced from vector loads is not currently handled.
525+
; Merging vector stores when sourced from vector loads.
526526
define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {
527527
%load_idx0 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 0
528528
%load_idx1 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 1
@@ -621,9 +621,6 @@ define void @merge_bitcast(<4 x i32> %v, float* %ptr) {
621621
ret void
622622

623623
; CHECK-LABEL: merge_bitcast
624-
; CHECK: vmovd %xmm0, (%rdi)
625-
; CHECK-NEXT: vpextrd $1, %xmm0, 4(%rdi)
626-
; CHECK-NEXT: vpextrd $2, %xmm0, 8(%rdi)
627-
; CHECK-NEXT: vpextrd $3, %xmm0, 12(%rdi)
624+
; CHECK: vmovups %xmm0, (%rdi)
628625
; CHECK-NEXT: retq
629626
}

0 commit comments

Comments
 (0)