Skip to content

Commit b872fbb

Browse files
committed
[X86][DAG] Switch X86 Target to post-legalized store merge
Move store merge to happen after intrinsic lowering to allow lowered stores to be merged. Some regressions in MergeConsecutiveStores, due to missing insert_subvector, are addressed in a follow-up patch. Reviewers: craig.topper, efriedma, RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D34559 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310710 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 0fdbc97 commit b872fbb

16 files changed

+175
-235
lines changed

include/llvm/Target/TargetLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2723,6 +2723,9 @@ class TargetLowering : public TargetLoweringBase {
27232723
bool foldBooleans, DAGCombinerInfo &DCI,
27242724
const SDLoc &dl) const;
27252725

2726+
// For targets which wrap addresses, unwrap the address for analysis.
2727+
virtual SDValue unwrapAddress(SDValue N) const { return N; }
2728+
27262729
/// Returns true (and the GlobalValue and the offset) if the node is a
27272730
/// GlobalAddress + offset.
27282731
virtual bool

lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/CodeGen/MachineFrameInfo.h"
1515
#include "llvm/CodeGen/SelectionDAG.h"
1616
#include "llvm/CodeGen/SelectionDAGNodes.h"
17+
#include "llvm/Target/TargetLowering.h"
1718

1819
namespace llvm {
1920

@@ -55,7 +56,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
5556
/// Parses tree in Ptr for base, index, offset addresses.
5657
BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
5758
// (((B + I*M) + c)) + c ...
58-
SDValue Base = Ptr;
59+
SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
5960
SDValue Index = SDValue();
6061
int64_t Offset = 0;
6162
bool IsIndexSignExt = false;

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27034,6 +27034,12 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
2703427034
return 1;
2703527035
}
2703627036

27037+
SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
27038+
if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP)
27039+
return N->getOperand(0);
27040+
return N;
27041+
}
27042+
2703727043
/// Returns true (and the GlobalValue and the offset) if the node is a
2703827044
/// GlobalAddress + offset.
2703927045
bool X86TargetLowering::isGAPlusOffset(SDNode *N,

lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,8 @@ namespace llvm {
812812
/// This method returns the name of a target specific DAG node.
813813
const char *getTargetNodeName(unsigned Opcode) const override;
814814

815+
bool mergeStoresAfterLegalization() const override { return true; }
816+
815817
bool isCheapToSpeculateCttz() const override;
816818

817819
bool isCheapToSpeculateCtlz() const override;
@@ -867,6 +869,8 @@ namespace llvm {
867869
const SelectionDAG &DAG,
868870
unsigned Depth) const override;
869871

872+
SDValue unwrapAddress(SDValue N) const override;
873+
870874
bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
871875
int64_t &Offset) const override;
872876

test/CodeGen/X86/MergeConsecutiveStores.ll

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -492,10 +492,15 @@ define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
492492
store float %vecext7, float* %arrayidx7, align 4
493493
ret void
494494

495-
; CHECK-LABEL: merge_vec_element_store
496-
; CHECK: vmovups
497-
; CHECK-NEXT: vzeroupper
498-
; CHECK-NEXT: retq
495+
; CHECK: vextractf128 $1, %ymm0, %xmm1
496+
; CHECK: vinsertf128 $1, %xmm1, %ymm0, %ymm0
497+
; CHECK: retq
498+
499+
; This is what should be generated:
500+
; FIXME-LABEL: merge_vec_element_store
501+
; FIXME: vmovups
502+
; FIXME-NEXT: vzeroupper
503+
; FIXME-NEXT: retq
499504
}
500505

501506
; PR21711 - Merge vector stores into wider vector stores.
@@ -515,11 +520,18 @@ define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x flo
515520
store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
516521
ret void
517522

518-
; CHECK-LABEL: merge_vec_extract_stores
519-
; CHECK: vmovups %ymm0, 48(%rdi)
520-
; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
521-
; CHECK-NEXT: vzeroupper
522-
; CHECK-NEXT: retq
523+
; These vblendpd are obviously redundant.
524+
; CHECK: vblendpd $12, %ymm0, %ymm0, %ymm0 # ymm0 = ymm0[0,1,2,3]
525+
; CHECK: vmovupd %ymm0, 48(%rdi)
526+
; CHECK: vblendpd $12, %ymm1, %ymm1, %ymm0 # ymm0 = ymm1[0,1,2,3]
527+
; CHECK: vmovupd %ymm0, 80(%rdi)
528+
529+
; This is what should be generated:
530+
; FIXME-LABEL: merge_vec_extract_stores
531+
; FIXME: vmovups %ymm0, 48(%rdi)
532+
; FIXME-NEXT: vmovups %ymm1, 80(%rdi)
533+
; FIXME-NEXT: vzeroupper
534+
; FIXME-NEXT: retq
523535
}
524536

525537
; Merging vector stores when sourced from vector loads.
@@ -557,8 +569,7 @@ define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
557569
}
558570

559571
; This is a minimized test based on real code that was failing.
560-
; We could merge stores (and loads) like this...
561-
572+
; This should now be merged.
562573
define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
563574
%idx0 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 0
564575
%idx1 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 1
@@ -575,10 +586,8 @@ define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
575586
ret void
576587

577588
; CHECK-LABEL: merge_vec_element_and_scalar_load
578-
; CHECK: movq (%rdi), %rax
579-
; CHECK-NEXT: movq 8(%rdi), %rcx
580-
; CHECK-NEXT: movq %rax, 32(%rdi)
581-
; CHECK-NEXT: movq %rcx, 40(%rdi)
589+
; CHECK: vmovups (%rdi), %xmm0
590+
; CHECK-NEXT: vmovups %xmm0, 32(%rdi)
582591
; CHECK-NEXT: retq
583592
}
584593

test/CodeGen/X86/bigstructret.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,21 @@ entry:
3131
ret %0 %3
3232
}
3333

34+
3435
define fastcc %1 @ReturnBigStruct2() nounwind readnone {
3536
; X86-LABEL: ReturnBigStruct2:
3637
; X86: # BB#0: # %entry
3738
; X86-NEXT: movl $48, 4(%ecx)
3839
; X86-NEXT: movb $1, 2(%ecx)
39-
; X86-NEXT: movb $1, 1(%ecx)
40-
; X86-NEXT: movb $0, (%ecx)
40+
; X86-NEXT: movw $256, (%ecx) # imm = 0x100
4141
; X86-NEXT: movl %ecx, %eax
4242
; X86-NEXT: retl
4343
;
4444
; X64-LABEL: ReturnBigStruct2:
4545
; X64: # BB#0: # %entry
4646
; X64-NEXT: movl $48, 4(%rdi)
4747
; X64-NEXT: movb $1, 2(%rdi)
48-
; X64-NEXT: movb $1, 1(%rdi)
49-
; X64-NEXT: movb $0, (%rdi)
48+
; X64-NEXT: movw $256, (%rdi) # imm = 0x100
5049
; X64-NEXT: movq %rdi, %rax
5150
; X64-NEXT: retq
5251
entry:

test/CodeGen/X86/bitcast-i256.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,8 @@ define i256 @foo(<8 x i32> %a) {
1212
;
1313
; SLOW-LABEL: foo:
1414
; SLOW: # BB#0:
15-
; SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
16-
; SLOW-NEXT: vpextrq $1, %xmm1, 24(%rdi)
17-
; SLOW-NEXT: vmovq %xmm1, 16(%rdi)
18-
; SLOW-NEXT: vpextrq $1, %xmm0, 8(%rdi)
19-
; SLOW-NEXT: vmovq %xmm0, (%rdi)
15+
; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rdi)
16+
; SLOW-NEXT: vmovups %xmm0, (%rdi)
2017
; SLOW-NEXT: movq %rdi, %rax
2118
; SLOW-NEXT: vzeroupper
2219
; SLOW-NEXT: retq

test/CodeGen/X86/constant-combines.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,11 @@ define void @PR22524({ float, float }* %arg) {
1515
;
1616
; CHECK-LABEL: PR22524:
1717
; CHECK: # BB#0: # %entry
18-
; CHECK-NEXT: movl $0, 4(%rdi)
1918
; CHECK-NEXT: xorl %eax, %eax
2019
; CHECK-NEXT: movd %eax, %xmm0
2120
; CHECK-NEXT: xorps %xmm1, %xmm1
2221
; CHECK-NEXT: mulss %xmm0, %xmm1
23-
; CHECK-NEXT: movl $0, (%rdi)
22+
; CHECK-NEXT: movq $0, (%rdi)
2423
; CHECK-NEXT: movss %xmm1, 4(%rdi)
2524
; CHECK-NEXT: retq
2625
entry:

test/CodeGen/X86/extract-store.ll

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -510,22 +510,22 @@ define void @extract_f64_1(double* nocapture %dst, <2 x double> %foo) nounwind {
510510
}
511511

512512
define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
513-
; X32-LABEL: extract_f128_0:
514-
; X32: # BB#0:
515-
; X32-NEXT: pushl %edi
516-
; X32-NEXT: pushl %esi
517-
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
518-
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
519-
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
520-
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
521-
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
522-
; X32-NEXT: movl %esi, 12(%edi)
523-
; X32-NEXT: movl %edx, 8(%edi)
524-
; X32-NEXT: movl %ecx, 4(%edi)
525-
; X32-NEXT: movl %eax, (%edi)
526-
; X32-NEXT: popl %esi
527-
; X32-NEXT: popl %edi
528-
; X32-NEXT: retl
513+
; SSE-X32-LABEL: extract_f128_0:
514+
; SSE-X32: # BB#0:
515+
; SSE-X32-NEXT: pushl %edi
516+
; SSE-X32-NEXT: pushl %esi
517+
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
518+
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
519+
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %edx
520+
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %esi
521+
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %edi
522+
; SSE-X32-NEXT: movl %esi, 12(%edi)
523+
; SSE-X32-NEXT: movl %edx, 8(%edi)
524+
; SSE-X32-NEXT: movl %ecx, 4(%edi)
525+
; SSE-X32-NEXT: movl %eax, (%edi)
526+
; SSE-X32-NEXT: popl %esi
527+
; SSE-X32-NEXT: popl %edi
528+
; SSE-X32-NEXT: retl
529529
;
530530
; SSE2-X64-LABEL: extract_f128_0:
531531
; SSE2-X64: # BB#0:
@@ -539,6 +539,13 @@ define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
539539
; SSE41-X64-NEXT: movq %rsi, (%rdi)
540540
; SSE41-X64-NEXT: retq
541541
;
542+
; AVX-X32-LABEL: extract_f128_0:
543+
; AVX-X32: # BB#0:
544+
; AVX-X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
545+
; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
546+
; AVX-X32-NEXT: vmovups %xmm0, (%eax)
547+
; AVX-X32-NEXT: retl
548+
;
542549
; AVX-X64-LABEL: extract_f128_0:
543550
; AVX-X64: # BB#0:
544551
; AVX-X64-NEXT: movq %rdx, 8(%rdi)
@@ -555,22 +562,22 @@ define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
555562
}
556563

557564
define void @extract_f128_1(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
558-
; X32-LABEL: extract_f128_1:
559-
; X32: # BB#0:
560-
; X32-NEXT: pushl %edi
561-
; X32-NEXT: pushl %esi
562-
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
563-
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
564-
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
565-
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
566-
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
567-
; X32-NEXT: movl %esi, 12(%edi)
568-
; X32-NEXT: movl %edx, 8(%edi)
569-
; X32-NEXT: movl %ecx, 4(%edi)
570-
; X32-NEXT: movl %eax, (%edi)
571-
; X32-NEXT: popl %esi
572-
; X32-NEXT: popl %edi
573-
; X32-NEXT: retl
565+
; SSE-X32-LABEL: extract_f128_1:
566+
; SSE-X32: # BB#0:
567+
; SSE-X32-NEXT: pushl %edi
568+
; SSE-X32-NEXT: pushl %esi
569+
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
570+
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
571+
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %edx
572+
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %esi
573+
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %edi
574+
; SSE-X32-NEXT: movl %esi, 12(%edi)
575+
; SSE-X32-NEXT: movl %edx, 8(%edi)
576+
; SSE-X32-NEXT: movl %ecx, 4(%edi)
577+
; SSE-X32-NEXT: movl %eax, (%edi)
578+
; SSE-X32-NEXT: popl %esi
579+
; SSE-X32-NEXT: popl %edi
580+
; SSE-X32-NEXT: retl
574581
;
575582
; SSE2-X64-LABEL: extract_f128_1:
576583
; SSE2-X64: # BB#0:
@@ -584,6 +591,13 @@ define void @extract_f128_1(fp128* nocapture %dst, <2 x fp128> %foo) nounwind {
584591
; SSE41-X64-NEXT: movq %rcx, (%rdi)
585592
; SSE41-X64-NEXT: retq
586593
;
594+
; AVX-X32-LABEL: extract_f128_1:
595+
; AVX-X32: # BB#0:
596+
; AVX-X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
597+
; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
598+
; AVX-X32-NEXT: vmovups %xmm0, (%eax)
599+
; AVX-X32-NEXT: retl
600+
;
587601
; AVX-X64-LABEL: extract_f128_1:
588602
; AVX-X64: # BB#0:
589603
; AVX-X64-NEXT: movq %r8, 8(%rdi)

test/CodeGen/X86/fold-vector-sext-crash2.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,10 @@ define <2 x i256> @test_zext1() {
5353
ret <2 x i256> %Shuff
5454

5555
; X64-LABEL: test_zext1
56-
; X64: movq $0
57-
; X64-NEXT: movq $0
56+
; X64: xorps %xmm0, %xmm0
57+
; X64: movaps %xmm0
58+
; X64: movaps %xmm0
59+
; X64: movaps %xmm0
5860
; X64-NEXT: movq $0
5961
; X64-NEXT: movq $254
6062

@@ -75,8 +77,10 @@ define <2 x i256> @test_zext2() {
7577
ret <2 x i256> %Shuff
7678

7779
; X64-LABEL: test_zext2
78-
; X64: movq $0
79-
; X64-NEXT: movq $0
80+
; X64: xorps %xmm0, %xmm0
81+
; X64-NEXT: movaps %xmm0
82+
; X64-NEXT: movaps %xmm0
83+
; X64-NEXT: movaps %xmm0
8084
; X64-NEXT: movq $-1
8185
; X64-NEXT: movq $-2
8286

0 commit comments

Comments
 (0)