Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4852,6 +4852,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerMemcpyInline(MI);
case G_ZEXT:
case G_SEXT:
case G_FPEXT:
case G_ANYEXT:
return lowerEXT(MI);
case G_TRUNC:
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor(
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
.libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
.moreElementsToNextPow2(0)
.lowerIf([](const LegalityQuery &Q) {
LLT DstTy = Q.Types[0];
LLT SrcTy = Q.Types[1];
return SrcTy.isVector() && DstTy.isVector() &&
SrcTy.getNumElements() > 2 &&
SrcTy.getScalarSizeInBits() == 16 &&
DstTy.getScalarSizeInBits() == 64;
})
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.scalarize(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -555,8 +555,8 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AArch64/fmla.ll
Original file line number Diff line number Diff line change
Expand Up @@ -865,22 +865,22 @@ define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
Expand Down Expand Up @@ -1350,22 +1350,22 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
Expand Down
26 changes: 6 additions & 20 deletions llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -241,30 +241,16 @@ define <4 x double> @h_to_d(<4 x half> %a) {
;
; CHECK-CVT-GI-LABEL: h_to_d:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
; CHECK-CVT-GI-NEXT: fcvt d0, h0
; CHECK-CVT-GI-NEXT: fcvt d4, h1
; CHECK-CVT-GI-NEXT: fcvt d1, h2
; CHECK-CVT-GI-NEXT: fcvt d2, h3
; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0]
; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: h_to_d:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
; CHECK-FP16-GI-NEXT: fcvt d0, h0
; CHECK-FP16-GI-NEXT: fcvt d4, h1
; CHECK-FP16-GI-NEXT: fcvt d1, h2
; CHECK-FP16-GI-NEXT: fcvt d2, h3
; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0]
; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-FP16-GI-NEXT: ret
%1 = fpext <4 x half> %a to <4 x double>
ret <4 x double> %1
Expand Down
50 changes: 12 additions & 38 deletions llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -298,48 +298,22 @@ define <8 x double> @h_to_d(<8 x half> %a) {
;
; CHECK-CVT-GI-LABEL: h_to_d:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
; CHECK-CVT-GI-NEXT: mov h4, v0.h[4]
; CHECK-CVT-GI-NEXT: mov h5, v0.h[5]
; CHECK-CVT-GI-NEXT: mov h6, v0.h[6]
; CHECK-CVT-GI-NEXT: mov h7, v0.h[7]
; CHECK-CVT-GI-NEXT: fcvt d0, h0
; CHECK-CVT-GI-NEXT: fcvt d16, h1
; CHECK-CVT-GI-NEXT: fcvt d1, h2
; CHECK-CVT-GI-NEXT: fcvt d17, h3
; CHECK-CVT-GI-NEXT: fcvt d2, h4
; CHECK-CVT-GI-NEXT: fcvt d4, h5
; CHECK-CVT-GI-NEXT: fcvt d3, h6
; CHECK-CVT-GI-NEXT: fcvt d5, h7
; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0]
; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0]
; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0]
; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0]
; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-GI-NEXT: fcvtl2 v3.4s, v0.8h
; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v3.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v3.2d, v3.4s
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: h_to_d:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
; CHECK-FP16-GI-NEXT: mov h7, v0.h[7]
; CHECK-FP16-GI-NEXT: fcvt d0, h0
; CHECK-FP16-GI-NEXT: fcvt d16, h1
; CHECK-FP16-GI-NEXT: fcvt d1, h2
; CHECK-FP16-GI-NEXT: fcvt d17, h3
; CHECK-FP16-GI-NEXT: fcvt d2, h4
; CHECK-FP16-GI-NEXT: fcvt d4, h5
; CHECK-FP16-GI-NEXT: fcvt d3, h6
; CHECK-FP16-GI-NEXT: fcvt d5, h7
; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0]
; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0]
; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0]
; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0]
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-FP16-GI-NEXT: fcvtl2 v3.4s, v0.8h
; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v3.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v3.2d, v3.4s
; CHECK-FP16-GI-NEXT: ret
%1 = fpext <8 x half> %a to <8 x double>
ret <8 x double> %1
Expand Down
Loading