Skip to content
Open
6 changes: 6 additions & 0 deletions llvm/docs/GlobalISel/GenericOpcode.rst
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,12 @@ G_FPTRUNC

Convert a floating point value to a narrower type.

G_FPTRUNC_ODD
^^^^^^^^^^^^^

Convert a floating point value to a narrower type using round-to-odd rounding
mode.

G_FPTOSI, G_FPTOUI, G_SITOFP, G_UITOFP
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
15 changes: 15 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1332,6 +1332,21 @@ class LLVM_ABI MachineIRBuilder {
buildFPTrunc(const DstOp &Res, const SrcOp &Op,
std::optional<unsigned> Flags = std::nullopt);

/// Build and insert \p Res = G_FPTRUNC_ODD \p Op
///
/// G_FPTRUNC_ODD converts a floating-point value into one with a smaller type
/// using the round-to-odd rounding mode.
///
/// \pre setBasicBlock or setMI must have been called.
/// \pre \p Res must be a generic virtual register with scalar or vector type.
/// \pre \p Op must be a generic virtual register with scalar or vector type.
/// \pre \p Res must be smaller than \p Op.
///
/// \param Flags optional instruction flags, passed through unchanged to
///        buildInstr.
///
/// \return The newly created instruction.
MachineInstrBuilder
buildFPTruncOdd(const DstOp &Res, const SrcOp &Op,
std::optional<unsigned> Flags = std::nullopt);

/// Build and insert \p Res = G_TRUNC \p Op
///
/// G_TRUNC extracts the low bits of a type. For a vector type each element is
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Support/TargetOpcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,9 @@ HANDLE_TARGET_OPCODE(G_FPEXT)
/// Generic floating-point truncation
HANDLE_TARGET_OPCODE(G_FPTRUNC)

/// Generic floating-point truncation using round to odd
HANDLE_TARGET_OPCODE(G_FPTRUNC_ODD)

/// Generic float to signed-int conversion
HANDLE_TARGET_OPCODE(G_FPTOSI)

Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Target/GenericOpcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,12 @@ def G_FPTRUNC : GenericInstruction {
let hasSideEffects = false;
}

// Floating-point truncation to a narrower type using the round-to-odd
// rounding mode. One result ($dst, type0) and one source ($src, type1); the
// result and source types are independent type indices.
def G_FPTRUNC_ODD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
let hasSideEffects = false;
}

def G_FPTOSI : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
Expand Down
28 changes: 25 additions & 3 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5595,6 +5595,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_ANYEXT:
case G_FPEXT:
case G_FPTRUNC:
case G_FPTRUNC_ODD:
case G_SITOFP:
case G_UITOFP:
case G_FPTOSI:
Expand Down Expand Up @@ -8476,7 +8477,8 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
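// f64 -> f16 conversion using round-to-nearest-even rounding mode. Vectors are
// lowered as a G_FPTRUNC_ODD to f32 followed by a regular G_FPTRUNC to f16;
// rounding the intermediate value to odd avoids double-rounding errors.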
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
const LLT S1 = LLT::scalar(1);
Expand All @@ -8486,8 +8488,28 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
MRI.getType(Src).getScalarType() == LLT::scalar(64));

if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
return UnableToLegalize;
if (MRI.getType(Src).isVector()) {
LLT SrcTy = MRI.getType(Src);

LLT MidTy = LLT::fixed_vector(SrcTy.getNumElements(), LLT::scalar(32));

// Check if G_FPTRUNC_ODD has been added to the legalizer and the resultant
// types can be legalized.
auto LegalizeAction =
LI.getAction({TargetOpcode::G_FPTRUNC_ODD, {MidTy, SrcTy}}).Action;

if (LegalizeAction == LegalizeActions::Unsupported ||
LegalizeAction == LegalizeActions::NotFound)
return UnableToLegalize;

MIRBuilder.setInstrAndDebugLoc(MI);

MachineInstrBuilder Mid = MIRBuilder.buildFPTruncOdd(MidTy, Src);
MIRBuilder.buildFPTrunc(Dst, Mid.getReg(0));

MI.eraseFromParent();
return Legalized;
}

if (MI.getFlag(MachineInstr::FmAfn)) {
unsigned Flags = MI.getFlags();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,12 @@ MachineIRBuilder::buildFPTrunc(const DstOp &Res, const SrcOp &Op,
return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags);
}

// Emit a G_FPTRUNC_ODD that narrows the floating-point value \p Op into
// \p Res. \p Flags is handed to buildInstr untouched.
MachineInstrBuilder
MachineIRBuilder::buildFPTruncOdd(const DstOp &Res, const SrcOp &Op,
                                  std::optional<unsigned> Flags) {
  constexpr unsigned Opcode = TargetOpcode::G_FPTRUNC_ODD;
  return buildInstr(Opcode, Res, Op, Flags);
}

MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
const DstOp &Res,
const SrcOp &Op0,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,8 @@ def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;

def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;

def : GINodeEquiv<G_FPTRUNC_ODD, AArch64fcvtxn_n>;

// These are patterns that we only use for GlobalISel via the importer.
def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
(vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
Expand Down
21 changes: 18 additions & 3 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DerivedTypes.h"
Expand Down Expand Up @@ -817,10 +818,24 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor(
{{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
.libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
.clampNumElements(0, v4s16, v4s16)
.clampNumElements(0, v2s32, v2s32)
.moreElementsToNextPow2(1)
.lowerIf([](const LegalityQuery &Q) {
LLT DstTy = Q.Types[0];
LLT SrcTy = Q.Types[1];
return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
SrcTy.getScalarSizeInBits() == 64 &&
DstTy.getScalarSizeInBits() == 16;
})
// Clamp based on input
.clampNumElements(1, v4s32, v4s32)
.clampNumElements(1, v2s64, v2s64)
.scalarize(0);

getActionDefinitionsBuilder(G_FPTRUNC_ODD)
.legalFor({{s16, s32}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
.clampMaxNumElements(1, s32, 4)
.clampMaxNumElements(1, s64, 2);

getActionDefinitionsBuilder(G_FPEXT)
.legalFor(
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
Expand Down Expand Up @@ -2389,4 +2404,4 @@ bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
MI.eraseFromParent();
return true;
}
}
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class AArch64LegalizerInfo : public LegalizerInfo {
bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizePrefetch(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeBitcast(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeFptrunc(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -558,8 +558,11 @@
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPTRUNC_ODD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FPTOSI (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/AArch64/arm64-fp128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1197,30 +1197,22 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) {
;
; CHECK-GI-LABEL: vec_round_f16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sub sp, sp, #64
; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
; CHECK-GI-NEXT: sub sp, sp, #48
; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: mov v2.d[0], x8
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: mov v2.d[1], x8
; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: add sp, sp, #64
; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
; CHECK-GI-NEXT: fmov d0, d1
; CHECK-GI-NEXT: add sp, sp, #48
; CHECK-GI-NEXT: ret
%dst = fptrunc <2 x fp128> %val to <2 x half>
ret <2 x half> %dst
Expand Down
47 changes: 6 additions & 41 deletions llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -170,47 +170,12 @@ define <4 x half> @s_to_h(<4 x float> %a) {
}

define <4 x half> @d_to_h(<4 x double> %a) {
; CHECK-CVT-SD-LABEL: d_to_h:
; CHECK-CVT-SD: // %bb.0:
; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: d_to_h:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-CVT-GI-LABEL: d_to_h:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: mov d2, v0.d[1]
; CHECK-CVT-GI-NEXT: fcvt h0, d0
; CHECK-CVT-GI-NEXT: mov d3, v1.d[1]
; CHECK-CVT-GI-NEXT: fcvt h1, d1
; CHECK-CVT-GI-NEXT: fcvt h2, d2
; CHECK-CVT-GI-NEXT: mov v0.h[1], v2.h[0]
; CHECK-CVT-GI-NEXT: fcvt h2, d3
; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-CVT-GI-NEXT: mov v0.h[3], v2.h[0]
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: d_to_h:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: mov d2, v0.d[1]
; CHECK-FP16-GI-NEXT: fcvt h0, d0
; CHECK-FP16-GI-NEXT: mov d3, v1.d[1]
; CHECK-FP16-GI-NEXT: fcvt h1, d1
; CHECK-FP16-GI-NEXT: fcvt h2, d2
; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0]
; CHECK-FP16-GI-NEXT: fcvt h2, d3
; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-FP16-GI-NEXT: mov v0.h[3], v2.h[0]
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-FP16-GI-NEXT: ret
; CHECK-LABEL: d_to_h:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: ret
%1 = fptrunc <4 x double> %a to <4 x half>
ret <4 x half> %1
}
Expand Down
74 changes: 9 additions & 65 deletions llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -176,71 +176,15 @@ define <8 x half> @s_to_h(<8 x float> %a) {
}

define <8 x half> @d_to_h(<8 x double> %a) {
; CHECK-CVT-SD-LABEL: d_to_h:
; CHECK-CVT-SD: // %bb.0:
; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-CVT-SD-NEXT: fcvtxn v2.2s, v2.2d
; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-CVT-SD-NEXT: fcvtxn2 v2.4s, v3.2d
; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: d_to_h:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-FP16-SD-NEXT: fcvtxn v2.2s, v2.2d
; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-FP16-SD-NEXT: fcvtxn2 v2.4s, v3.2d
; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-SD-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-CVT-GI-LABEL: d_to_h:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: mov d4, v0.d[1]
; CHECK-CVT-GI-NEXT: fcvt h0, d0
; CHECK-CVT-GI-NEXT: mov d5, v1.d[1]
; CHECK-CVT-GI-NEXT: fcvt h1, d1
; CHECK-CVT-GI-NEXT: fcvt h4, d4
; CHECK-CVT-GI-NEXT: mov v0.h[1], v4.h[0]
; CHECK-CVT-GI-NEXT: fcvt h4, d5
; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-CVT-GI-NEXT: mov d1, v2.d[1]
; CHECK-CVT-GI-NEXT: fcvt h2, d2
; CHECK-CVT-GI-NEXT: mov v0.h[3], v4.h[0]
; CHECK-CVT-GI-NEXT: fcvt h1, d1
; CHECK-CVT-GI-NEXT: mov v0.h[4], v2.h[0]
; CHECK-CVT-GI-NEXT: mov d2, v3.d[1]
; CHECK-CVT-GI-NEXT: fcvt h3, d3
; CHECK-CVT-GI-NEXT: mov v0.h[5], v1.h[0]
; CHECK-CVT-GI-NEXT: fcvt h1, d2
; CHECK-CVT-GI-NEXT: mov v0.h[6], v3.h[0]
; CHECK-CVT-GI-NEXT: mov v0.h[7], v1.h[0]
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: d_to_h:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: mov d4, v0.d[1]
; CHECK-FP16-GI-NEXT: fcvt h0, d0
; CHECK-FP16-GI-NEXT: mov d5, v1.d[1]
; CHECK-FP16-GI-NEXT: fcvt h1, d1
; CHECK-FP16-GI-NEXT: fcvt h4, d4
; CHECK-FP16-GI-NEXT: mov v0.h[1], v4.h[0]
; CHECK-FP16-GI-NEXT: fcvt h4, d5
; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-FP16-GI-NEXT: mov d1, v2.d[1]
; CHECK-FP16-GI-NEXT: fcvt h2, d2
; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0]
; CHECK-FP16-GI-NEXT: fcvt h1, d1
; CHECK-FP16-GI-NEXT: mov v0.h[4], v2.h[0]
; CHECK-FP16-GI-NEXT: mov d2, v3.d[1]
; CHECK-FP16-GI-NEXT: fcvt h3, d3
; CHECK-FP16-GI-NEXT: mov v0.h[5], v1.h[0]
; CHECK-FP16-GI-NEXT: fcvt h1, d2
; CHECK-FP16-GI-NEXT: mov v0.h[6], v3.h[0]
; CHECK-FP16-GI-NEXT: mov v0.h[7], v1.h[0]
; CHECK-FP16-GI-NEXT: ret
; CHECK-LABEL: d_to_h:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-NEXT: fcvtxn v2.2s, v2.2d
; CHECK-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-NEXT: fcvtxn2 v2.4s, v3.2d
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-NEXT: ret
%1 = fptrunc <8 x double> %a to <8 x half>
ret <8 x half> %1
}
Expand Down
Loading