diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index fc3e9ff0d6238..14cf8019968c0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5406,6 +5406,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
       Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
       Node->getOpcode() == ISD::STRICT_FSETCC ||
       Node->getOpcode() == ISD::STRICT_FSETCCS ||
+      Node->getOpcode() == ISD::STRICT_LRINT ||
+      Node->getOpcode() == ISD::STRICT_LLRINT ||
+      Node->getOpcode() == ISD::STRICT_LROUND ||
+      Node->getOpcode() == ISD::STRICT_LLROUND ||
       Node->getOpcode() == ISD::VP_REDUCE_FADD ||
       Node->getOpcode() == ISD::VP_REDUCE_FMUL ||
       Node->getOpcode() == ISD::VP_REDUCE_FMAX ||
@@ -5951,6 +5955,29 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     Results.push_back(Tmp3);
     Results.push_back(Tmp3.getValue(1));
     break;
+  case ISD::LLROUND:
+  case ISD::LROUND:
+  case ISD::LRINT:
+  case ISD::LLRINT:
+    // Only the FP operand is promoted; the node keeps its integer result type.
+    Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+    Tmp2 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1);
+    Results.push_back(Tmp2);
+    break;
+  case ISD::STRICT_LLROUND:
+  case ISD::STRICT_LROUND:
+  case ISD::STRICT_LRINT:
+  case ISD::STRICT_LLRINT:
+    // Extend the FP operand (operand 1; operand 0 is the chain), then re-emit
+    // the node on the extended value. Reuse the node's own VT list so the
+    // result stays {integer, chain} instead of the promoted FP type NVT.
+    Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+                       {Node->getOperand(0), Node->getOperand(1)});
+    Tmp2 = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
+                       {Tmp1.getValue(1), Tmp1});
+    Results.push_back(Tmp2);
+    Results.push_back(Tmp2.getValue(1));
+    break;
   case ISD::BUILD_VECTOR: {
     MVT EltVT = OVT.getVectorElementType();
     MVT NewEltVT = NVT.getVectorElementType();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index a1e6dc028260b..d6b0ceffd8cd8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3801,15 +3801,21 @@ bool
DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { case ISD::FAKE_USE: Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo); break; - case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break; - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: + case ISD::FCOPYSIGN: + Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); + break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::LRINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::LLRINT: - case ISD::LROUND: case ISD::LLROUND: + case ISD::LRINT: + case ISD::LROUND: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LLROUND: + case ISD::STRICT_LRINT: + case ISD::STRICT_LROUND: Res = SoftPromoteHalfOp_Op0WithStrict(N); break; case ISD::FP_TO_SINT_SAT: diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 812fa85ef3150..92e7865a6a817 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -119,11 +119,17 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .widenScalarToNextPow2(0, /*Min=*/8) .clampScalar(0, s8, sMaxScalar); - getActionDefinitionsBuilder({G_LROUND, G_LLROUND, G_FCOS, G_FCOSH, G_FACOS, - G_FSIN, G_FSINH, G_FASIN, G_FTAN, G_FTANH, - G_FATAN, G_FATAN2, G_FPOW, G_FEXP, G_FEXP2, - G_FEXP10, G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, - G_FSINCOS, G_FCEIL, G_FFLOOR}) + getActionDefinitionsBuilder({G_LROUND, G_LLROUND}) + .widenScalarIf(typeIs(1, s16), + [=](const LegalityQuery &) { + return std::pair(1, s32); + }) + .libcall(); + + getActionDefinitionsBuilder( + {G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH, G_FASIN, G_FTAN, + G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP, G_FEXP2, G_FEXP10, + G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS, G_FCEIL, G_FFLOOR}) .libcall(); getActionDefinitionsBuilder(G_FSQRT) @@ -447,7 +453,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, getActionDefinitionsBuilder(G_FPEXT) .legalFor(HasSSE2, 
{{s64, s32}})
       .legalFor(HasAVX, {{v4s64, v4s32}})
-      .legalFor(HasAVX512, {{v8s64, v8s32}});
+      .legalFor(HasAVX512, {{v8s64, v8s32}})
+      .libcall();
 
   getActionDefinitionsBuilder(G_FPTRUNC)
       .legalFor(HasSSE2, {{s32, s64}})
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fd72f0815876f..68020ebdd865f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -717,9 +717,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
+
+    setOperationAction(ISD::LLROUND, MVT::f16, Expand);
+    setOperationAction(ISD::LROUND, MVT::f16, Expand);
     setOperationAction(ISD::LRINT, MVT::f16, Expand);
     setOperationAction(ISD::LLRINT, MVT::f16, Expand);
 
+    // Strict variants are promoted: the legalizer extends the f16 operand and
+    // re-emits the node on the wider FP type.
+    setOperationAction(ISD::STRICT_LLROUND, MVT::f16, Promote);
+    setOperationAction(ISD::STRICT_LROUND, MVT::f16, Promote);
+    setOperationAction(ISD::STRICT_LRINT, MVT::f16, Promote);
+    setOperationAction(ISD::STRICT_LLRINT, MVT::f16, Promote);
+
     // Lower this to MOVMSK plus an AND.
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom); setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); diff --git a/llvm/test/CodeGen/LoongArch/lrint-conv.ll b/llvm/test/CodeGen/LoongArch/lrint-conv.ll index 85de820025614..262d1c16a6486 100644 --- a/llvm/test/CodeGen/LoongArch/lrint-conv.ll +++ b/llvm/test/CodeGen/LoongArch/lrint-conv.ll @@ -5,16 +5,31 @@ ; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I32 ; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I64 -; FIXME: crash -; define ITy @test_lrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.lrint.ITy.f16(half %x) -; ret ITy %res -; } +define ITy @test_lrint_ixx_f16(half %x) nounwind { +; LA32-LABEL: test_lrint_ixx_f16: +; LA32: bl lrintf +; +; LA64-I32-LABEL: test_lrint_ixx_f16: +; LA64-I32: pcaddu18i $ra, %call36(lrintf) +; +; LA64-I64-LABEL: test_lrint_ixx_f16: +; LA64-I64: pcaddu18i $t8, %call36(lrintf) + %res = tail call ITy @llvm.lrint.ITy.f16(half %x) + ret ITy %res +} -; define ITy @test_llrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.llrint.ITy.f16(half %x) -; ret ITy %res -; } +define ITy @test_llrint_ixx_f16(half %x) nounwind { +; LA32-LABEL: test_llrint_ixx_f16: +; LA32: bl llrintf +; +; LA64-I32-LABEL: test_llrint_ixx_f16: +; LA64-I32: pcaddu18i $ra, %call36(llrintf) +; +; LA64-I64-LABEL: test_llrint_ixx_f16: +; LA64-I64: pcaddu18i $t8, %call36(llrintf) + %res = tail call ITy @llvm.llrint.ITy.f16(half %x) + ret ITy %res +} define ITy @test_lrint_ixx_f32(float %x) nounwind { ; LA32-LABEL: test_lrint_ixx_f32: diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll index 592d40c0f65aa..0e449b77f515f 100644 --- a/llvm/test/CodeGen/Mips/llrint-conv.ll +++ b/llvm/test/CodeGen/Mips/llrint-conv.ll @@ -1,19 +1,22 @@ ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s ; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s -; FIXME: 
crash -; define signext i32 @testmswh(half %x) { -; entry: -; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) -; %conv = trunc i64 %0 to i32 -; ret i32 %conv -; } +define signext i32 @testmswh(half %x) { +; CHECK-LABEL: testmswh: +; CHECK: jal llrintf +entry: + %0 = tail call i64 @llvm.llrint.i64.f16(half %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} -; define i64 @testmsxh(half %x) { -; entry: -; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) -; ret i64 %0 -; } +define i64 @testmsxh(half %x) { +; CHECK-LABEL: testmsxh: +; CHECK: jal llrintf +entry: + %0 = tail call i64 @llvm.llrint.i64.f16(half %x) + ret i64 %0 +} define signext i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll index 6d2e392675f1c..64c5cb9ac5b07 100644 --- a/llvm/test/CodeGen/Mips/lrint-conv.ll +++ b/llvm/test/CodeGen/Mips/lrint-conv.ll @@ -1,19 +1,22 @@ ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s ; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s -; FIXME: crash -; define signext i32 @testmswh(half %x) { -; entry: -; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) -; %conv = trunc i64 %0 to i32 -; ret i32 %conv -; } +define signext i32 @testmswh(half %x) { +; CHECK-LABEL: testmswh: +; CHECK: jal lrintf +entry: + %0 = tail call i64 @llvm.lrint.i64.f16(half %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} -; define i64 @testmsxh(half %x) { -; entry: -; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) -; ret i64 %0 -; } +define i64 @testmsxh(half %x) { +; CHECK-LABEL: testmsxh: +; CHECK: jal lrintf +entry: + %0 = tail call i64 @llvm.lrint.i64.f16(half %x) + ret i64 %0 +} define signext i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/RISCV/lrint-conv.ll b/llvm/test/CodeGen/RISCV/lrint-conv.ll index d3af2153588a1..ecb6bd0932ef3 100644 --- a/llvm/test/CodeGen/RISCV/lrint-conv.ll +++ b/llvm/test/CodeGen/RISCV/lrint-conv.ll @@ -5,14 
+5,25 @@ ; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64 ; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64 -; FIXME: crash -; define ITy @test_lrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.lrint.ITy.f16(half %x) -; } +define ITy @test_lrint_ixx_f16(half %x) nounwind { +; RV32-LABEL: test_lrint_ixx_f16: +; RV32: call lrintf +; +; RV64-LABEL: test_lrint_ixx_f16: +; RV64: call lrintf + %res = tail call ITy @llvm.lrint.ITy.f16(half %x) + ret ITy %res +} -; define ITy @test_llrint_ixx_f16(half %x) nounwind { -; %res = tail call ITy @llvm.llrint.ITy.f16(half %x) -; } +define ITy @test_llrint_ixx_f16(half %x) nounwind { +; RV32-LABEL: test_llrint_ixx_f16: +; RV32: call llrintf +; +; RV64-LABEL: test_llrint_ixx_f16: +; RV64: call llrintf + %res = tail call ITy @llvm.llrint.ITy.f16(half %x) + ret ITy %res +} define ITy @test_lrint_ixx_f32(float %x) nounwind { ; RV32-LABEL: test_lrint_ixx_f32: diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll index 5f38645f74636..089bc187bab7d 100644 --- a/llvm/test/CodeGen/X86/llrint-conv.ll +++ b/llvm/test/CodeGen/X86/llrint-conv.ll @@ -7,12 +7,44 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX -; FIXME: crash -; define i64 @test_llrint_i64_f16(half %x) nounwind { -; entry: -; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) -; ret i64 %0 -; } +define i64 @test_llrint_i64_f16(half %x) nounwind { +; X86-NOSSE-LABEL: test_llrint_i64_f16: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll llrintf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: 
test_llrint_i64_f16: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll llrintf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; X64-SSE-LABEL: test_llrint_i64_f16: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: callq rintf@PLT +; X64-SSE-NEXT: callq __truncsfhf2@PLT +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: cvttss2si %xmm0, %rax +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +entry: + %0 = tail call i64 @llvm.llrint.i64.f16(half %x) + ret i64 %0 +} define i64 @test_llrint_i64_f32(float %x) nounwind { ; X86-NOSSE-LABEL: test_llrint_i64_f32: @@ -217,12 +249,44 @@ entry: ret i64 %0 } -; FIXME: crash -; define i64 @test_llrint_i64_f16_strict(half %x) nounwind strictfp { -; entry: -; %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") -; ret i64 %0 -; } +define i64 @test_llrint_i64_f16_strict(half %x) nounwind strictfp { +; X86-NOSSE-LABEL: test_llrint_i64_f16_strict: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: wait +; X86-NOSSE-NEXT: calll llrintf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_llrint_i64_f16_strict: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: wait +; X86-SSE2-NEXT: calll llrintf +; X86-SSE2-NEXT: popl %ecx 
+; X86-SSE2-NEXT: retl +; +; X64-SSE-LABEL: test_llrint_i64_f16_strict: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: callq llrintf@PLT +; X64-SSE-NEXT: movq %xmm0, %rax +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq +entry: + %0 = tail call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i64 %0 +} define i64 @test_llrint_i64_f32_strict(float %x) nounwind strictfp { ; X86-NOSSE-LABEL: test_llrint_i64_f32_strict: diff --git a/llvm/test/CodeGen/X86/llround-conv.ll b/llvm/test/CodeGen/X86/llround-conv.ll index ef4df82e9e57e..83151ebf9af07 100644 --- a/llvm/test/CodeGen/X86/llround-conv.ll +++ b/llvm/test/CodeGen/X86/llround-conv.ll @@ -5,11 +5,62 @@ ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 -; FIXME: crash -; define i64 @test_llround_f16(half %x) nounwind { -; %conv = tail call i64 @llvm.llround.f16(half %x) -; ret i64 %conv -; } +define i64 @test_llround_f16(half %x) nounwind { +; X86-NOSSE-LABEL: test_llround_f16: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll llroundf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_llround_f16: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll llroundf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; X64-LABEL: test_llround_f16: +; X64: # %bb.0: +; X64-NEXT: 
pushq %rax +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: callq roundf@PLT +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; GISEL-X86-LABEL: test_llround_f16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: calll __extendhfsf2 +; GISEL-X86-NEXT: fstps (%esp) +; GISEL-X86-NEXT: calll llroundf +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: test_llround_f16: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq __extendhfsf2 +; GISEL-X64-NEXT: callq llroundf +; GISEL-X64-NEXT: popq %rcx +; GISEL-X64-NEXT: retq + %conv = tail call i64 @llvm.llround.f16(half %x) + ret i64 %conv +} define i64 @test_llround_f32(float %x) nounwind { ; X86-NOSSE-LABEL: test_llround_f32: @@ -184,11 +235,62 @@ define i64 @test_llround_f128(fp128 %x) nounwind { ret i64 %conv } -; FIXME: crash -; define i64 @test_llround_i64_f16(half %x) nounwind { -; %conv = call i64 @llvm.llround.i64.f16(half %x) -; ret i64 %conv -; } +define i64 @test_llround_i64_f16(half %x) nounwind { +; X86-NOSSE-LABEL: test_llround_i64_f16: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll llroundf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_llround_i64_f16: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll llroundf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; X64-LABEL: test_llround_i64_f16: +; 
X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: callq roundf@PLT +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; GISEL-X86-LABEL: test_llround_i64_f16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: calll __extendhfsf2 +; GISEL-X86-NEXT: fstps (%esp) +; GISEL-X86-NEXT: calll llroundf +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: test_llround_i64_f16: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq __extendhfsf2 +; GISEL-X64-NEXT: callq llroundf +; GISEL-X64-NEXT: popq %rcx +; GISEL-X64-NEXT: retq + %conv = call i64 @llvm.llround.i64.f16(half %x) + ret i64 %conv +} define i64 @test_llround_i64_f32(float %x) nounwind { ; X86-NOSSE-LABEL: test_llround_i64_f32: diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll index 2b99b4c50f58a..f4cb0d3ff87e6 100644 --- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll +++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll @@ -7,12 +7,51 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX -; FIXME: crash -; define i32 @test_lrint_i32_f16(half %x) nounwind { -; entry: -; %0 = tail call i32 @llvm.lrint.i32.f16(half %x) -; ret i32 %0 -; } +define i32 @test_lrint_i32_f16(half %x) nounwind { +; X86-NOSSE-LABEL: test_lrint_i32_f16: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: addl $4, %esp +; X86-NOSSE-NEXT: fistpl (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: 
retl +; +; X86-SSE2-LABEL: test_lrint_i32_f16: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll rintf +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll __truncsfhf2 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl +; +; X64-SSE-LABEL: test_lrint_i32_f16: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: callq rintf@PLT +; X64-SSE-NEXT: callq __truncsfhf2@PLT +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: cvttss2si %xmm0, %eax +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq + %conv = tail call i32 @llvm.lrint.i32.f16(half %x) + ret i32 %conv +} define i32 @test_lrint_i32_f32(float %x) nounwind { ; X86-NOSSE-LABEL: test_lrint_i32_f32: @@ -154,11 +193,47 @@ define i32 @test_lrint_i32_f128(fp128 %x) nounwind { ret i32 %conv } -; FIXME: crash -; define i32 @test_lrint_i32_f16_strict(half %x) nounwind strictfp { -; %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") -; ret i32 %conv -; } +define i32 @test_lrint_i32_f16_strict(half %x) nounwind strictfp { +; X86-NOSSE-LABEL: test_lrint_i32_f16_strict: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: wait +; X86-NOSSE-NEXT: calll lrintf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lrint_i32_f16_strict: +; X86-SSE2: # %bb.0: +; 
X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: wait +; X86-SSE2-NEXT: calll lrintf +; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: wait +; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl +; +; X64-SSE-LABEL: test_lrint_i32_f16_strict: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: callq lrintf@PLT +; X64-SSE-NEXT: movd %xmm0, %eax +; X64-SSE-NEXT: popq %rcx +; X64-SSE-NEXT: retq + %conv = tail call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i32 %conv +} define i32 @test_lrint_i32_f32_strict(float %x) nounwind strictfp { ; X86-NOSSE-LABEL: test_lrint_i32_f32_strict: diff --git a/llvm/test/CodeGen/X86/lrint-conv-i64.ll b/llvm/test/CodeGen/X86/lrint-conv-i64.ll index 731c03bf0d747..f18c5d6e20bdb 100644 --- a/llvm/test/CodeGen/X86/lrint-conv-i64.ll +++ b/llvm/test/CodeGen/X86/lrint-conv-i64.ll @@ -5,11 +5,43 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX -; FIXME: crash -; define i64 @test_lrint_i64_f16(half %x) nounwind { -; %conv = tail call i64 @llvm.lrint.i64.f16(half %x) -; ret i64 %conv -; } +define i64 @test_lrint_i64_f16(half %x) nounwind { +; X86-NOSSE-LABEL: test_lrint_i64_f16: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll lrintf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; 
X86-SSE2-LABEL: test_lrint_i64_f16: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll lrintf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; SSE-LABEL: test_lrint_i64_f16: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rax +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: callq rintf@PLT +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: cvttss2si %xmm0, %rax +; SSE-NEXT: popq %rcx +; SSE-NEXT: retq + %conv = tail call i64 @llvm.lrint.i64.f16(half %x) + ret i64 %conv +} define i64 @test_lrint_i64_f32(float %x) nounwind { ; X86-NOSSE-LABEL: test_lrint_i64_f32: @@ -149,11 +181,41 @@ define i64 @test_lrint_i64_f128(fp128 %x) nounwind { ret i64 %conv } -; FIXME: crash -; define i64 @test_lrint_i64_f16_strict(half %x) nounwind { -; %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") -; ret i64 %conv -; } +define i64 @test_lrint_i64_f16_strict(half %x) nounwind { +; X86-NOSSE-LABEL: test_lrint_i64_f16_strict: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll lrintf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lrint_i64_f16_strict: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll lrintf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; SSE-LABEL: test_lrint_i64_f16_strict: +; SSE: # %bb.0: +; 
SSE-NEXT: pushq %rax +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: callq lrintf@PLT +; SSE-NEXT: movq %xmm0, %rax +; SSE-NEXT: popq %rcx +; SSE-NEXT: retq + %conv = tail call i64 @llvm.experimental.constrained.lrint.i64.f16(half %x, metadata!"round.dynamic", metadata!"fpexcept.strict") + ret i64 %conv +} define i64 @test_lrint_i64_f32_strict(float %x) nounwind { ; X86-NOSSE-LABEL: test_lrint_i64_f32_strict: diff --git a/llvm/test/CodeGen/X86/lround-conv-i32.ll b/llvm/test/CodeGen/X86/lround-conv-i32.ll index 389f29233dcce..73abbee86880f 100644 --- a/llvm/test/CodeGen/X86/lround-conv-i32.ll +++ b/llvm/test/CodeGen/X86/lround-conv-i32.ll @@ -5,11 +5,69 @@ ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 -; FIXME: crash -; define i32 @test_lround_i32_f16(half %x) nounwind { -; %conv = tail call i32 @llvm.lround.i32.f16(half %x) -; ret i32 %conv -; } +define i32 @test_lround_i32_f16(half %x) nounwind { +; X86-NOSSE-LABEL: test_lround_i32_f16: +; X86-NOSSE: # %bb.0: +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll lroundf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lround_i32_f16: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll roundf +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll __truncsfhf2 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp) +; 
X86-SSE2-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: addl $8, %esp +; X86-SSE2-NEXT: retl +; +; X64-LABEL: test_lround_i32_f16: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: callq roundf@PLT +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; GISEL-X86-LABEL: test_lround_i32_f16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: calll __extendhfsf2 +; GISEL-X86-NEXT: fstps (%esp) +; GISEL-X86-NEXT: calll lroundf +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: test_lround_i32_f16: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq __extendhfsf2 +; GISEL-X64-NEXT: callq lroundf +; GISEL-X64-NEXT: popq %rcx +; GISEL-X64-NEXT: retq + %conv = tail call i32 @llvm.lround.i32.f16(half %x) + ret i32 %conv +} define i32 @test_lround_i32_f32(float %x) nounwind { ; X86-LABEL: test_lround_i32_f32: @@ -175,6 +233,3 @@ define i32 @test_lround_i32_f128(fp128 %x) nounwind { ; %conv = tail call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata!"round.dynamic", metadata!"fpexcept.strict") ; ret i32 %conv ; } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; X86-NOSSE: {{.*}} -; X86-SSE2: {{.*}} diff --git a/llvm/test/CodeGen/X86/lround-conv-i64.ll b/llvm/test/CodeGen/X86/lround-conv-i64.ll index 8b8230074728f..81f01cc27eb14 100644 --- a/llvm/test/CodeGen/X86/lround-conv-i64.ll +++ b/llvm/test/CodeGen/X86/lround-conv-i64.ll @@ -5,12 +5,63 @@ ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 -; FIXME: crash -; define i64 @test_lround_i64_f16(half %x) nounwind { -; entry: -; %0 = tail call i64 @llvm.lround.i64.f16(half %x) -; ret i64 %0 -; } +define i64 @test_lround_i64_f16(half %x) nounwind { +; X86-NOSSE-LABEL: test_lround_i64_f16: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll lroundf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: test_lround_i64_f16: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll lroundf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; X64-LABEL: test_lround_i64_f16: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: callq roundf@PLT +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; GISEL-X86-LABEL: test_lround_i64_f16: +; GISEL-X86: # %bb.0: # %entry +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl %eax, (%esp) +; 
GISEL-X86-NEXT: calll __extendhfsf2 +; GISEL-X86-NEXT: fstps (%esp) +; GISEL-X86-NEXT: calll lroundf +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: test_lround_i64_f16: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: callq __extendhfsf2 +; GISEL-X64-NEXT: callq lroundf +; GISEL-X64-NEXT: popq %rcx +; GISEL-X64-NEXT: retq +entry: + %0 = tail call i64 @llvm.lround.i64.f16(half %x) + ret i64 %0 +} define i64 @test_lround_i64_f32(float %x) nounwind { ; X86-NOSSE-LABEL: test_lround_i64_f32: