[PowerPC] Extend and update the test for half support (NFC) #152625
Conversation
|
@llvm/pr-subscribers-backend-powerpc Author: Trevor Gross (tgross35) Changes
Patch is 47.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152625.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/half.ll
similarity index 50%
rename from llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
rename to llvm/test/CodeGen/PowerPC/half.ll
index 50f05cca80458..7cc2ceded8fde 100644
--- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
+++ b/llvm/test/CodeGen/PowerPC/half.ll
@@ -1,4 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc-unknown-unknown \
+; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN: --check-prefix=PPC32
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
; RUN: --check-prefix=P8
@@ -7,10 +10,141 @@
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \
; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
; RUN: --check-prefix=SOFT
+; RUN: llc -mtriple=powerpc64-unknown-unknown \
+; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN: --check-prefix=BE
; Tests for various operations on half precision float. Much of the test is
; copied from test/CodeGen/X86/half.ll.
+
+define void @store(half %x, ptr %p) #0 {
+; PPC32-LABEL: store:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: store:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT: stdu r1, -48(r1)
+; P8-NEXT: std r0, 64(r1)
+; P8-NEXT: mr r30, r4
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: sth r3, 0(r30)
+; P8-NEXT: addi r1, r1, 48
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: store:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: stxsihx f0, 0, r4
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: store:
+; SOFT: # %bb.0:
+; SOFT-NEXT: sth r3, 0(r4)
+; SOFT-NEXT: blr
+;
+; BE-LABEL: store:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r4
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ store half %x, ptr %p
+ ret void
+}
+
+define half @return(ptr %p) #0 {
+; PPC32-LABEL: return:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: return:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: lhz r3, 0(r3)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: return:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxsihzx f0, 0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: return:
+; SOFT: # %bb.0:
+; SOFT-NEXT: lhz r3, 0(r3)
+; SOFT-NEXT: blr
+;
+; BE-LABEL: return:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %r = load half, ptr %p
+ ret half %r
+}
+
define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr #0 {
+; PPC32-LABEL: loadd:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 2(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
; P8-LABEL: loadd:
; P8: # %bb.0: # %entry
; P8-NEXT: mflr r0
@@ -45,6 +179,19 @@ define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr #0
; SOFT-NEXT: ld r0, 16(r1)
; SOFT-NEXT: mtlr r0
; SOFT-NEXT: blr
+;
+; BE-LABEL: loadd:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: lhz r3, 2(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1
%0 = load i16, ptr %arrayidx, align 2
@@ -55,6 +202,18 @@ entry:
declare double @llvm.convert.from.fp16.f64(i16)
define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr #0 {
+; PPC32-LABEL: loadf:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 2(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
; P8-LABEL: loadf:
; P8: # %bb.0: # %entry
; P8-NEXT: mflr r0
@@ -87,6 +246,19 @@ define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr #0 {
; SOFT-NEXT: ld r0, 16(r1)
; SOFT-NEXT: mtlr r0
; SOFT-NEXT: blr
+;
+; BE-LABEL: loadf:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: lhz r3, 2(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1
%0 = load i16, ptr %arrayidx, align 2
@@ -97,6 +269,21 @@ entry:
declare float @llvm.convert.from.fp16.f32(i16)
define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr #0 {
+; PPC32-LABEL: stored:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: bl __truncdfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
; P8-LABEL: stored:
; P8: # %bb.0: # %entry
; P8-NEXT: mflr r0
@@ -140,6 +327,22 @@ define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr #0
; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; SOFT-NEXT: mtlr r0
; SOFT-NEXT: blr
+;
+; BE-LABEL: stored:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r3
+; BE-NEXT: bl __truncdfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
entry:
%0 = tail call i16 @llvm.convert.to.fp16.f64(double %b)
store i16 %0, ptr %a, align 2
@@ -149,6 +352,21 @@ entry:
declare i16 @llvm.convert.to.fp16.f64(double)
define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr #0 {
+; PPC32-LABEL: storef:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
; P8-LABEL: storef:
; P8: # %bb.0: # %entry
; P8-NEXT: mflr r0
@@ -192,6 +410,22 @@ define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr #0
; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; SOFT-NEXT: mtlr r0
; SOFT-NEXT: blr
+;
+; BE-LABEL: storef:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r3
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
entry:
%0 = tail call i16 @llvm.convert.to.fp16.f32(float %b)
store i16 %0, ptr %a, align 2
@@ -200,6 +434,12 @@ entry:
declare i16 @llvm.convert.to.fp16.f32(float)
define void @test_load_store(ptr %in, ptr %out) #0 {
+; PPC32-LABEL: test_load_store:
+; PPC32: # %bb.0:
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: sth r3, 0(r4)
+; PPC32-NEXT: blr
+;
; P8-LABEL: test_load_store:
; P8: # %bb.0:
; P8-NEXT: lhz r3, 0(r3)
@@ -230,11 +470,22 @@ define void @test_load_store(ptr %in, ptr %out) #0 {
; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; SOFT-NEXT: mtlr r0
; SOFT-NEXT: blr
+;
+; BE-LABEL: test_load_store:
+; BE: # %bb.0:
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: sth r3, 0(r4)
+; BE-NEXT: blr
%val = load half, ptr %in
store half %val, ptr %out
ret void
}
define i16 @test_bitcast_from_half(ptr %addr) #0 {
+; PPC32-LABEL: test_bitcast_from_half:
+; PPC32: # %bb.0:
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: blr
+;
; P8-LABEL: test_bitcast_from_half:
; P8: # %bb.0:
; P8-NEXT: lhz r3, 0(r3)
@@ -249,11 +500,21 @@ define i16 @test_bitcast_from_half(ptr %addr) #0 {
; SOFT: # %bb.0:
; SOFT-NEXT: lhz r3, 0(r3)
; SOFT-NEXT: blr
+;
+; BE-LABEL: test_bitcast_from_half:
+; BE: # %bb.0:
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: blr
%val = load half, ptr %addr
%val_int = bitcast half %val to i16
ret i16 %val_int
}
define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 {
+; PPC32-LABEL: test_bitcast_to_half:
+; PPC32: # %bb.0:
+; PPC32-NEXT: sth r4, 0(r3)
+; PPC32-NEXT: blr
+;
; P8-LABEL: test_bitcast_to_half:
; P8: # %bb.0:
; P8-NEXT: sth r4, 0(r3)
@@ -268,11 +529,137 @@ define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 {
; SOFT: # %bb.0:
; SOFT-NEXT: sth r4, 0(r3)
; SOFT-NEXT: blr
+;
+; BE-LABEL: test_bitcast_to_half:
+; BE: # %bb.0:
+; BE-NEXT: sth r4, 0(r3)
+; BE-NEXT: blr
%val_fp = bitcast i16 %in to half
store half %val_fp, ptr %addr
ret void
}
+
+
+; Checks for https://github.com/llvm/llvm-project/issues/97981
+define half @from_bits(i16 %x) #0 {
+; PPC32-LABEL: from_bits:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: clrlwi r3, r3, 16
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: from_bits:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: clrldi r3, r3, 48
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: bl __extendhfsf2
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: from_bits:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: from_bits:
+; SOFT: # %bb.0:
+; SOFT-NEXT: blr
+;
+; BE-LABEL: from_bits:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %res = bitcast i16 %x to half
+ ret half %res
+}
+
+define i16 @to_bits(half %x) #0 {
+; PPC32-LABEL: to_bits:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: clrlwi r3, r3, 16
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
+; P8-LABEL: to_bits:
+; P8: # %bb.0:
+; P8-NEXT: mflr r0
+; P8-NEXT: stdu r1, -32(r1)
+; P8-NEXT: std r0, 48(r1)
+; P8-NEXT: bl __truncsfhf2
+; P8-NEXT: nop
+; P8-NEXT: clrldi r3, r3, 48
+; P8-NEXT: addi r1, r1, 32
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; CHECK-LABEL: to_bits:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: blr
+;
+; SOFT-LABEL: to_bits:
+; SOFT: # %bb.0:
+; SOFT-NEXT: blr
+;
+; BE-LABEL: to_bits:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+ %res = bitcast half %x to i16
+ ret i16 %res
+}
+
define float @test_extend32(ptr %addr) #0 {
+; PPC32-LABEL: test_extend32:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
; P8-LABEL: test_extend32:
; P8: # %bb.0:
; P8-NEXT: mflr r0
@@ -304,11 +691,36 @@ define float @test_extend32(ptr %addr) #0 {
; SOFT-NEXT: ld r0, 16(r1)
; SOFT-NEXT: mtlr r0
; SOFT-NEXT: blr
+;
+; BE-LABEL: test_extend32:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
%val16 = load half, ptr %addr
%val32 = fpext half %val16 to float
ret float %val32
}
define double @test_extend64(ptr %addr) #0 {
+; PPC32-LABEL: test_extend64:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
; P8-LABEL: test_extend64:
; P8: # %bb.0:
; P8-NEXT: mflr r0
@@ -342,11 +754,39 @@ define double @test_extend64(ptr %addr) #0 {
; SOFT-NEXT: ld r0, 16(r1)
; SOFT-NEXT: mtlr r0
; SOFT-NEXT: blr
+;
+; BE-LABEL: test_extend64:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
%val16 = load half, ptr %addr
%val32 = fpext half %val16 to double
ret double %val32
}
define void @test_trunc32(float %in, ptr %addr) #0 {
+; PPC32-LABEL: test_trunc32:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
; P8-LABEL: test_trunc32:
; P8: # %bb.0:
; P8-NEXT: mflr r0
@@ -390,11 +830,42 @@ define void @test_trunc32(float %in, ptr %addr) #0 {
; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; SOFT-NEXT: mtlr r0
; SOFT-NEXT: blr
+;
+; BE-LABEL: test_trunc32:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r4
+; BE-NEXT: bl __truncsfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
%val16 = fptrunc float %in to half
store half %val16, ptr %addr
ret void
}
define void @test_trunc64(double %in, ptr %addr) #0 {
+; PPC32-LABEL: test_trunc64:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r3
+; PPC32-NEXT: bl __truncdfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
; P8-LABEL: test_trunc64:
; P8: # %bb.0:
; P8-NEXT: mflr r0
@@ -437,11 +908,40 @@ define void @test_trunc64(double %in, ptr %addr) #0 {
; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; SOFT-NEXT: mtlr r0
; SOFT-NEXT: blr
+;
+; BE-LABEL: test_trunc64:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT: mr r30, r4
+; BE-NEXT: bl __truncdfhf2
+; BE-NEXT: nop
+; BE-NEXT: sth r3, 0(r30)
+; BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
%val16 = fptrunc double %in to half
store half %val16, ptr %addr
ret void
}
define i64 @test_fptosi_i64(ptr %p) #0 {
+; PPC32-LABEL: test_fptosi_i64:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: lhz r3, 0(r3)
+; PPC32-NEXT: bl __extendhfsf2
+; PPC32-NEXT: bl __fixsfdi
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-NEXT: blr
+;
; P8-LABEL: test_fptosi_i64:
; P8: # %bb.0:
; P8-NEXT: mflr r0
@@ -480,11 +980,43 @@ define i64 @test_fptosi_i64(ptr %p) #0 {
; SOFT-NEXT: ld r0, 16(r1)
; SOFT-NEXT: mtlr r0
; SOFT-NEXT: blr
+;
+; BE-LABEL: test_fptosi_i64:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -128(r1)
+; BE-NEXT: std r0, 144(r1)
+; BE-NEXT: lhz r3, 0(r3)
+; BE-NEXT: bl __extendhfsf2
+; BE-NEXT: nop
+; BE-NEXT: fctidz f0, f1
+; BE-NEXT: stfd f0, 120(r1)
+; BE-NEXT: ld r3, 120(r1)
+; BE-NEXT: addi r1, r1, 128
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
%a = load half, ptr %p, align 2
%r = fptosi half %a to i64
ret i64 %r
}
define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
+; PPC32-LABEL: test_sitofp_i64:
+; PPC32: # %bb.0:
+; PPC32-NEXT: mflr r0
+; PPC32-NEXT: stwu r1, -16(r1)
+; PPC32-NEXT: stw r0, 20(r1)
+; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT: mr r30, r5
+; PPC32-NEXT: bl __floatdisf
+; PPC32-NEXT: bl __truncsfhf2
+; PPC32-NEXT: sth r3, 0(r30)
+; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz r0, 20(r1)
+; PPC32-NEXT: addi r1, r1, 16
+; PPC32-NEXT: mtlr r0
+; PPC32-N...
[truncated]
|
|
@chenzheng1030, @EsmeYi, @lei137 could one of you review this? |
347f2bd to
b0ecc22
Compare
|
Gentle ping for review @chenzheng1030, @EsmeYi, @lei137, @RolandF77 |
|
@nikic perhaps, as you reviewed the others |
b0ecc22 to
4e5ed79
Compare
|
(rebased to recheck CI) |
There was a problem hiding this comment.
| ; RUN: --check-prefix=P8 | |
| ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ | |
| ; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ | |
| ; RUN: --check-prefixes=CHECK,P8 | |
| ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ | |
| ; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ | |
| ; RUN: --check-prefixes=CHECK,P9 |
Would something like this reduce some check lines?
There was a problem hiding this comment.
Not too much, but it does save a few blocks. Applied at https://github.com/llvm/llvm-project/compare/4e5ed79a4fb07e1a6f7bed63f7dff17274214416..7ee47fc234865788853acb6826ac86a91fc13fcc.
`f16` is more functional than just a storage type on the platform, though it does have some codegen issues [1]. To prepare for future changes, do the following nonfunctional updates to the existing `half` test: * Add tests for passing and returning the type directly. * Add tests showing bitcast behavior, which is currently incorrect but serves as a baseline. * Add tests for `fabs` and `copysign` (trivial operations that shouldn't require libcalls). * Add invocations for big-endian and for PPC32. * Rename the test to `half.ll` to reflect its status, which also matches other backends. [1]: llvm#97975
4e5ed79 to
7ee47fc
Compare
`f16` is more functional than just a storage type on the platform, though it does have some codegen issues [1]. To prepare for future changes, do the following nonfunctional updates to the existing `half` test: add tests for `fabs` and `copysign` (trivial operations that shouldn't require libcalls); rename the test to `half.ll` to reflect its status, which also matches other backends.