diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 910a50214df2f..0016f9d553077 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -163,6 +163,9 @@ Changes to the RISC-V Backend Changes to the WebAssembly Backend ---------------------------------- +* `half` now uses a soft float lowering, which resolves various precision and + bitcast issues. + Changes to the Windows Target ----------------------------- diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index c37970f458e36..d3e1fe7734794 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -29,6 +29,8 @@ class WebAssemblyTargetLowering final : public TargetLowering { MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override; MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override; + bool softPromoteHalfType() const override { return true; } + private: /// Keep a pointer to the WebAssemblySubtarget around so that we can make the /// right decision when generating code for different targets. diff --git a/llvm/test/CodeGen/Generic/half-op.ll b/llvm/test/CodeGen/Generic/half-op.ll index f8ad39f9456aa..30048835201cc 100644 --- a/llvm/test/CodeGen/Generic/half-op.ll +++ b/llvm/test/CodeGen/Generic/half-op.ll @@ -37,7 +37,7 @@ ; RUN: %if spirv-registered-target %{ llc %s -o - -mtriple=spirv-unknown-unknown | FileCheck %s --check-prefixes=NOCRASH %} ; RUN: %if systemz-registered-target %{ llc %s -o - -mtriple=s390x-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %} ; RUN: %if ve-registered-target %{ llc %s -o - -mtriple=ve-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} -; RUN: %if webassembly-registered-target %{ llc %s -o - -mtriple=wasm32-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %} +; RUN: %if webassembly-registered-target %{ llc %s -o - -mtriple=wasm32-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %} ; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %} ; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %} ; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %} diff --git a/llvm/test/CodeGen/Generic/half.ll b/llvm/test/CodeGen/Generic/half.ll index ef7bfe2f2d9ce..55831f9438df6 100644 --- a/llvm/test/CodeGen/Generic/half.ll +++ b/llvm/test/CodeGen/Generic/half.ll @@ -39,7 +39,7 @@ ; RUN: %if spirv-registered-target %{ llc %s -o - -mtriple=spirv-unknown-unknown | FileCheck %s --check-prefixes=NOCRASH %} ; RUN: %if systemz-registered-target %{ llc %s -o - -mtriple=s390x-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if ve-registered-target %{ llc %s -o - -mtriple=ve-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD %} -; RUN: %if webassembly-registered-target %{ llc %s -o - -mtriple=wasm32-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD %} +; RUN: %if webassembly-registered-target %{ llc %s -o - -mtriple=wasm32-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK %} ; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %} diff --git a/llvm/test/CodeGen/WebAssembly/f16.ll b/llvm/test/CodeGen/WebAssembly/f16.ll index 0486975f6cba7..3c31d55abfadc 100644 --- a/llvm/test/CodeGen/WebAssembly/f16.ll +++ b/llvm/test/CodeGen/WebAssembly/f16.ll @@ -10,12 +10,11 @@ target triple = "wasm32-unknown-unknown" define void @store(half %x, ptr %p) nounwind { ; ALL-LABEL: store: -; ALL: .functype store (f32, i32) -> () +; ALL: .functype store (i32, i32) -> () ; ALL-NEXT: # %bb.0: -; ALL-NEXT: local.get $push2=, 1 -; ALL-NEXT: local.get $push1=, 0 -; ALL-NEXT: call $push0=, __truncsfhf2, $pop1 -; ALL-NEXT: i32.store16 0($pop2), $pop0 +; ALL-NEXT: local.get $push1=, 1 +; ALL-NEXT: local.get $push0=, 0 +; ALL-NEXT: i32.store16 0($pop1), $pop0 ; ALL-NEXT: return store half %x, ptr %p ret void @@ -23,12 +22,11 @@ define void @store(half %x, ptr %p) nounwind { define half @return(ptr %p) nounwind { ; ALL-LABEL: return: -; ALL: .functype return (i32) -> (f32) +; ALL: .functype return (i32) -> (i32) ; ALL-NEXT: # %bb.0: -; ALL-NEXT: local.get $push2=, 0 -; ALL-NEXT: i32.load16_u $push0=, 0($pop2) -; ALL-NEXT: call $push1=, __extendhfsf2, $pop0 -; ALL-NEXT: return $pop1 +; ALL-NEXT: local.get $push1=, 0 +; ALL-NEXT: i32.load16_u $push0=, 0($pop1) +; ALL-NEXT: return $pop0 %r = load half, ptr %p ret half %r } @@ -80,50 +78,28 @@ define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr noun } define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind { -; DEFISEL-LABEL: stored: -; DEFISEL: .functype stored (i32, f64) -> () -; DEFISEL-NEXT: # %bb.0: -; DEFISEL-NEXT: local.get $push2=, 0 -; DEFISEL-NEXT: local.get $push1=, 1 -; DEFISEL-NEXT: call $push0=, __truncdfhf2, $pop1 -; DEFISEL-NEXT: i32.store16 0($pop2), $pop0 -; DEFISEL-NEXT: return -; -; FASTISEL-LABEL: stored: -; FASTISEL: .functype stored (i32, f64) -> () -; FASTISEL-NEXT: # %bb.0: -; FASTISEL-NEXT: local.get $push4=, 0 -; FASTISEL-NEXT: local.get $push3=, 1 -; FASTISEL-NEXT: call $push2=, __truncdfhf2, $pop3 -; FASTISEL-NEXT: i32.const $push1=, 65535 -; FASTISEL-NEXT: i32.and $push0=, $pop2, $pop1 -; FASTISEL-NEXT: i32.store16 0($pop4), $pop0 -; FASTISEL-NEXT: return +; ALL-LABEL: stored: +; ALL: .functype stored (i32, f64) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push2=, 0 +; ALL-NEXT: local.get $push1=, 1 +; ALL-NEXT: call $push0=, __truncdfhf2, $pop1 +; ALL-NEXT: i32.store16 0($pop2), $pop0 +; ALL-NEXT: return %x = tail call i16 @llvm.convert.to.fp16.f64(double %b) store i16 %x, ptr %a, align 2 ret void } define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind { -; DEFISEL-LABEL: storef: -; DEFISEL: .functype storef (i32, f32) -> () -; DEFISEL-NEXT: # %bb.0: -; DEFISEL-NEXT: local.get $push2=, 0 -; DEFISEL-NEXT: local.get $push1=, 1 -; DEFISEL-NEXT: call $push0=, __truncsfhf2, $pop1 -; DEFISEL-NEXT: i32.store16 0($pop2), $pop0 -; DEFISEL-NEXT: return -; -; FASTISEL-LABEL: storef: -; FASTISEL: .functype storef (i32, f32) -> () -; FASTISEL-NEXT: # %bb.0: -; FASTISEL-NEXT: local.get $push4=, 0 -; FASTISEL-NEXT: local.get $push3=, 1 -; FASTISEL-NEXT: call $push2=, __truncsfhf2, $pop3 -; FASTISEL-NEXT: i32.const $push1=, 65535 -; FASTISEL-NEXT: i32.and $push0=, $pop2, $pop1 -; FASTISEL-NEXT: i32.store16 0($pop4), $pop0 -; FASTISEL-NEXT: return +; ALL-LABEL: storef: +; ALL: .functype storef (i32, f32) -> () +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push2=, 0 +; ALL-NEXT: local.get $push1=, 1 +; ALL-NEXT: call $push0=, __truncsfhf2, $pop1 +; ALL-NEXT: i32.store16 0($pop2), $pop0 +; ALL-NEXT: return %x = tail call i16 @llvm.convert.to.fp16.f32(float %b) store i16 %x, ptr %a, align 2 ret void @@ -170,33 +146,20 @@ define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind { define half @from_bits(i16 %x) nounwind { ; ALL-LABEL: from_bits: -; ALL: .functype from_bits (i32) -> (f32) +; ALL: .functype from_bits (i32) -> (i32) ; ALL-NEXT: # %bb.0: -; ALL-NEXT: local.get $push1=, 0 -; ALL-NEXT: call $push0=, __extendhfsf2, $pop1 +; ALL-NEXT: local.get $push0=, 0 ; ALL-NEXT: return $pop0 %res = bitcast i16 %x to half ret half %res } define i16 @to_bits(half %x) nounwind { -; DEFISEL-LABEL: to_bits: -; DEFISEL: .functype to_bits (f32) -> (i32) -; DEFISEL-NEXT: # %bb.0: -; DEFISEL-NEXT: local.get $push3=, 0 -; DEFISEL-NEXT: call $push1=, __truncsfhf2, $pop3 -; DEFISEL-NEXT: i32.const $push0=, 65535 -; DEFISEL-NEXT: i32.and $push2=, $pop1, $pop0 -; DEFISEL-NEXT: return $pop2 -; -; FASTISEL-LABEL: to_bits: -; FASTISEL: .functype to_bits (f32) -> (i32) -; FASTISEL-NEXT: # %bb.0: -; FASTISEL-NEXT: local.get $push3=, 0 -; FASTISEL-NEXT: call $push2=, __truncsfhf2, $pop3 -; FASTISEL-NEXT: i32.const $push1=, 65535 -; FASTISEL-NEXT: i32.and $push0=, $pop2, $pop1 -; FASTISEL-NEXT: return $pop0 +; ALL-LABEL: to_bits: +; ALL: .functype to_bits (i32) -> (i32) +; ALL-NEXT: # %bb.0: +; ALL-NEXT: local.get $push0=, 0 +; ALL-NEXT: return $pop0 %res = bitcast half %x to i16 ret i16 %res } @@ -559,27 +522,35 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind { ; DEFISEL-LABEL: test_sitofp_fadd_i32: ; DEFISEL: .functype test_sitofp_fadd_i32 (i32, i32) -> (f32) ; DEFISEL-NEXT: # %bb.0: -; DEFISEL-NEXT: local.get $push6=, 1 -; DEFISEL-NEXT: i32.load16_u $push1=, 0($pop6) +; DEFISEL-NEXT: local.get $push8=, 1 +; DEFISEL-NEXT: i32.load16_u $push7=, 0($pop8) +; DEFISEL-NEXT: local.set 1, $pop7 +; DEFISEL-NEXT: local.get $push9=, 0 +; DEFISEL-NEXT: f32.convert_i32_s $push0=, $pop9 +; DEFISEL-NEXT: call $push1=, __truncsfhf2, $pop0 ; DEFISEL-NEXT: call $push2=, __extendhfsf2, $pop1 -; DEFISEL-NEXT: local.get $push7=, 0 -; DEFISEL-NEXT: f32.convert_i32_s $push0=, $pop7 -; DEFISEL-NEXT: call $push3=, __truncsfhf2, $pop0 -; DEFISEL-NEXT: call $push4=, __extendhfsf2, $pop3 -; DEFISEL-NEXT: f32.add $push5=, $pop2, $pop4 -; DEFISEL-NEXT: return $pop5 +; DEFISEL-NEXT: local.get $push10=, 1 +; DEFISEL-NEXT: call $push3=, __extendhfsf2, $pop10 +; DEFISEL-NEXT: f32.add $push4=, $pop2, $pop3 +; DEFISEL-NEXT: call $push5=, __truncsfhf2, $pop4 +; DEFISEL-NEXT: call $push6=, __extendhfsf2, $pop5 +; DEFISEL-NEXT: return $pop6 ; ; FASTISEL-LABEL: test_sitofp_fadd_i32: ; FASTISEL: .functype test_sitofp_fadd_i32 (i32, i32) -> (f32) ; FASTISEL-NEXT: # %bb.0: -; FASTISEL-NEXT: local.get $push6=, 1 -; FASTISEL-NEXT: i32.load16_u $push2=, 0($pop6) +; FASTISEL-NEXT: local.get $push8=, 1 +; FASTISEL-NEXT: i32.load16_u $push7=, 0($pop8) +; FASTISEL-NEXT: local.set 1, $pop7 +; FASTISEL-NEXT: local.get $push9=, 0 +; FASTISEL-NEXT: f32.convert_i32_s $push1=, $pop9 +; FASTISEL-NEXT: call $push2=, __truncsfhf2, $pop1 ; FASTISEL-NEXT: call $push3=, __extendhfsf2, $pop2 -; FASTISEL-NEXT: local.get $push7=, 0 -; FASTISEL-NEXT: f32.convert_i32_s $push1=, $pop7 -; FASTISEL-NEXT: call $push4=, __truncsfhf2, $pop1 -; FASTISEL-NEXT: call $push5=, __extendhfsf2, $pop4 -; FASTISEL-NEXT: f32.add $push0=, $pop3, $pop5 +; FASTISEL-NEXT: local.get $push10=, 1 +; FASTISEL-NEXT: call $push4=, __extendhfsf2, $pop10 +; FASTISEL-NEXT: f32.add $push5=, $pop3, $pop4 +; FASTISEL-NEXT: call $push6=, __truncsfhf2, $pop5 +; FASTISEL-NEXT: call $push0=, __extendhfsf2, $pop6 ; FASTISEL-NEXT: return $pop0 %tmp0 = load half, ptr %b %tmp1 = sitofp i32 %a to half @@ -590,17 +561,20 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind { define half @chained_fp_ops(half %x) { ; ALL-LABEL: chained_fp_ops: -; ALL: .functype chained_fp_ops (f32) -> (f32) +; ALL: .functype chained_fp_ops (i32) -> (i32) +; ALL-NEXT: .local f32 ; ALL-NEXT: # %bb.0: # %start -; ALL-NEXT: local.get $push6=, 0 -; ALL-NEXT: call $push0=, __truncsfhf2, $pop6 -; ALL-NEXT: call $push5=, __extendhfsf2, $pop0 -; ALL-NEXT: local.tee $push4=, 0, $pop5 -; ALL-NEXT: local.get $push7=, 0 -; ALL-NEXT: f32.add $push1=, $pop4, $pop7 -; ALL-NEXT: f32.const $push2=, 0x1p-1 -; ALL-NEXT: f32.mul $push3=, $pop1, $pop2 -; ALL-NEXT: return $pop3 +; ALL-NEXT: local.get $push8=, 0 +; ALL-NEXT: call $push7=, __extendhfsf2, $pop8 +; ALL-NEXT: local.tee $push6=, 1, $pop7 +; ALL-NEXT: local.get $push9=, 1 +; ALL-NEXT: f32.add $push0=, $pop6, $pop9 +; ALL-NEXT: call $push2=, __truncsfhf2, $pop0 +; ALL-NEXT: call $push3=, __extendhfsf2, $pop2 +; ALL-NEXT: f32.const $push1=, 0x1p-1 +; ALL-NEXT: f32.mul $push4=, $pop3, $pop1 +; ALL-NEXT: call $push5=, __truncsfhf2, $pop4 +; ALL-NEXT: return $pop5 start: %y = fmul half %x, 0xH4000 %z = fdiv half %y, 0xH4000 @@ -609,16 +583,15 @@ start: define half @test_select_cc(half) nounwind { ; ALL-LABEL: test_select_cc: -; ALL: .functype test_select_cc (f32) -> (f32) +; ALL: .functype test_select_cc (i32) -> (i32) ; ALL-NEXT: # %bb.0: -; ALL-NEXT: f32.const $push4=, 0x1p0 +; ALL-NEXT: i32.const $push4=, 15360 +; ALL-NEXT: i32.const $push3=, 0 +; ALL-NEXT: local.get $push6=, 0 +; ALL-NEXT: call $push1=, __extendhfsf2, $pop6 ; ALL-NEXT: f32.const $push0=, 0x0p0 -; ALL-NEXT: local.get $push7=, 0 -; ALL-NEXT: call $push1=, __truncsfhf2, $pop7 -; ALL-NEXT: call $push2=, __extendhfsf2, $pop1 -; ALL-NEXT: f32.const $push6=, 0x0p0 -; ALL-NEXT: f32.ne $push3=, $pop2, $pop6 -; ALL-NEXT: f32.select $push5=, $pop4, $pop0, $pop3 +; ALL-NEXT: f32.ne $push2=, $pop1, $pop0 +; ALL-NEXT: i32.select $push5=, $pop4, $pop3, $pop2 ; ALL-NEXT: return $pop5 %2 = fcmp une half %0, 0xH0000 %3 = uitofp i1 %2 to half @@ -627,27 +600,28 @@ define half @test_select_cc(half) nounwind { define half @fabs(half %x) nounwind { ; ALL-LABEL: fabs: -; ALL: .functype fabs (f32) -> (f32) +; ALL: .functype fabs (i32) -> (i32) ; ALL-NEXT: # %bb.0: -; ALL-NEXT: local.get $push3=, 0 -; ALL-NEXT: call $push0=, __truncsfhf2, $pop3 -; ALL-NEXT: call $push1=, __extendhfsf2, $pop0 -; ALL-NEXT: f32.abs $push2=, $pop1 -; ALL-NEXT: return $pop2 +; ALL-NEXT: local.get $push2=, 0 +; ALL-NEXT: i32.const $push0=, 32767 +; ALL-NEXT: i32.and $push1=, $pop2, $pop0 +; ALL-NEXT: return $pop1 %a = call half @llvm.fabs.f16(half %x) ret half %a } define half @fcopysign(half %x, half %y) nounwind { ; ALL-LABEL: fcopysign: -; ALL: .functype fcopysign (f32, f32) -> (f32) +; ALL: .functype fcopysign (i32, i32) -> (i32) ; ALL-NEXT: # %bb.0: -; ALL-NEXT: local.get $push3=, 0 -; ALL-NEXT: call $push0=, __truncsfhf2, $pop3 -; ALL-NEXT: call $push1=, __extendhfsf2, $pop0 -; ALL-NEXT: local.get $push4=, 1 -; ALL-NEXT: f32.copysign $push2=, $pop1, $pop4 -; ALL-NEXT: return $pop2 +; ALL-NEXT: local.get $push5=, 0 +; ALL-NEXT: i32.const $push2=, 32767 +; ALL-NEXT: i32.and $push3=, $pop5, $pop2 +; ALL-NEXT: local.get $push6=, 1 +; ALL-NEXT: i32.const $push0=, -32768 +; ALL-NEXT: i32.and $push1=, $pop6, $pop0 +; ALL-NEXT: i32.or $push4=, $pop3, $pop1 +; ALL-NEXT: return $pop4 %a = call half @llvm.copysign.f16(half %x, half %y) ret half %a } diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll index 59f3edc7168be..267d94b601666 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -103,10 +103,9 @@ entry: define i32 @stest_f16i32(half %x) { ; CHECK-LABEL: stest_f16i32: -; CHECK: .functype stest_f16i32 (f32) -> (i32) +; CHECK: .functype stest_f16i32 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return @@ -122,10 +121,9 @@ entry: define i32 @stest_f16i32_cse(half %x) { ; CHECK-LABEL: stest_f16i32_cse: -; CHECK: .functype stest_f16i32_cse (f32) -> (i32) +; CHECK: .functype stest_f16i32_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i64.trunc_sat_f32_s ; CHECK-NEXT: i32.wrap_i64 @@ -138,10 +136,9 @@ entry: define i32 @utest_f16i32(half %x) { ; CHECK-LABEL: utest_f16i32: -; CHECK: .functype utest_f16i32 (f32) -> (i32) +; CHECK: .functype utest_f16i32 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -155,10 +152,9 @@ entry: define i32 @utest_f16i32_cse(half %x) { ; CHECK-LABEL: utest_f16i32_cse: -; CHECK: .functype utest_f16i32_cse (f32) -> (i32) +; CHECK: .functype utest_f16i32_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i64.trunc_sat_f32_u ; CHECK-NEXT: i32.wrap_i64 @@ -171,10 +167,9 @@ entry: define i32 @ustest_f16i32(half %x) { ; CHECK-LABEL: ustest_f16i32: -; CHECK: .functype ustest_f16i32 (f32) -> (i32) +; CHECK: .functype ustest_f16i32 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -190,10 +185,9 @@ entry: define i32 @ustest_f16i32_cse(half %x) { ; CHECK-LABEL: ustest_f16i32_cse: -; CHECK: .functype ustest_f16i32_cse (f32) -> (i32) +; CHECK: .functype ustest_f16i32_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -373,22 +367,20 @@ entry: define i16 @stest_f16i16(half %x) { ; CHECK-LABEL: stest_f16i16: -; CHECK: .functype stest_f16i16 (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype stest_f16i16 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 32767 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 32767 ; CHECK-NEXT: i32.lt_s ; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const -32768 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -32768 ; CHECK-NEXT: i32.gt_s ; CHECK-NEXT: i32.select @@ -405,16 +397,14 @@ entry: define i16 @utest_f16i16(half %x) { ; CHECK-LABEL: utest_f16i16: -; CHECK: .functype utest_f16i16 (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype utest_f16i16 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 65535 ; CHECK-NEXT: i32.lt_u ; CHECK-NEXT: i32.select @@ -429,10 +419,9 @@ entry: define i16 @utest_f16i16_cse(half %x) { ; CHECK-LABEL: utest_f16i16_cse: -; CHECK: .functype utest_f16i16_cse (f32) -> (i32) +; CHECK: .functype utest_f16i16_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -444,22 +433,20 @@ entry: define i16 @ustest_f16i16(half %x) { ; CHECK-LABEL: ustest_f16i16: -; CHECK: .functype ustest_f16i16 (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype ustest_f16i16 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 65535 ; CHECK-NEXT: i32.lt_s ; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: i32.gt_s ; CHECK-NEXT: i32.select @@ -476,10 +463,9 @@ entry: define i16 @ustest_f16i16_cse(half %x) { ; CHECK-LABEL: ustest_f16i16_cse: -; CHECK: .functype ustest_f16i16_cse (f32) -> (i32) +; CHECK: .functype ustest_f16i16_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -863,10 +849,9 @@ entry: define i64 @stest_f16i64(half %x) { ; CHECK-LABEL: stest_f16i64: -; CHECK: .functype stest_f16i64 (f32) -> (i64) +; CHECK: .functype stest_f16i64 (i32) -> (i64) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i64.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return @@ -882,7 +867,7 @@ entry: define i64 @utest_f16i64(half %x) { ; CHECK-LABEL: utest_f16i64: -; CHECK: .functype utest_f16i64 (f32) -> (i64) +; CHECK: .functype utest_f16i64 (i32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -892,7 +877,6 @@ define i64 @utest_f16i64(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 1 @@ -921,7 +905,7 @@ entry: define i64 @utest_f16i64_cse(half %x) { ; CHECK-LABEL: utest_f16i64_cse: -; CHECK: .functype utest_f16i64_cse (f32) -> (i64) +; CHECK: .functype utest_f16i64_cse (i32) -> (i64) ; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -931,7 +915,6 @@ define i64 @utest_f16i64_cse(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 1 @@ -951,7 +934,7 @@ entry: define i64 @ustest_f16i64(half %x) { ; CHECK-LABEL: ustest_f16i64: -; CHECK: .functype ustest_f16i64 (f32) -> (i64) +; CHECK: .functype ustest_f16i64 (i32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -961,7 +944,6 @@ define i64 @ustest_f16i64(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 1 @@ -1103,10 +1085,9 @@ entry: define i32 @stest_f16i32_mm(half %x) { ; CHECK-LABEL: stest_f16i32_mm: -; CHECK: .functype stest_f16i32_mm (f32) -> (i32) +; CHECK: .functype stest_f16i32_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return @@ -1120,10 +1101,9 @@ entry: define i32 @utest_f16i32_mm(half %x) { ; CHECK-LABEL: utest_f16i32_mm: -; CHECK: .functype utest_f16i32_mm (f32) -> (i32) +; CHECK: .functype utest_f16i32_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -1136,10 +1116,9 @@ entry: define i32 @ustest_f16i32_mm(half %x) { ; CHECK-LABEL: ustest_f16i32_mm: -; CHECK: .functype ustest_f16i32_mm (f32) -> (i32) +; CHECK: .functype ustest_f16i32_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -1153,10 +1132,9 @@ entry: define i32 @ustest_f16i32_mm_cse(half %x) { ; CHECK-LABEL: ustest_f16i32_mm_cse: -; CHECK: .functype ustest_f16i32_mm_cse (f32) -> (i32) +; CHECK: .functype ustest_f16i32_mm_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -1325,22 +1303,20 @@ entry: define i16 @stest_f16i16_mm(half %x) { ; CHECK-LABEL: stest_f16i16_mm: -; CHECK: .functype stest_f16i16_mm (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype stest_f16i16_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 32767 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 32767 ; CHECK-NEXT: i32.lt_s ; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const -32768 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -32768 ; CHECK-NEXT: i32.gt_s ; CHECK-NEXT: i32.select @@ -1355,16 +1331,14 @@ entry: define i16 @utest_f16i16_mm(half %x) { ; CHECK-LABEL: utest_f16i16_mm: -; CHECK: .functype utest_f16i16_mm (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype utest_f16i16_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 65535 ; CHECK-NEXT: i32.lt_u ; CHECK-NEXT: i32.select @@ -1378,22 +1352,20 @@ entry: define i16 @ustest_f16i16_mm(half %x) { ; CHECK-LABEL: ustest_f16i16_mm: -; CHECK: .functype ustest_f16i16_mm (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype ustest_f16i16_mm (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 65535 ; CHECK-NEXT: i32.lt_s ; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: i32.gt_s ; CHECK-NEXT: i32.select @@ -1408,10 +1380,9 @@ entry: define i16 @ustest_f16i16_mm_cse(half %x) { ; CHECK-LABEL: ustest_f16i16_mm_cse: -; CHECK: .functype ustest_f16i16_mm_cse (f32) -> (i32) +; CHECK: .functype ustest_f16i16_mm_cse (i32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return @@ -1622,10 +1593,9 @@ entry: define i64 @stest_f16i64_mm(half %x) { ; CHECK-LABEL: stest_f16i64_mm: -; CHECK: .functype stest_f16i64_mm (f32) -> (i64) +; CHECK: .functype stest_f16i64_mm (i32) -> (i64) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i64.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return @@ -1639,7 +1609,7 @@ entry: define i64 @utest_f16i64_mm(half %x) { ; CHECK-LABEL: utest_f16i64_mm: -; CHECK: .functype utest_f16i64_mm (f32) -> (i64) +; CHECK: .functype utest_f16i64_mm (i32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1649,7 +1619,6 @@ define i64 @utest_f16i64_mm(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 1 @@ -1677,7 +1646,7 @@ entry: define i64 @ustest_f16i64_mm(half %x) { ; CHECK-LABEL: ustest_f16i64_mm: -; CHECK: .functype ustest_f16i64_mm (f32) -> (i64) +; CHECK: .functype ustest_f16i64_mm (i32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1687,7 +1656,6 @@ define i64 @ustest_f16i64_mm(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 1 @@ -1726,7 +1694,7 @@ entry: define i64 @utest_f16i64_mm_cse(half %x) { ; CHECK-LABEL: utest_f16i64_mm_cse: -; CHECK: .functype utest_f16i64_mm_cse (f32) -> (i64) +; CHECK: .functype utest_f16i64_mm_cse (i32) -> (i64) ; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1736,7 +1704,6 @@ define i64 @utest_f16i64_mm_cse(half %x) { ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 1 @@ -1758,21 +1725,19 @@ entry: define i32 @ustest_f16i32_nsat(half %x) { ; CHECK-LABEL: ustest_f16i32_nsat: -; CHECK: .functype ustest_f16i32_nsat (f32) -> (i32) -; CHECK-NEXT: .local i32 +; CHECK: .functype ustest_f16i32_nsat (i32) -> (i32) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 31 ; CHECK-NEXT: i32.shr_s -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.and -; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: i32.gt_s ; CHECK-NEXT: i32.select diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index c44b3bb5a9968..55409d5b2d8c3 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -174,27 +174,24 @@ entry: define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-LABEL: stest_f16i32: -; CHECK: .functype stest_f16i32 (f32, f32, f32, f32) -> (v128) +; CHECK: .functype stest_f16i32 (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 @@ -211,27 +208,24 @@ entry: define <4 x i32> @utest_f16i32(<4 x half> %x) { ; CHECK-LABEL: utest_f16i32: -; CHECK: .functype utest_f16i32 (f32, f32, f32, f32) -> (v128) +; CHECK: .functype utest_f16i32 (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 @@ -246,27 +240,24 @@ entry: define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-LABEL: ustest_f16i32: -; CHECK: .functype ustest_f16i32 (f32, f32, f32, f32) -> (v128) +; CHECK: .functype ustest_f16i32 (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 @@ -433,56 +424,49 @@ entry: define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-LABEL: stest_f16i16: -; CHECK: .functype stest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) +; CHECK: .functype stest_f16i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: i16x8.narrow_i32x4_s @@ -499,56 +483,49 @@ entry: define <8 x i16> @utest_f16i16(<8 x half> %x) { ; CHECK-LABEL: utest_f16i16: -; CHECK: .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) +; CHECK: .functype utest_f16i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: i16x8.narrow_i32x4_u @@ -563,68 +540,60 @@ entry: define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-LABEL: ustest_f16i16: -; CHECK: .functype ustest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128 +; CHECK: .functype ustest_f16i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 8 +; CHECK-NEXT: local.tee 13 ; CHECK-NEXT: i32x4.min_s ; CHECK-NEXT: v128.const 0, 0, 0, 0 -; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: local.tee 14 ; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 13 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 14 ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return @@ -1164,7 +1133,7 @@ entry: define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-LABEL: stest_f16i64: -; CHECK: .functype stest_f16i64 (f32, f32) -> (v128) +; CHECK: .functype stest_f16i64 (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1176,12 +1145,10 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 @@ -1275,7 +1242,7 @@ entry: define <2 x i64> @utest_f16i64(<2 x half> %x) { ; CHECK-LABEL: utest_f16i64: -; CHECK: .functype utest_f16i64 (f32, f32) -> (v128) +; CHECK: .functype utest_f16i64 (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1287,12 +1254,10 @@ define <2 x i64> @utest_f16i64(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 2 @@ -1334,7 +1299,7 @@ entry: define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64: -; CHECK: .functype ustest_f16i64 (f32, f32) -> (v128) +; CHECK: .functype ustest_f16i64 (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1346,12 +1311,10 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 @@ -1594,27 +1557,24 @@ entry: define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: stest_f16i32_mm: -; CHECK: .functype stest_f16i32_mm (f32, f32, f32, f32) -> (v128) +; CHECK: .functype stest_f16i32_mm (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 @@ -1629,27 +1589,24 @@ entry: define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: utest_f16i32_mm: -; CHECK: .functype utest_f16i32_mm (f32, f32, f32, f32) -> (v128) +; CHECK: .functype utest_f16i32_mm (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 @@ -1663,27 +1620,24 @@ entry: define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: ustest_f16i32_mm: -; CHECK: .functype ustest_f16i32_mm (f32, f32, f32, f32) -> (v128) +; CHECK: .functype ustest_f16i32_mm (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 @@ -1838,56 +1792,49 @@ entry: define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: stest_f16i16_mm: -; CHECK: .functype stest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) +; CHECK: .functype stest_f16i16_mm (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: i16x8.narrow_i32x4_s @@ -1902,56 +1849,49 @@ entry: define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: utest_f16i16_mm: -; CHECK: .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) +; CHECK: .functype utest_f16i16_mm (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: i16x8.narrow_i32x4_u @@ -1965,68 +1905,60 @@ entry: define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: ustest_f16i16_mm: -; CHECK: .functype ustest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128 +; CHECK: .functype ustest_f16i16_mm (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, f32, f32, f32, f32, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 5 +; CHECK-NEXT: local.set 8 ; CHECK-NEXT: local.get 4 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.set 9 ; CHECK-NEXT: local.get 6 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: local.set 10 ; CHECK-NEXT: local.get 7 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 7 +; CHECK-NEXT: local.set 11 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 12 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 12 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 8 +; CHECK-NEXT: local.tee 13 ; CHECK-NEXT: i32x4.min_s ; CHECK-NEXT: v128.const 0, 0, 0, 0 -; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: local.tee 14 ; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 8 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: local.get 6 +; CHECK-NEXT: local.get 10 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 -; CHECK-NEXT: local.get 7 +; CHECK-NEXT: local.get 11 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 13 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 14 ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return @@ -2522,7 +2454,7 @@ entry: define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: stest_f16i64_mm: -; CHECK: .functype stest_f16i64_mm (f32, f32) -> (v128) +; CHECK: .functype stest_f16i64_mm (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -2534,12 +2466,10 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 @@ -2631,7 +2561,7 @@ entry: define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: utest_f16i64_mm: -; CHECK: .functype utest_f16i64_mm (f32, f32) -> (v128) +; CHECK: .functype utest_f16i64_mm (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -2643,12 +2573,10 @@ define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 2 @@ -2689,7 +2617,7 @@ entry: define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64_mm: -; CHECK: .functype ustest_f16i64_mm (f32, f32) -> (v128) +; CHECK: .functype ustest_f16i64_mm (i32, i32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -2701,12 +2629,10 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixsfti ; CHECK-NEXT: local.get 2 @@ -2770,35 +2696,31 @@ entry: define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { ; CHECK-LABEL: ustest_f16i32_nsat: -; CHECK: .functype ustest_f16i32_nsat (f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128 +; CHECK: .functype ustest_f16i32_nsat (i32, i32, i32, i32) -> (v128) +; CHECK-NEXT: .local f32, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.set 4 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 0, 0, 0, 0 -; CHECK-NEXT: local.tee 4 +; CHECK-NEXT: local.tee 5 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: # fallthrough-return entry: diff --git a/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll b/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll index 3c10b09525573..0608a60b739f8 100644 --- a/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll +++ b/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll @@ -4,36 +4,36 @@ define { half, half } @test_sincos_f16(half %a) #0 { ; WASM32-LABEL: test_sincos_f16: -; WASM32: .functype test_sincos_f16 (i32, f32) -> () +; WASM32: .functype test_sincos_f16 (i32, i32) -> () +; WASM32-NEXT: .local f32 ; WASM32-NEXT: # %bb.0: ; WASM32-NEXT: local.get 0 ; WASM32-NEXT: local.get 1 -; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: call __extendhfsf2 -; WASM32-NEXT: local.tee 1 +; WASM32-NEXT: local.tee 2 ; WASM32-NEXT: call cosf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 2 ; WASM32-NEXT: local.get 0 -; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 2 ; WASM32-NEXT: call sinf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 0 ; WASM32-NEXT: # fallthrough-return ; ; WASM64-LABEL: test_sincos_f16: -; WASM64: .functype test_sincos_f16 (i64, f32) -> () +; WASM64: .functype test_sincos_f16 (i64, i32) -> () +; WASM64-NEXT: .local f32 ; WASM64-NEXT: # %bb.0: ; WASM64-NEXT: local.get 0 ; WASM64-NEXT: local.get 1 -; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: call __extendhfsf2 -; WASM64-NEXT: local.tee 1 +; WASM64-NEXT: local.tee 2 ; WASM64-NEXT: call cosf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 2 ; WASM64-NEXT: local.get 0 -; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 2 ; WASM64-NEXT: call sinf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 0 @@ -44,12 +44,12 @@ define { half, half } @test_sincos_f16(half %a) #0 { define half @test_sincos_f16_only_use_sin(half %a) #0 { ; CHECK-LABEL: test_sincos_f16_only_use_sin: -; CHECK: .functype test_sincos_f16_only_use_sin (f32) -> (f32) +; CHECK: .functype test_sincos_f16_only_use_sin (i32) -> (i32) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call sinf +; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: # fallthrough-return %result = call { half, half } @llvm.sincos.f16(half %a) %result.0 = extractvalue { half, half } %result, 0 @@ -58,12 +58,12 @@ define half @test_sincos_f16_only_use_sin(half %a) #0 { define half @test_sincos_f16_only_use_cos(half %a) #0 { ; CHECK-LABEL: test_sincos_f16_only_use_cos: -; CHECK: .functype test_sincos_f16_only_use_cos (f32) -> (f32) +; CHECK: .functype test_sincos_f16_only_use_cos (i32) -> (i32) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call cosf +; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: # fallthrough-return %result = call { half, half } @llvm.sincos.f16(half %a) %result.1 = extractvalue { half, half } %result, 1 @@ -72,62 +72,60 @@ define half @test_sincos_f16_only_use_cos(half %a) #0 { define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) #0 { ; WASM32-LABEL: test_sincos_v2f16: -; WASM32: .functype test_sincos_v2f16 (i32, f32, f32) -> () +; WASM32: .functype test_sincos_v2f16 (i32, i32, i32) -> () +; WASM32-NEXT: .local f32, f32 ; WASM32-NEXT: # %bb.0: ; WASM32-NEXT: local.get 0 ; WASM32-NEXT: local.get 2 -; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: call __extendhfsf2 -; WASM32-NEXT: local.tee 2 +; WASM32-NEXT: local.tee 3 ; WASM32-NEXT: call cosf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 6 ; WASM32-NEXT: local.get 0 ; WASM32-NEXT: local.get 1 -; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: call __extendhfsf2 -; WASM32-NEXT: local.tee 1 +; WASM32-NEXT: local.tee 4 ; WASM32-NEXT: call cosf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 4 ; WASM32-NEXT: local.get 0 -; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 3 ; WASM32-NEXT: call sinf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 2 ; WASM32-NEXT: local.get 0 -; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 4 ; WASM32-NEXT: call sinf ; WASM32-NEXT: call __truncsfhf2 ; WASM32-NEXT: i32.store16 0 ; WASM32-NEXT: # fallthrough-return ; ; WASM64-LABEL: test_sincos_v2f16: -; WASM64: .functype test_sincos_v2f16 (i64, f32, f32) -> () +; WASM64: .functype test_sincos_v2f16 (i64, i32, i32) -> () +; WASM64-NEXT: .local f32, f32 ; WASM64-NEXT: # %bb.0: ; WASM64-NEXT: local.get 0 ; WASM64-NEXT: local.get 2 -; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: call __extendhfsf2 -; WASM64-NEXT: local.tee 2 +; WASM64-NEXT: local.tee 3 ; WASM64-NEXT: call cosf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 6 ; WASM64-NEXT: local.get 0 ; WASM64-NEXT: local.get 1 -; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: call __extendhfsf2 -; WASM64-NEXT: local.tee 1 +; WASM64-NEXT: local.tee 4 ; WASM64-NEXT: call cosf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 4 ; WASM64-NEXT: local.get 0 -; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 3 ; WASM64-NEXT: call sinf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 2 ; WASM64-NEXT: local.get 0 -; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 4 ; WASM64-NEXT: call sinf ; WASM64-NEXT: call __truncsfhf2 ; WASM64-NEXT: i32.store16 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll index 600241aef99d0..12886341a618e 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll @@ -9,55 +9,55 @@ target triple = "wasm32" define half @fadd_fmul_contract_f16(half %a, half %b, half %c) { ; RELAXED-LABEL: fadd_fmul_contract_f16: -; RELAXED: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; RELAXED: .functype fadd_fmul_contract_f16 (i32, i32, i32) -> (i32) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: call $push0=, __truncsfhf2, $0 -; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0 -; RELAXED-NEXT: call $push2=, __truncsfhf2, $1 -; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2 -; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3 -; RELAXED-NEXT: call $push5=, __truncsfhf2, $2 -; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5 -; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6 +; RELAXED-NEXT: call $push0=, __extendhfsf2, $0 +; RELAXED-NEXT: call $push1=, __extendhfsf2, $1 +; RELAXED-NEXT: f32.mul $push2=, $pop0, $pop1 +; RELAXED-NEXT: call $push3=, __truncsfhf2, $pop2 +; RELAXED-NEXT: call $push4=, __extendhfsf2, $pop3 +; RELAXED-NEXT: call $push5=, __extendhfsf2, $2 +; RELAXED-NEXT: f32.add $push6=, $pop4, $pop5 +; RELAXED-NEXT: call $push7=, __truncsfhf2, $pop6 ; RELAXED-NEXT: return $pop7 ; ; STRICT-LABEL: fadd_fmul_contract_f16: -; STRICT: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; STRICT: .functype fadd_fmul_contract_f16 (i32, i32, i32) -> (i32) ; STRICT-NEXT: # %bb.0: -; STRICT-NEXT: call $push0=, __truncsfhf2, $0 -; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0 -; STRICT-NEXT: call $push2=, __truncsfhf2, $1 -; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2 -; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3 -; STRICT-NEXT: call $push5=, __truncsfhf2, $2 -; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5 -; STRICT-NEXT: f32.add $push7=, $pop4, $pop6 +; STRICT-NEXT: call $push0=, __extendhfsf2, $0 +; STRICT-NEXT: call $push1=, __extendhfsf2, $1 +; STRICT-NEXT: f32.mul $push2=, $pop0, $pop1 +; STRICT-NEXT: call $push3=, __truncsfhf2, $pop2 +; STRICT-NEXT: call $push4=, __extendhfsf2, $pop3 +; STRICT-NEXT: call $push5=, __extendhfsf2, $2 +; STRICT-NEXT: f32.add $push6=, $pop4, $pop5 +; STRICT-NEXT: call $push7=, __truncsfhf2, $pop6 ; STRICT-NEXT: return $pop7 ; ; NOFP16-LABEL: fadd_fmul_contract_f16: -; NOFP16: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; NOFP16: .functype fadd_fmul_contract_f16 (i32, i32, i32) -> (i32) ; NOFP16-NEXT: # %bb.0: -; NOFP16-NEXT: call $push0=, __truncsfhf2, $0 -; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOFP16-NEXT: call $push2=, __truncsfhf2, $1 -; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOFP16-NEXT: call $push5=, __truncsfhf2, $2 -; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: call $push0=, __extendhfsf2, $0 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $1 +; NOFP16-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOFP16-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOFP16-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOFP16-NEXT: call $push5=, __extendhfsf2, $2 +; NOFP16-NEXT: f32.add $push6=, $pop4, $pop5 +; NOFP16-NEXT: call $push7=, __truncsfhf2, $pop6 ; NOFP16-NEXT: return $pop7 ; ; NOSIMD-LABEL: fadd_fmul_contract_f16: -; NOSIMD: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; NOSIMD: .functype fadd_fmul_contract_f16 (i32, i32, i32) -> (i32) ; NOSIMD-NEXT: # %bb.0: -; NOSIMD-NEXT: call $push0=, __truncsfhf2, $0 -; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOSIMD-NEXT: call $push2=, __truncsfhf2, $1 -; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2 -; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: call $push0=, __extendhfsf2, $0 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $1 +; NOSIMD-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOSIMD-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOSIMD-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOSIMD-NEXT: call $push5=, __extendhfsf2, $2 +; NOSIMD-NEXT: f32.add $push6=, $pop4, $pop5 +; NOSIMD-NEXT: call $push7=, __truncsfhf2, $pop6 ; NOSIMD-NEXT: return $pop7 %mul = fmul contract half %b, %a %add = fadd contract half %mul, %c @@ -66,55 +66,55 @@ define half @fadd_fmul_contract_f16(half %a, half %b, half %c) { define half @fmuladd_contract_f16(half %a, half %b, half %c) { ; RELAXED-LABEL: fmuladd_contract_f16: -; RELAXED: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; RELAXED: .functype fmuladd_contract_f16 (i32, i32, i32) -> (i32) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: call $push0=, __truncsfhf2, $1 -; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0 -; RELAXED-NEXT: call $push2=, __truncsfhf2, $0 -; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2 -; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3 -; RELAXED-NEXT: call $push5=, __truncsfhf2, $2 -; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5 -; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6 +; RELAXED-NEXT: call $push0=, __extendhfsf2, $1 +; RELAXED-NEXT: call $push1=, __extendhfsf2, $0 +; RELAXED-NEXT: f32.mul $push2=, $pop0, $pop1 +; RELAXED-NEXT: call $push3=, __truncsfhf2, $pop2 +; RELAXED-NEXT: call $push4=, __extendhfsf2, $pop3 +; RELAXED-NEXT: call $push5=, __extendhfsf2, $2 +; RELAXED-NEXT: f32.add $push6=, $pop4, $pop5 +; RELAXED-NEXT: call $push7=, __truncsfhf2, $pop6 ; RELAXED-NEXT: return $pop7 ; ; STRICT-LABEL: fmuladd_contract_f16: -; STRICT: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; STRICT: .functype fmuladd_contract_f16 (i32, i32, i32) -> (i32) ; STRICT-NEXT: # %bb.0: -; STRICT-NEXT: call $push0=, __truncsfhf2, $1 -; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0 -; STRICT-NEXT: call $push2=, __truncsfhf2, $0 -; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2 -; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3 -; STRICT-NEXT: call $push5=, __truncsfhf2, $2 -; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5 -; STRICT-NEXT: f32.add $push7=, $pop4, $pop6 +; STRICT-NEXT: call $push0=, __extendhfsf2, $1 +; STRICT-NEXT: call $push1=, __extendhfsf2, $0 +; STRICT-NEXT: f32.mul $push2=, $pop0, $pop1 +; STRICT-NEXT: call $push3=, __truncsfhf2, $pop2 +; STRICT-NEXT: call $push4=, __extendhfsf2, $pop3 +; STRICT-NEXT: call $push5=, __extendhfsf2, $2 +; STRICT-NEXT: f32.add $push6=, $pop4, $pop5 +; STRICT-NEXT: call $push7=, __truncsfhf2, $pop6 ; STRICT-NEXT: return $pop7 ; ; NOFP16-LABEL: fmuladd_contract_f16: -; NOFP16: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; NOFP16: .functype fmuladd_contract_f16 (i32, i32, i32) -> (i32) ; NOFP16-NEXT: # %bb.0: -; NOFP16-NEXT: call $push0=, __truncsfhf2, $1 -; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOFP16-NEXT: call $push2=, __truncsfhf2, $0 -; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOFP16-NEXT: call $push5=, __truncsfhf2, $2 -; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: call $push0=, __extendhfsf2, $1 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $0 +; NOFP16-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOFP16-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOFP16-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOFP16-NEXT: call $push5=, __extendhfsf2, $2 +; NOFP16-NEXT: f32.add $push6=, $pop4, $pop5 +; NOFP16-NEXT: call $push7=, __truncsfhf2, $pop6 ; NOFP16-NEXT: return $pop7 ; ; NOSIMD-LABEL: fmuladd_contract_f16: -; NOSIMD: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; NOSIMD: .functype fmuladd_contract_f16 (i32, i32, i32) -> (i32) ; NOSIMD-NEXT: # %bb.0: -; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1 -; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0 -; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2 -; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: call $push0=, __extendhfsf2, $1 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $0 +; NOSIMD-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOSIMD-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOSIMD-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOSIMD-NEXT: call $push5=, __extendhfsf2, $2 +; NOSIMD-NEXT: f32.add $push6=, $pop4, $pop5 +; NOSIMD-NEXT: call $push7=, __truncsfhf2, $pop6 ; NOSIMD-NEXT: return $pop7 %fma = call contract half @llvm.fmuladd(half %a, half %b, half %c) ret half %fma @@ -122,55 +122,55 @@ define half @fmuladd_contract_f16(half %a, half %b, half %c) { define half @fmuladd_f16(half %a, half %b, half %c) { ; RELAXED-LABEL: fmuladd_f16: -; RELAXED: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; RELAXED: .functype fmuladd_f16 (i32, i32, i32) -> (i32) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: call $push0=, __truncsfhf2, $1 -; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0 -; RELAXED-NEXT: call $push2=, __truncsfhf2, $0 -; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2 -; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3 -; RELAXED-NEXT: call $push5=, __truncsfhf2, $2 -; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5 -; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6 +; RELAXED-NEXT: call $push0=, __extendhfsf2, $1 +; RELAXED-NEXT: call $push1=, __extendhfsf2, $0 +; RELAXED-NEXT: f32.mul $push2=, $pop0, $pop1 +; RELAXED-NEXT: call $push3=, __truncsfhf2, $pop2 +; RELAXED-NEXT: call $push4=, __extendhfsf2, $pop3 +; RELAXED-NEXT: call $push5=, __extendhfsf2, $2 +; RELAXED-NEXT: f32.add $push6=, $pop4, $pop5 +; RELAXED-NEXT: call $push7=, __truncsfhf2, $pop6 ; RELAXED-NEXT: return $pop7 ; ; STRICT-LABEL: fmuladd_f16: -; STRICT: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; STRICT: .functype fmuladd_f16 (i32, i32, i32) -> (i32) ; STRICT-NEXT: # %bb.0: -; STRICT-NEXT: call $push0=, __truncsfhf2, $1 -; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0 -; STRICT-NEXT: call $push2=, __truncsfhf2, $0 -; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2 -; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3 -; STRICT-NEXT: call $push5=, __truncsfhf2, $2 -; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5 -; STRICT-NEXT: f32.add $push7=, $pop4, $pop6 +; STRICT-NEXT: call $push0=, __extendhfsf2, $1 +; STRICT-NEXT: call $push1=, __extendhfsf2, $0 +; STRICT-NEXT: f32.mul $push2=, $pop0, $pop1 +; STRICT-NEXT: call $push3=, __truncsfhf2, $pop2 +; STRICT-NEXT: call $push4=, __extendhfsf2, $pop3 +; STRICT-NEXT: call $push5=, __extendhfsf2, $2 +; STRICT-NEXT: f32.add $push6=, $pop4, $pop5 +; STRICT-NEXT: call $push7=, __truncsfhf2, $pop6 ; STRICT-NEXT: return $pop7 ; ; NOFP16-LABEL: fmuladd_f16: -; NOFP16: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; NOFP16: .functype fmuladd_f16 (i32, i32, i32) -> (i32) ; NOFP16-NEXT: # %bb.0: -; NOFP16-NEXT: call $push0=, __truncsfhf2, $1 -; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOFP16-NEXT: call $push2=, __truncsfhf2, $0 -; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOFP16-NEXT: call $push5=, __truncsfhf2, $2 -; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: call $push0=, __extendhfsf2, $1 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $0 +; NOFP16-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOFP16-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOFP16-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOFP16-NEXT: call $push5=, __extendhfsf2, $2 +; NOFP16-NEXT: f32.add $push6=, $pop4, $pop5 +; NOFP16-NEXT: call $push7=, __truncsfhf2, $pop6 ; NOFP16-NEXT: return $pop7 ; ; NOSIMD-LABEL: fmuladd_f16: -; NOSIMD: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; NOSIMD: .functype fmuladd_f16 (i32, i32, i32) -> (i32) ; NOSIMD-NEXT: # %bb.0: -; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1 -; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0 -; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2 -; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: call $push0=, __extendhfsf2, $1 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $0 +; NOSIMD-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOSIMD-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOSIMD-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOSIMD-NEXT: call $push5=, __extendhfsf2, $2 +; NOSIMD-NEXT: f32.add $push6=, $pop4, $pop5 +; NOSIMD-NEXT: call $push7=, __truncsfhf2, $pop6 ; NOSIMD-NEXT: return $pop7 %fma = call half @llvm.fmuladd(half %a, half %b, half %c) ret half %fma @@ -427,173 +427,157 @@ define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h ; STRICT-NEXT: return $pop0 ; ; NOFP16-LABEL: fadd_fmul_contract_8xf16: -; NOFP16: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOFP16: .functype fadd_fmul_contract_8xf16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NOFP16-NEXT: # %bb.0: -; NOFP16-NEXT: call $push0=, __truncsfhf2, $8 -; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOFP16-NEXT: call $push2=, __truncsfhf2, $16 -; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOFP16-NEXT: call $push5=, __truncsfhf2, $24 -; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 -; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7 -; NOFP16-NEXT: i32.store16 14($0), $pop8 -; NOFP16-NEXT: call $push9=, __truncsfhf2, $7 -; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9 -; NOFP16-NEXT: call $push11=, __truncsfhf2, $15 +; NOFP16-NEXT: call $push0=, __extendhfsf2, $8 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $16 +; NOFP16-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOFP16-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOFP16-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOFP16-NEXT: call $push5=, __extendhfsf2, $24 +; NOFP16-NEXT: f32.add $push6=, $pop4, $pop5 +; NOFP16-NEXT: call $push7=, __truncsfhf2, $pop6 +; NOFP16-NEXT: i32.store16 14($0), $pop7 +; NOFP16-NEXT: call $push8=, __extendhfsf2, $7 +; NOFP16-NEXT: call $push9=, __extendhfsf2, $15 +; NOFP16-NEXT: f32.mul $push10=, $pop8, $pop9 +; NOFP16-NEXT: call $push11=, __truncsfhf2, $pop10 ; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11 -; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12 -; NOFP16-NEXT: call $push14=, __truncsfhf2, $23 -; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14 -; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15 -; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16 -; NOFP16-NEXT: i32.store16 12($0), $pop17 -; NOFP16-NEXT: call $push18=, __truncsfhf2, $6 -; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18 -; NOFP16-NEXT: call $push20=, __truncsfhf2, $14 -; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20 -; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21 -; NOFP16-NEXT: call $push23=, __truncsfhf2, $22 -; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23 -; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24 -; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25 -; NOFP16-NEXT: i32.store16 10($0), $pop26 -; NOFP16-NEXT: call $push27=, __truncsfhf2, $5 +; NOFP16-NEXT: call $push13=, __extendhfsf2, $23 +; NOFP16-NEXT: f32.add $push14=, $pop12, $pop13 +; NOFP16-NEXT: call $push15=, __truncsfhf2, $pop14 +; NOFP16-NEXT: i32.store16 12($0), $pop15 +; NOFP16-NEXT: call $push16=, __extendhfsf2, $6 +; NOFP16-NEXT: call $push17=, __extendhfsf2, $14 +; NOFP16-NEXT: f32.mul $push18=, $pop16, $pop17 +; NOFP16-NEXT: call $push19=, __truncsfhf2, $pop18 +; NOFP16-NEXT: call $push20=, __extendhfsf2, $pop19 +; NOFP16-NEXT: call $push21=, __extendhfsf2, $22 +; NOFP16-NEXT: f32.add $push22=, $pop20, $pop21 +; NOFP16-NEXT: call $push23=, __truncsfhf2, $pop22 +; NOFP16-NEXT: i32.store16 10($0), $pop23 +; NOFP16-NEXT: call $push24=, __extendhfsf2, $5 +; NOFP16-NEXT: call $push25=, __extendhfsf2, $13 +; NOFP16-NEXT: f32.mul $push26=, $pop24, $pop25 +; NOFP16-NEXT: call $push27=, __truncsfhf2, $pop26 ; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27 -; NOFP16-NEXT: call $push29=, __truncsfhf2, $13 -; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29 -; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30 -; NOFP16-NEXT: call $push32=, __truncsfhf2, $21 -; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32 -; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33 +; NOFP16-NEXT: call $push29=, __extendhfsf2, $21 +; NOFP16-NEXT: f32.add $push30=, $pop28, $pop29 +; NOFP16-NEXT: call $push31=, __truncsfhf2, $pop30 +; NOFP16-NEXT: i32.store16 8($0), $pop31 +; NOFP16-NEXT: call $push32=, __extendhfsf2, $4 +; NOFP16-NEXT: call $push33=, __extendhfsf2, $12 +; NOFP16-NEXT: f32.mul $push34=, $pop32, $pop33 ; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34 -; NOFP16-NEXT: i32.store16 8($0), $pop35 -; NOFP16-NEXT: call $push36=, __truncsfhf2, $4 -; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36 -; NOFP16-NEXT: call $push38=, __truncsfhf2, $12 -; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38 -; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39 -; NOFP16-NEXT: call $push41=, __truncsfhf2, $20 -; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41 -; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42 -; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43 -; NOFP16-NEXT: i32.store16 6($0), $pop44 -; NOFP16-NEXT: call $push45=, __truncsfhf2, $3 -; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45 -; NOFP16-NEXT: call $push47=, __truncsfhf2, $11 -; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47 -; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48 -; NOFP16-NEXT: call $push50=, __truncsfhf2, $19 -; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50 -; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51 -; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52 -; NOFP16-NEXT: i32.store16 4($0), $pop53 -; NOFP16-NEXT: call $push54=, __truncsfhf2, $2 -; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54 -; NOFP16-NEXT: call $push56=, __truncsfhf2, $10 -; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56 -; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57 -; NOFP16-NEXT: call $push59=, __truncsfhf2, $18 +; NOFP16-NEXT: call $push36=, __extendhfsf2, $pop35 +; NOFP16-NEXT: call $push37=, __extendhfsf2, $20 +; NOFP16-NEXT: f32.add $push38=, $pop36, $pop37 +; NOFP16-NEXT: call $push39=, __truncsfhf2, $pop38 +; NOFP16-NEXT: i32.store16 6($0), $pop39 +; NOFP16-NEXT: call $push40=, __extendhfsf2, $3 +; NOFP16-NEXT: call $push41=, __extendhfsf2, $11 +; NOFP16-NEXT: f32.mul $push42=, $pop40, $pop41 +; NOFP16-NEXT: call $push43=, __truncsfhf2, $pop42 +; NOFP16-NEXT: call $push44=, __extendhfsf2, $pop43 +; NOFP16-NEXT: call $push45=, __extendhfsf2, $19 +; NOFP16-NEXT: f32.add $push46=, $pop44, $pop45 +; NOFP16-NEXT: call $push47=, __truncsfhf2, $pop46 +; NOFP16-NEXT: i32.store16 4($0), $pop47 +; NOFP16-NEXT: call $push48=, __extendhfsf2, $2 +; NOFP16-NEXT: call $push49=, __extendhfsf2, $10 +; NOFP16-NEXT: f32.mul $push50=, $pop48, $pop49 +; NOFP16-NEXT: call $push51=, __truncsfhf2, $pop50 +; NOFP16-NEXT: call $push52=, __extendhfsf2, $pop51 +; NOFP16-NEXT: call $push53=, __extendhfsf2, $18 +; NOFP16-NEXT: f32.add $push54=, $pop52, $pop53 +; NOFP16-NEXT: call $push55=, __truncsfhf2, $pop54 +; NOFP16-NEXT: i32.store16 2($0), $pop55 +; NOFP16-NEXT: call $push56=, __extendhfsf2, $1 +; NOFP16-NEXT: call $push57=, __extendhfsf2, $9 +; NOFP16-NEXT: f32.mul $push58=, $pop56, $pop57 +; NOFP16-NEXT: call $push59=, __truncsfhf2, $pop58 ; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59 -; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60 -; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61 -; NOFP16-NEXT: i32.store16 2($0), $pop62 -; NOFP16-NEXT: call $push63=, __truncsfhf2, $1 -; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63 -; NOFP16-NEXT: call $push65=, __truncsfhf2, $9 -; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65 -; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66 -; NOFP16-NEXT: call $push68=, __truncsfhf2, $17 -; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68 -; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69 -; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70 -; NOFP16-NEXT: i32.store16 0($0), $pop71 +; NOFP16-NEXT: call $push61=, __extendhfsf2, $17 +; NOFP16-NEXT: f32.add $push62=, $pop60, $pop61 +; NOFP16-NEXT: call $push63=, __truncsfhf2, $pop62 +; NOFP16-NEXT: i32.store16 0($0), $pop63 ; NOFP16-NEXT: return ; ; NOSIMD-LABEL: fadd_fmul_contract_8xf16: -; NOSIMD: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD: .functype fadd_fmul_contract_8xf16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NOSIMD-NEXT: # %bb.0: -; NOSIMD-NEXT: call $push0=, __truncsfhf2, $8 -; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOSIMD-NEXT: call $push2=, __truncsfhf2, $16 -; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24 -; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 -; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7 -; NOSIMD-NEXT: i32.store16 14($0), $pop8 -; NOSIMD-NEXT: call $push9=, __truncsfhf2, $7 -; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9 -; NOSIMD-NEXT: call $push11=, __truncsfhf2, $15 +; NOSIMD-NEXT: call $push0=, __extendhfsf2, $8 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $16 +; NOSIMD-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOSIMD-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOSIMD-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOSIMD-NEXT: call $push5=, __extendhfsf2, $24 +; NOSIMD-NEXT: f32.add $push6=, $pop4, $pop5 +; NOSIMD-NEXT: call $push7=, __truncsfhf2, $pop6 +; NOSIMD-NEXT: i32.store16 14($0), $pop7 +; NOSIMD-NEXT: call $push8=, __extendhfsf2, $7 +; NOSIMD-NEXT: call $push9=, __extendhfsf2, $15 +; NOSIMD-NEXT: f32.mul $push10=, $pop8, $pop9 +; NOSIMD-NEXT: call $push11=, __truncsfhf2, $pop10 ; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11 -; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12 -; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23 -; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14 -; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15 -; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16 -; NOSIMD-NEXT: i32.store16 12($0), $pop17 -; NOSIMD-NEXT: call $push18=, __truncsfhf2, $6 -; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18 -; NOSIMD-NEXT: call $push20=, __truncsfhf2, $14 -; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20 -; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21 -; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22 -; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23 -; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24 -; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25 -; NOSIMD-NEXT: i32.store16 10($0), $pop26 -; NOSIMD-NEXT: call $push27=, __truncsfhf2, $5 +; NOSIMD-NEXT: call $push13=, __extendhfsf2, $23 +; NOSIMD-NEXT: f32.add $push14=, $pop12, $pop13 +; NOSIMD-NEXT: call $push15=, __truncsfhf2, $pop14 +; NOSIMD-NEXT: i32.store16 12($0), $pop15 +; NOSIMD-NEXT: call $push16=, __extendhfsf2, $6 +; NOSIMD-NEXT: call $push17=, __extendhfsf2, $14 +; NOSIMD-NEXT: f32.mul $push18=, $pop16, $pop17 +; NOSIMD-NEXT: call $push19=, __truncsfhf2, $pop18 +; NOSIMD-NEXT: call $push20=, __extendhfsf2, $pop19 +; NOSIMD-NEXT: call $push21=, __extendhfsf2, $22 +; NOSIMD-NEXT: f32.add $push22=, $pop20, $pop21 +; NOSIMD-NEXT: call $push23=, __truncsfhf2, $pop22 +; NOSIMD-NEXT: i32.store16 10($0), $pop23 +; NOSIMD-NEXT: call $push24=, __extendhfsf2, $5 +; NOSIMD-NEXT: call $push25=, __extendhfsf2, $13 +; NOSIMD-NEXT: f32.mul $push26=, $pop24, $pop25 +; NOSIMD-NEXT: call $push27=, __truncsfhf2, $pop26 ; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27 -; NOSIMD-NEXT: call $push29=, __truncsfhf2, $13 -; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29 -; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30 -; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21 -; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32 -; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33 +; NOSIMD-NEXT: call $push29=, __extendhfsf2, $21 +; NOSIMD-NEXT: f32.add $push30=, $pop28, $pop29 +; NOSIMD-NEXT: call $push31=, __truncsfhf2, $pop30 +; NOSIMD-NEXT: i32.store16 8($0), $pop31 +; NOSIMD-NEXT: call $push32=, __extendhfsf2, $4 +; NOSIMD-NEXT: call $push33=, __extendhfsf2, $12 +; NOSIMD-NEXT: f32.mul $push34=, $pop32, $pop33 ; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34 -; NOSIMD-NEXT: i32.store16 8($0), $pop35 -; NOSIMD-NEXT: call $push36=, __truncsfhf2, $4 -; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36 -; NOSIMD-NEXT: call $push38=, __truncsfhf2, $12 -; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38 -; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39 -; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20 -; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41 -; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42 -; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43 -; NOSIMD-NEXT: i32.store16 6($0), $pop44 -; NOSIMD-NEXT: call $push45=, __truncsfhf2, $3 -; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45 -; NOSIMD-NEXT: call $push47=, __truncsfhf2, $11 -; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47 -; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48 -; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19 -; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50 -; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51 -; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52 -; NOSIMD-NEXT: i32.store16 4($0), $pop53 -; NOSIMD-NEXT: call $push54=, __truncsfhf2, $2 -; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54 -; NOSIMD-NEXT: call $push56=, __truncsfhf2, $10 -; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56 -; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57 -; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18 +; NOSIMD-NEXT: call $push36=, __extendhfsf2, $pop35 +; NOSIMD-NEXT: call $push37=, __extendhfsf2, $20 +; NOSIMD-NEXT: f32.add $push38=, $pop36, $pop37 +; NOSIMD-NEXT: call $push39=, __truncsfhf2, $pop38 +; NOSIMD-NEXT: i32.store16 6($0), $pop39 +; NOSIMD-NEXT: call $push40=, __extendhfsf2, $3 +; NOSIMD-NEXT: call $push41=, __extendhfsf2, $11 +; NOSIMD-NEXT: f32.mul $push42=, $pop40, $pop41 +; NOSIMD-NEXT: call $push43=, __truncsfhf2, $pop42 +; NOSIMD-NEXT: call $push44=, __extendhfsf2, $pop43 +; NOSIMD-NEXT: call $push45=, __extendhfsf2, $19 +; NOSIMD-NEXT: f32.add $push46=, $pop44, $pop45 +; NOSIMD-NEXT: call $push47=, __truncsfhf2, $pop46 +; NOSIMD-NEXT: i32.store16 4($0), $pop47 +; NOSIMD-NEXT: call $push48=, __extendhfsf2, $2 +; NOSIMD-NEXT: call $push49=, __extendhfsf2, $10 +; NOSIMD-NEXT: f32.mul $push50=, $pop48, $pop49 +; NOSIMD-NEXT: call $push51=, __truncsfhf2, $pop50 +; NOSIMD-NEXT: call $push52=, __extendhfsf2, $pop51 +; NOSIMD-NEXT: call $push53=, __extendhfsf2, $18 +; NOSIMD-NEXT: f32.add $push54=, $pop52, $pop53 +; NOSIMD-NEXT: call $push55=, __truncsfhf2, $pop54 +; NOSIMD-NEXT: i32.store16 2($0), $pop55 +; NOSIMD-NEXT: call $push56=, __extendhfsf2, $1 +; NOSIMD-NEXT: call $push57=, __extendhfsf2, $9 +; NOSIMD-NEXT: f32.mul $push58=, $pop56, $pop57 +; NOSIMD-NEXT: call $push59=, __truncsfhf2, $pop58 ; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59 -; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60 -; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61 -; NOSIMD-NEXT: i32.store16 2($0), $pop62 -; NOSIMD-NEXT: call $push63=, __truncsfhf2, $1 -; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63 -; NOSIMD-NEXT: call $push65=, __truncsfhf2, $9 -; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65 -; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66 -; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17 -; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68 -; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69 -; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70 -; NOSIMD-NEXT: i32.store16 0($0), $pop71 +; NOSIMD-NEXT: call $push61=, __extendhfsf2, $17 +; NOSIMD-NEXT: f32.add $push62=, $pop60, $pop61 +; NOSIMD-NEXT: call $push63=, __truncsfhf2, $pop62 +; NOSIMD-NEXT: i32.store16 0($0), $pop63 ; NOSIMD-NEXT: return %mul = fmul contract <8 x half> %b, %a %add = fadd contract <8 x half> %mul, %c @@ -657,173 +641,157 @@ define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x hal ; STRICT-NEXT: return $pop0 ; ; NOFP16-LABEL: fmuladd_contract_8xf16: -; NOFP16: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOFP16: .functype fmuladd_contract_8xf16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NOFP16-NEXT: # %bb.0: -; NOFP16-NEXT: call $push0=, __truncsfhf2, $16 -; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOFP16-NEXT: call $push2=, __truncsfhf2, $8 -; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOFP16-NEXT: call $push5=, __truncsfhf2, $24 -; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 -; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7 -; NOFP16-NEXT: i32.store16 14($0), $pop8 -; NOFP16-NEXT: call $push9=, __truncsfhf2, $15 -; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9 -; NOFP16-NEXT: call $push11=, __truncsfhf2, $7 +; NOFP16-NEXT: call $push0=, __extendhfsf2, $16 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $8 +; NOFP16-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOFP16-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOFP16-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOFP16-NEXT: call $push5=, __extendhfsf2, $24 +; NOFP16-NEXT: f32.add $push6=, $pop4, $pop5 +; NOFP16-NEXT: call $push7=, __truncsfhf2, $pop6 +; NOFP16-NEXT: i32.store16 14($0), $pop7 +; NOFP16-NEXT: call $push8=, __extendhfsf2, $15 +; NOFP16-NEXT: call $push9=, __extendhfsf2, $7 +; NOFP16-NEXT: f32.mul $push10=, $pop8, $pop9 +; NOFP16-NEXT: call $push11=, __truncsfhf2, $pop10 ; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11 -; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12 -; NOFP16-NEXT: call $push14=, __truncsfhf2, $23 -; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14 -; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15 -; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16 -; NOFP16-NEXT: i32.store16 12($0), $pop17 -; NOFP16-NEXT: call $push18=, __truncsfhf2, $14 -; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18 -; NOFP16-NEXT: call $push20=, __truncsfhf2, $6 -; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20 -; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21 -; NOFP16-NEXT: call $push23=, __truncsfhf2, $22 -; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23 -; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24 -; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25 -; NOFP16-NEXT: i32.store16 10($0), $pop26 -; NOFP16-NEXT: call $push27=, __truncsfhf2, $13 +; NOFP16-NEXT: call $push13=, __extendhfsf2, $23 +; NOFP16-NEXT: f32.add $push14=, $pop12, $pop13 +; NOFP16-NEXT: call $push15=, __truncsfhf2, $pop14 +; NOFP16-NEXT: i32.store16 12($0), $pop15 +; NOFP16-NEXT: call $push16=, __extendhfsf2, $14 +; NOFP16-NEXT: call $push17=, __extendhfsf2, $6 +; NOFP16-NEXT: f32.mul $push18=, $pop16, $pop17 +; NOFP16-NEXT: call $push19=, __truncsfhf2, $pop18 +; NOFP16-NEXT: call $push20=, __extendhfsf2, $pop19 +; NOFP16-NEXT: call $push21=, __extendhfsf2, $22 +; NOFP16-NEXT: f32.add $push22=, $pop20, $pop21 +; NOFP16-NEXT: call $push23=, __truncsfhf2, $pop22 +; NOFP16-NEXT: i32.store16 10($0), $pop23 +; NOFP16-NEXT: call $push24=, __extendhfsf2, $13 +; NOFP16-NEXT: call $push25=, __extendhfsf2, $5 +; NOFP16-NEXT: f32.mul $push26=, $pop24, $pop25 +; NOFP16-NEXT: call $push27=, __truncsfhf2, $pop26 ; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27 -; NOFP16-NEXT: call $push29=, __truncsfhf2, $5 -; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29 -; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30 -; NOFP16-NEXT: call $push32=, __truncsfhf2, $21 -; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32 -; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33 +; NOFP16-NEXT: call $push29=, __extendhfsf2, $21 +; NOFP16-NEXT: f32.add $push30=, $pop28, $pop29 +; NOFP16-NEXT: call $push31=, __truncsfhf2, $pop30 +; NOFP16-NEXT: i32.store16 8($0), $pop31 +; NOFP16-NEXT: call $push32=, __extendhfsf2, $12 +; NOFP16-NEXT: call $push33=, __extendhfsf2, $4 +; NOFP16-NEXT: f32.mul $push34=, $pop32, $pop33 ; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34 -; NOFP16-NEXT: i32.store16 8($0), $pop35 -; NOFP16-NEXT: call $push36=, __truncsfhf2, $12 -; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36 -; NOFP16-NEXT: call $push38=, __truncsfhf2, $4 -; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38 -; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39 -; NOFP16-NEXT: call $push41=, __truncsfhf2, $20 -; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41 -; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42 -; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43 -; NOFP16-NEXT: i32.store16 6($0), $pop44 -; NOFP16-NEXT: call $push45=, __truncsfhf2, $11 -; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45 -; NOFP16-NEXT: call $push47=, __truncsfhf2, $3 -; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47 -; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48 -; NOFP16-NEXT: call $push50=, __truncsfhf2, $19 -; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50 -; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51 -; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52 -; NOFP16-NEXT: i32.store16 4($0), $pop53 -; NOFP16-NEXT: call $push54=, __truncsfhf2, $10 -; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54 -; NOFP16-NEXT: call $push56=, __truncsfhf2, $2 -; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56 -; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57 -; NOFP16-NEXT: call $push59=, __truncsfhf2, $18 +; NOFP16-NEXT: call $push36=, __extendhfsf2, $pop35 +; NOFP16-NEXT: call $push37=, __extendhfsf2, $20 +; NOFP16-NEXT: f32.add $push38=, $pop36, $pop37 +; NOFP16-NEXT: call $push39=, __truncsfhf2, $pop38 +; NOFP16-NEXT: i32.store16 6($0), $pop39 +; NOFP16-NEXT: call $push40=, __extendhfsf2, $11 +; NOFP16-NEXT: call $push41=, __extendhfsf2, $3 +; NOFP16-NEXT: f32.mul $push42=, $pop40, $pop41 +; NOFP16-NEXT: call $push43=, __truncsfhf2, $pop42 +; NOFP16-NEXT: call $push44=, __extendhfsf2, $pop43 +; NOFP16-NEXT: call $push45=, __extendhfsf2, $19 +; NOFP16-NEXT: f32.add $push46=, $pop44, $pop45 +; NOFP16-NEXT: call $push47=, __truncsfhf2, $pop46 +; NOFP16-NEXT: i32.store16 4($0), $pop47 +; NOFP16-NEXT: call $push48=, __extendhfsf2, $10 +; NOFP16-NEXT: call $push49=, __extendhfsf2, $2 +; NOFP16-NEXT: f32.mul $push50=, $pop48, $pop49 +; NOFP16-NEXT: call $push51=, __truncsfhf2, $pop50 +; NOFP16-NEXT: call $push52=, __extendhfsf2, $pop51 +; NOFP16-NEXT: call $push53=, __extendhfsf2, $18 +; NOFP16-NEXT: f32.add $push54=, $pop52, $pop53 +; NOFP16-NEXT: call $push55=, __truncsfhf2, $pop54 +; NOFP16-NEXT: i32.store16 2($0), $pop55 +; NOFP16-NEXT: call $push56=, __extendhfsf2, $9 +; NOFP16-NEXT: call $push57=, __extendhfsf2, $1 +; NOFP16-NEXT: f32.mul $push58=, $pop56, $pop57 +; NOFP16-NEXT: call $push59=, __truncsfhf2, $pop58 ; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59 -; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60 -; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61 -; NOFP16-NEXT: i32.store16 2($0), $pop62 -; NOFP16-NEXT: call $push63=, __truncsfhf2, $9 -; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63 -; NOFP16-NEXT: call $push65=, __truncsfhf2, $1 -; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65 -; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66 -; NOFP16-NEXT: call $push68=, __truncsfhf2, $17 -; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68 -; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69 -; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70 -; NOFP16-NEXT: i32.store16 0($0), $pop71 +; NOFP16-NEXT: call $push61=, __extendhfsf2, $17 +; NOFP16-NEXT: f32.add $push62=, $pop60, $pop61 +; NOFP16-NEXT: call $push63=, __truncsfhf2, $pop62 +; NOFP16-NEXT: i32.store16 0($0), $pop63 ; NOFP16-NEXT: return ; ; NOSIMD-LABEL: fmuladd_contract_8xf16: -; NOSIMD: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD: .functype fmuladd_contract_8xf16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NOSIMD-NEXT: # %bb.0: -; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16 -; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8 -; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24 -; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 -; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7 -; NOSIMD-NEXT: i32.store16 14($0), $pop8 -; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15 -; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9 -; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7 +; NOSIMD-NEXT: call $push0=, __extendhfsf2, $16 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $8 +; NOSIMD-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOSIMD-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOSIMD-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOSIMD-NEXT: call $push5=, __extendhfsf2, $24 +; NOSIMD-NEXT: f32.add $push6=, $pop4, $pop5 +; NOSIMD-NEXT: call $push7=, __truncsfhf2, $pop6 +; NOSIMD-NEXT: i32.store16 14($0), $pop7 +; NOSIMD-NEXT: call $push8=, __extendhfsf2, $15 +; NOSIMD-NEXT: call $push9=, __extendhfsf2, $7 +; NOSIMD-NEXT: f32.mul $push10=, $pop8, $pop9 +; NOSIMD-NEXT: call $push11=, __truncsfhf2, $pop10 ; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11 -; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12 -; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23 -; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14 -; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15 -; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16 -; NOSIMD-NEXT: i32.store16 12($0), $pop17 -; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14 -; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18 -; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6 -; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20 -; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21 -; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22 -; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23 -; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24 -; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25 -; NOSIMD-NEXT: i32.store16 10($0), $pop26 -; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13 +; NOSIMD-NEXT: call $push13=, __extendhfsf2, $23 +; NOSIMD-NEXT: f32.add $push14=, $pop12, $pop13 +; NOSIMD-NEXT: call $push15=, __truncsfhf2, $pop14 +; NOSIMD-NEXT: i32.store16 12($0), $pop15 +; NOSIMD-NEXT: call $push16=, __extendhfsf2, $14 +; NOSIMD-NEXT: call $push17=, __extendhfsf2, $6 +; NOSIMD-NEXT: f32.mul $push18=, $pop16, $pop17 +; NOSIMD-NEXT: call $push19=, __truncsfhf2, $pop18 +; NOSIMD-NEXT: call $push20=, __extendhfsf2, $pop19 +; NOSIMD-NEXT: call $push21=, __extendhfsf2, $22 +; NOSIMD-NEXT: f32.add $push22=, $pop20, $pop21 +; NOSIMD-NEXT: call $push23=, __truncsfhf2, $pop22 +; NOSIMD-NEXT: i32.store16 10($0), $pop23 +; NOSIMD-NEXT: call $push24=, __extendhfsf2, $13 +; NOSIMD-NEXT: call $push25=, __extendhfsf2, $5 +; NOSIMD-NEXT: f32.mul $push26=, $pop24, $pop25 +; NOSIMD-NEXT: call $push27=, __truncsfhf2, $pop26 ; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27 -; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5 -; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29 -; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30 -; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21 -; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32 -; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33 +; NOSIMD-NEXT: call $push29=, __extendhfsf2, $21 +; NOSIMD-NEXT: f32.add $push30=, $pop28, $pop29 +; NOSIMD-NEXT: call $push31=, __truncsfhf2, $pop30 +; NOSIMD-NEXT: i32.store16 8($0), $pop31 +; NOSIMD-NEXT: call $push32=, __extendhfsf2, $12 +; NOSIMD-NEXT: call $push33=, __extendhfsf2, $4 +; NOSIMD-NEXT: f32.mul $push34=, $pop32, $pop33 ; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34 -; NOSIMD-NEXT: i32.store16 8($0), $pop35 -; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12 -; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36 -; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4 -; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38 -; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39 -; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20 -; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41 -; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42 -; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43 -; NOSIMD-NEXT: i32.store16 6($0), $pop44 -; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11 -; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45 -; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3 -; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47 -; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48 -; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19 -; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50 -; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51 -; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52 -; NOSIMD-NEXT: i32.store16 4($0), $pop53 -; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10 -; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54 -; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2 -; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56 -; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57 -; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18 +; NOSIMD-NEXT: call $push36=, __extendhfsf2, $pop35 +; NOSIMD-NEXT: call $push37=, __extendhfsf2, $20 +; NOSIMD-NEXT: f32.add $push38=, $pop36, $pop37 +; NOSIMD-NEXT: call $push39=, __truncsfhf2, $pop38 +; NOSIMD-NEXT: i32.store16 6($0), $pop39 +; NOSIMD-NEXT: call $push40=, __extendhfsf2, $11 +; NOSIMD-NEXT: call $push41=, __extendhfsf2, $3 +; NOSIMD-NEXT: f32.mul $push42=, $pop40, $pop41 +; NOSIMD-NEXT: call $push43=, __truncsfhf2, $pop42 +; NOSIMD-NEXT: call $push44=, __extendhfsf2, $pop43 +; NOSIMD-NEXT: call $push45=, __extendhfsf2, $19 +; NOSIMD-NEXT: f32.add $push46=, $pop44, $pop45 +; NOSIMD-NEXT: call $push47=, __truncsfhf2, $pop46 +; NOSIMD-NEXT: i32.store16 4($0), $pop47 +; NOSIMD-NEXT: call $push48=, __extendhfsf2, $10 +; NOSIMD-NEXT: call $push49=, __extendhfsf2, $2 +; NOSIMD-NEXT: f32.mul $push50=, $pop48, $pop49 +; NOSIMD-NEXT: call $push51=, __truncsfhf2, $pop50 +; NOSIMD-NEXT: call $push52=, __extendhfsf2, $pop51 +; NOSIMD-NEXT: call $push53=, __extendhfsf2, $18 +; NOSIMD-NEXT: f32.add $push54=, $pop52, $pop53 +; NOSIMD-NEXT: call $push55=, __truncsfhf2, $pop54 +; NOSIMD-NEXT: i32.store16 2($0), $pop55 +; NOSIMD-NEXT: call $push56=, __extendhfsf2, $9 +; NOSIMD-NEXT: call $push57=, __extendhfsf2, $1 +; NOSIMD-NEXT: f32.mul $push58=, $pop56, $pop57 +; NOSIMD-NEXT: call $push59=, __truncsfhf2, $pop58 ; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59 -; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60 -; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61 -; NOSIMD-NEXT: i32.store16 2($0), $pop62 -; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9 -; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63 -; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1 -; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65 -; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66 -; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17 -; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68 -; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69 -; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70 -; NOSIMD-NEXT: i32.store16 0($0), $pop71 +; NOSIMD-NEXT: call $push61=, __extendhfsf2, $17 +; NOSIMD-NEXT: f32.add $push62=, $pop60, $pop61 +; NOSIMD-NEXT: call $push63=, __truncsfhf2, $pop62 +; NOSIMD-NEXT: i32.store16 0($0), $pop63 ; NOSIMD-NEXT: return %fma = call contract <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c) ret <8 x half> %fma @@ -843,173 +811,157 @@ define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; STRICT-NEXT: return $pop0 ; ; NOFP16-LABEL: fmuladd_8xf16: -; NOFP16: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOFP16: .functype fmuladd_8xf16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NOFP16-NEXT: # %bb.0: -; NOFP16-NEXT: call $push0=, __truncsfhf2, $16 -; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOFP16-NEXT: call $push2=, __truncsfhf2, $8 -; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOFP16-NEXT: call $push5=, __truncsfhf2, $24 -; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 -; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7 -; NOFP16-NEXT: i32.store16 14($0), $pop8 -; NOFP16-NEXT: call $push9=, __truncsfhf2, $15 -; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9 -; NOFP16-NEXT: call $push11=, __truncsfhf2, $7 +; NOFP16-NEXT: call $push0=, __extendhfsf2, $16 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $8 +; NOFP16-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOFP16-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOFP16-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOFP16-NEXT: call $push5=, __extendhfsf2, $24 +; NOFP16-NEXT: f32.add $push6=, $pop4, $pop5 +; NOFP16-NEXT: call $push7=, __truncsfhf2, $pop6 +; NOFP16-NEXT: i32.store16 14($0), $pop7 +; NOFP16-NEXT: call $push8=, __extendhfsf2, $15 +; NOFP16-NEXT: call $push9=, __extendhfsf2, $7 +; NOFP16-NEXT: f32.mul $push10=, $pop8, $pop9 +; NOFP16-NEXT: call $push11=, __truncsfhf2, $pop10 ; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11 -; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12 -; NOFP16-NEXT: call $push14=, __truncsfhf2, $23 -; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14 -; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15 -; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16 -; NOFP16-NEXT: i32.store16 12($0), $pop17 -; NOFP16-NEXT: call $push18=, __truncsfhf2, $14 -; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18 -; NOFP16-NEXT: call $push20=, __truncsfhf2, $6 -; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20 -; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21 -; NOFP16-NEXT: call $push23=, __truncsfhf2, $22 -; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23 -; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24 -; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25 -; NOFP16-NEXT: i32.store16 10($0), $pop26 -; NOFP16-NEXT: call $push27=, __truncsfhf2, $13 +; NOFP16-NEXT: call $push13=, __extendhfsf2, $23 +; NOFP16-NEXT: f32.add $push14=, $pop12, $pop13 +; NOFP16-NEXT: call $push15=, __truncsfhf2, $pop14 +; NOFP16-NEXT: i32.store16 12($0), $pop15 +; NOFP16-NEXT: call $push16=, __extendhfsf2, $14 +; NOFP16-NEXT: call $push17=, __extendhfsf2, $6 +; NOFP16-NEXT: f32.mul $push18=, $pop16, $pop17 +; NOFP16-NEXT: call $push19=, __truncsfhf2, $pop18 +; NOFP16-NEXT: call $push20=, __extendhfsf2, $pop19 +; NOFP16-NEXT: call $push21=, __extendhfsf2, $22 +; NOFP16-NEXT: f32.add $push22=, $pop20, $pop21 +; NOFP16-NEXT: call $push23=, __truncsfhf2, $pop22 +; NOFP16-NEXT: i32.store16 10($0), $pop23 +; NOFP16-NEXT: call $push24=, __extendhfsf2, $13 +; NOFP16-NEXT: call $push25=, __extendhfsf2, $5 +; NOFP16-NEXT: f32.mul $push26=, $pop24, $pop25 +; NOFP16-NEXT: call $push27=, __truncsfhf2, $pop26 ; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27 -; NOFP16-NEXT: call $push29=, __truncsfhf2, $5 -; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29 -; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30 -; NOFP16-NEXT: call $push32=, __truncsfhf2, $21 -; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32 -; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33 +; NOFP16-NEXT: call $push29=, __extendhfsf2, $21 +; NOFP16-NEXT: f32.add $push30=, $pop28, $pop29 +; NOFP16-NEXT: call $push31=, __truncsfhf2, $pop30 +; NOFP16-NEXT: i32.store16 8($0), $pop31 +; NOFP16-NEXT: call $push32=, __extendhfsf2, $12 +; NOFP16-NEXT: call $push33=, __extendhfsf2, $4 +; NOFP16-NEXT: f32.mul $push34=, $pop32, $pop33 ; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34 -; NOFP16-NEXT: i32.store16 8($0), $pop35 -; NOFP16-NEXT: call $push36=, __truncsfhf2, $12 -; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36 -; NOFP16-NEXT: call $push38=, __truncsfhf2, $4 -; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38 -; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39 -; NOFP16-NEXT: call $push41=, __truncsfhf2, $20 -; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41 -; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42 -; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43 -; NOFP16-NEXT: i32.store16 6($0), $pop44 -; NOFP16-NEXT: call $push45=, __truncsfhf2, $11 -; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45 -; NOFP16-NEXT: call $push47=, __truncsfhf2, $3 -; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47 -; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48 -; NOFP16-NEXT: call $push50=, __truncsfhf2, $19 -; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50 -; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51 -; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52 -; NOFP16-NEXT: i32.store16 4($0), $pop53 -; NOFP16-NEXT: call $push54=, __truncsfhf2, $10 -; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54 -; NOFP16-NEXT: call $push56=, __truncsfhf2, $2 -; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56 -; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57 -; NOFP16-NEXT: call $push59=, __truncsfhf2, $18 +; NOFP16-NEXT: call $push36=, __extendhfsf2, $pop35 +; NOFP16-NEXT: call $push37=, __extendhfsf2, $20 +; NOFP16-NEXT: f32.add $push38=, $pop36, $pop37 +; NOFP16-NEXT: call $push39=, __truncsfhf2, $pop38 +; NOFP16-NEXT: i32.store16 6($0), $pop39 +; NOFP16-NEXT: call $push40=, __extendhfsf2, $11 +; NOFP16-NEXT: call $push41=, __extendhfsf2, $3 +; NOFP16-NEXT: f32.mul $push42=, $pop40, $pop41 +; NOFP16-NEXT: call $push43=, __truncsfhf2, $pop42 +; NOFP16-NEXT: call $push44=, __extendhfsf2, $pop43 +; NOFP16-NEXT: call $push45=, __extendhfsf2, $19 +; NOFP16-NEXT: f32.add $push46=, $pop44, $pop45 +; NOFP16-NEXT: call $push47=, __truncsfhf2, $pop46 +; NOFP16-NEXT: i32.store16 4($0), $pop47 +; NOFP16-NEXT: call $push48=, __extendhfsf2, $10 +; NOFP16-NEXT: call $push49=, __extendhfsf2, $2 +; NOFP16-NEXT: f32.mul $push50=, $pop48, $pop49 +; NOFP16-NEXT: call $push51=, __truncsfhf2, $pop50 +; NOFP16-NEXT: call $push52=, __extendhfsf2, $pop51 +; NOFP16-NEXT: call $push53=, __extendhfsf2, $18 +; NOFP16-NEXT: f32.add $push54=, $pop52, $pop53 +; NOFP16-NEXT: call $push55=, __truncsfhf2, $pop54 +; NOFP16-NEXT: i32.store16 2($0), $pop55 +; NOFP16-NEXT: call $push56=, __extendhfsf2, $9 +; NOFP16-NEXT: call $push57=, __extendhfsf2, $1 +; NOFP16-NEXT: f32.mul $push58=, $pop56, $pop57 +; NOFP16-NEXT: call $push59=, __truncsfhf2, $pop58 ; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59 -; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60 -; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61 -; NOFP16-NEXT: i32.store16 2($0), $pop62 -; NOFP16-NEXT: call $push63=, __truncsfhf2, $9 -; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63 -; NOFP16-NEXT: call $push65=, __truncsfhf2, $1 -; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65 -; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66 -; NOFP16-NEXT: call $push68=, __truncsfhf2, $17 -; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68 -; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69 -; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70 -; NOFP16-NEXT: i32.store16 0($0), $pop71 +; NOFP16-NEXT: call $push61=, __extendhfsf2, $17 +; NOFP16-NEXT: f32.add $push62=, $pop60, $pop61 +; NOFP16-NEXT: call $push63=, __truncsfhf2, $pop62 +; NOFP16-NEXT: i32.store16 0($0), $pop63 ; NOFP16-NEXT: return ; ; NOSIMD-LABEL: fmuladd_8xf16: -; NOSIMD: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD: .functype fmuladd_8xf16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NOSIMD-NEXT: # %bb.0: -; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16 -; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 -; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8 -; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 -; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 -; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24 -; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 -; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 -; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7 -; NOSIMD-NEXT: i32.store16 14($0), $pop8 -; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15 -; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9 -; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7 +; NOSIMD-NEXT: call $push0=, __extendhfsf2, $16 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $8 +; NOSIMD-NEXT: f32.mul $push2=, $pop0, $pop1 +; NOSIMD-NEXT: call $push3=, __truncsfhf2, $pop2 +; NOSIMD-NEXT: call $push4=, __extendhfsf2, $pop3 +; NOSIMD-NEXT: call $push5=, __extendhfsf2, $24 +; NOSIMD-NEXT: f32.add $push6=, $pop4, $pop5 +; NOSIMD-NEXT: call $push7=, __truncsfhf2, $pop6 +; NOSIMD-NEXT: i32.store16 14($0), $pop7 +; NOSIMD-NEXT: call $push8=, __extendhfsf2, $15 +; NOSIMD-NEXT: call $push9=, __extendhfsf2, $7 +; NOSIMD-NEXT: f32.mul $push10=, $pop8, $pop9 +; NOSIMD-NEXT: call $push11=, __truncsfhf2, $pop10 ; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11 -; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12 -; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23 -; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14 -; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15 -; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16 -; NOSIMD-NEXT: i32.store16 12($0), $pop17 -; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14 -; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18 -; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6 -; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20 -; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21 -; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22 -; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23 -; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24 -; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25 -; NOSIMD-NEXT: i32.store16 10($0), $pop26 -; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13 +; NOSIMD-NEXT: call $push13=, __extendhfsf2, $23 +; NOSIMD-NEXT: f32.add $push14=, $pop12, $pop13 +; NOSIMD-NEXT: call $push15=, __truncsfhf2, $pop14 +; NOSIMD-NEXT: i32.store16 12($0), $pop15 +; NOSIMD-NEXT: call $push16=, __extendhfsf2, $14 +; NOSIMD-NEXT: call $push17=, __extendhfsf2, $6 +; NOSIMD-NEXT: f32.mul $push18=, $pop16, $pop17 +; NOSIMD-NEXT: call $push19=, __truncsfhf2, $pop18 +; NOSIMD-NEXT: call $push20=, __extendhfsf2, $pop19 +; NOSIMD-NEXT: call $push21=, __extendhfsf2, $22 +; NOSIMD-NEXT: f32.add $push22=, $pop20, $pop21 +; NOSIMD-NEXT: call $push23=, __truncsfhf2, $pop22 +; NOSIMD-NEXT: i32.store16 10($0), $pop23 +; NOSIMD-NEXT: call $push24=, __extendhfsf2, $13 +; NOSIMD-NEXT: call $push25=, __extendhfsf2, $5 +; NOSIMD-NEXT: f32.mul $push26=, $pop24, $pop25 +; NOSIMD-NEXT: call $push27=, __truncsfhf2, $pop26 ; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27 -; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5 -; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29 -; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30 -; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21 -; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32 -; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33 +; NOSIMD-NEXT: call $push29=, __extendhfsf2, $21 +; NOSIMD-NEXT: f32.add $push30=, $pop28, $pop29 +; NOSIMD-NEXT: call $push31=, __truncsfhf2, $pop30 +; NOSIMD-NEXT: i32.store16 8($0), $pop31 +; NOSIMD-NEXT: call $push32=, __extendhfsf2, $12 +; NOSIMD-NEXT: call $push33=, __extendhfsf2, $4 +; NOSIMD-NEXT: f32.mul $push34=, $pop32, $pop33 ; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34 -; NOSIMD-NEXT: i32.store16 8($0), $pop35 -; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12 -; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36 -; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4 -; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38 -; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39 -; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20 -; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41 -; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42 -; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43 -; NOSIMD-NEXT: i32.store16 6($0), $pop44 -; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11 -; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45 -; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3 -; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47 -; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48 -; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19 -; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50 -; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51 -; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52 -; NOSIMD-NEXT: i32.store16 4($0), $pop53 -; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10 -; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54 -; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2 -; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56 -; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57 -; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18 +; NOSIMD-NEXT: call $push36=, __extendhfsf2, $pop35 +; NOSIMD-NEXT: call $push37=, __extendhfsf2, $20 +; NOSIMD-NEXT: f32.add $push38=, $pop36, $pop37 +; NOSIMD-NEXT: call $push39=, __truncsfhf2, $pop38 +; NOSIMD-NEXT: i32.store16 6($0), $pop39 +; NOSIMD-NEXT: call $push40=, __extendhfsf2, $11 +; NOSIMD-NEXT: call $push41=, __extendhfsf2, $3 +; NOSIMD-NEXT: f32.mul $push42=, $pop40, $pop41 +; NOSIMD-NEXT: call $push43=, __truncsfhf2, $pop42 +; NOSIMD-NEXT: call $push44=, __extendhfsf2, $pop43 +; NOSIMD-NEXT: call $push45=, __extendhfsf2, $19 +; NOSIMD-NEXT: f32.add $push46=, $pop44, $pop45 +; NOSIMD-NEXT: call $push47=, __truncsfhf2, $pop46 +; NOSIMD-NEXT: i32.store16 4($0), $pop47 +; NOSIMD-NEXT: call $push48=, __extendhfsf2, $10 +; NOSIMD-NEXT: call $push49=, __extendhfsf2, $2 +; NOSIMD-NEXT: f32.mul $push50=, $pop48, $pop49 +; NOSIMD-NEXT: call $push51=, __truncsfhf2, $pop50 +; NOSIMD-NEXT: call $push52=, __extendhfsf2, $pop51 +; NOSIMD-NEXT: call $push53=, __extendhfsf2, $18 +; NOSIMD-NEXT: f32.add $push54=, $pop52, $pop53 +; NOSIMD-NEXT: call $push55=, __truncsfhf2, $pop54 +; NOSIMD-NEXT: i32.store16 2($0), $pop55 +; NOSIMD-NEXT: call $push56=, __extendhfsf2, $9 +; NOSIMD-NEXT: call $push57=, __extendhfsf2, $1 +; NOSIMD-NEXT: f32.mul $push58=, $pop56, $pop57 +; NOSIMD-NEXT: call $push59=, __truncsfhf2, $pop58 ; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59 -; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60 -; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61 -; NOSIMD-NEXT: i32.store16 2($0), $pop62 -; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9 -; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63 -; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1 -; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65 -; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66 -; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17 -; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68 -; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69 -; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70 -; NOSIMD-NEXT: i32.store16 0($0), $pop71 +; NOSIMD-NEXT: call $push61=, __extendhfsf2, $17 +; NOSIMD-NEXT: f32.add $push62=, $pop60, $pop61 +; NOSIMD-NEXT: call $push63=, __truncsfhf2, $pop62 +; NOSIMD-NEXT: i32.store16 0($0), $pop63 ; NOSIMD-NEXT: return %fma = call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c) ret <8 x half> %fma