// Repro method: performs one predicated SVE load from `src`, conditionally adds it
// into a zeroed accumulator, and returns the across-lane sum as an int.
// `pred` is consumed twice (by LoadVector and by ConditionalSelect).
[MethodImpl(MethodImplOptions.NoInlining)]
public static unsafe int foo(ref int* src, int length)
{
// Accumulator, created with every lane set to 0.
Vector<int> total = new Vector<int>(0);
// Predicate produced by CreateWhileLessThanMask32Bit(0, length), cast to Vector<int>.
// Presumably lane i is active while i < length -- confirm against the Sve API docs.
Vector<int> pred = (Vector<int>)Sve.CreateWhileLessThanMask32Bit(0, length);
// First use of pred: governs the load from src.
Vector<int> vec = Sve.LoadVector(pred, src);
// Second use of pred: chooses between (total + vec) and the unchanged total.
total = Sve.ConditionalSelect(pred, Sve.Add(total, vec), total);
// Reduce across lanes, take the scalar result and cast it to int.
return (int)Sve.AddAcross(total).ToScalar();
}
// Prologue: push fp/lr with a 0x20-byte frame, set fp, and zero the 16 bytes of [V02 loc0].
G_M25815_IG01: ;; offset=0x0000
stp fp, lr, [sp, #-0x20]!
mov fp, sp
stp xzr, xzr, [fp, #0x10] // [V02 loc0], [V02 loc0+0x08]
;; size=12 bbWeight=1 PerfScore 2.50
// Body.
G_M25815_IG02: ;; offset=0x000C
// Zero [fp+0x10] and [fp+0x18] again -- the same 16 bytes the prologue stp just cleared.
str xzr, [fp, #0x10]
str xzr, [fp, #0x18]
// w2 = 0, then build the while-less-than predicate p0 from (w2, w1).
mov w2, wzr
whilelt p0.s, w2, w1
// Mask -> vector -> mask round trip: materialize p0 as a vector of 0/1 in z16,
// then compare that vector against 0 under an all-true predicate to rebuild p0.
mov z16.s, p0/z, #1
ptrue p0.s
cmpne p0.s, p0/z, z16.s, #0
// Load loc0 into q16 (overwrites the temporary mask vector).
ldr q16, [fp, #0x10] // [V02 loc0]
// x0 held an address; load the pointer stored there, then do the predicated load into z17.
ldr x0, [x0]
ld1w { z17.s }, p0/z, [x0]
// Second load of loc0, used as the "else" operand of the select below.
ldr q18, [fp, #0x10] // [V02 loc0]
// z16 = z16 + z17, then keep the sum where p0 is set and z18 elsewhere.
add z16.s, z16.s, z17.s
sel z16.s, p0, z16.s, z18.s
// Store the result to loc0 and immediately reload it.
str q16, [fp, #0x10] // [V02 loc0]
ldr q16, [fp, #0x10] // [V02 loc0]
// Sum all lanes under an all-true predicate and move the scalar result into x0.
ptrue p0.s
saddv d16, p0, z16.s
umov x0, v16.d[0]
;; size=72 bbWeight=1 PerfScore 39.50
// Epilogue: restore fp/lr, pop the frame and return.
G_M25815_IG03: ;; offset=0x0054
ldp fp, lr, [sp], #0x20
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
The following three instructions are not required; they only convert the predicate mask -> vector -> mask:
mov z16.s, p0/z, #1
ptrue p0.s
cmpne p0.s, p0/z, z16.s, #0
Observed with DOTNET_TieredCompilation=0.
These three lines are not required; they only convert mask -> vector -> mask.
I suspect this is because there are two uses of `pred`: one in the conditional select and one in the load vector.