From 55731b41720fc6a8c1f127e7e4746e58cb717632 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 21 Nov 2023 22:36:26 -0800 Subject: [PATCH 01/32] Add insEncodeReg* methods Change-Id: I9d32d52c8a54e4f001a8af5ff556e747087c9149 --- src/coreclr/jit/emitarm64.cpp | 217 ++++++++++++++++++++++++++++++++-- src/coreclr/jit/emitarm64.h | 57 ++++++++- 2 files changed, 257 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 8c474c3b9d8166..1e011d5be06262 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -10633,25 +10633,129 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * Returns an encoding for the specified register used in the 'Pd' position + * Return an encoding for the specified 'V' register used in '4' thru '0' position. */ -/*static*/ emitter::code_t emitter::insEncodeReg_Pd(regNumber reg) +/*static*/ emitter::code_t emitter::insEncodeReg_V_4_to_0(regNumber reg) { - assert(emitter::isPredicateRegister(reg)); + assert(isVectorRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0; + assert((ureg >= 0) && (ureg <= 32)); + return ureg << 0; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'V' register used in '9' thru '5' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_V_9_to_5(regNumber reg) +{ + assert(isVectorRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0; + assert((ureg >= 0) && (ureg <= 32)); + return ureg << 5; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'P' register used in '12' thru '10' position. 
+ */ + +/*static*/ emitter::code_t emitter::insEncodeReg_P_12_to_10(regNumber reg) +{ + assert(isPredicateRegister(reg)); emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; assert((ureg >= 0) && (ureg <= 15)); - return ureg; + return ureg << 10; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'V' register used in '21' thru '17' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_V_21_to_17(regNumber reg) +{ + assert(isVectorRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0; + assert((ureg >= 0) && (ureg <= 32)); + return ureg << 17; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'R' register used in '21' thru '17' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_R_21_to_17(regNumber reg) +{ + assert(isIntegerRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg; + assert((ureg >= 0) && (ureg <= 32)); + return ureg << 17; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'R' register used in '9' thru '5' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_R_9_to_5(regNumber reg) +{ + assert(isIntegerRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg; + assert((ureg >= 0) && (ureg <= 32)); + return ureg << 5; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'R' register used in '4' thru '0' position. 
+ */ + +/*static*/ emitter::code_t emitter::insEncodeReg_R_4_to_0(regNumber reg) +{ + assert(isIntegerRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg; + assert((ureg >= 0) && (ureg <= 32)); + return ureg << 0; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'P' register used in '20' thru '17' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_P_20_to_17(regNumber reg) +{ + assert(isPredicateRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; + assert((ureg >= 0) && (ureg <= 15)); + return ureg << 17; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'P' register used in '3' thru '0' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_P_3_to_0(regNumber reg) +{ + assert(isPredicateRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; + assert((ureg >= 0) && (ureg <= 15)); + return ureg << 0; } /***************************************************************************** * - * Returns an encoding for the specified register used in the 'Pn' position + * Return an encoding for the specified 'P' register used in '8' thru '5' position. 
*/ -/*static*/ emitter::code_t emitter::insEncodeReg_Pn(regNumber reg) +/*static*/ emitter::code_t emitter::insEncodeReg_P_8_to_5(regNumber reg) { - assert(emitter::isPredicateRegister(reg)); + assert(isPredicateRegister(reg)); emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; assert((ureg >= 0) && (ureg <= 15)); return ureg << 5; @@ -10659,15 +10763,106 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * Returns an encoding for the specified register used in the 'Pm' position + * Return an encoding for the specified 'P' register used in '13' thru '10' position. */ -/*static*/ emitter::code_t emitter::insEncodeReg_Pm(regNumber reg) +/*static*/ emitter::code_t emitter::insEncodeReg_P_13_to_10(regNumber reg) { - assert(emitter::isPredicateRegister(reg)); + assert(isPredicateRegister(reg)); emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; assert((ureg >= 0) && (ureg <= 15)); - return ureg << 16; + return ureg << 10; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'R' register used in '18' thru '17' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_R_18_to_17(regNumber reg) +{ + assert(isIntegerRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg; + assert((ureg >= 0) && (ureg <= 32)); + return ureg << 17; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'P' register used in '7' thru '5' position. 
+ */ + +/*static*/ emitter::code_t emitter::insEncodeReg_P_7_to_5(regNumber reg) +{ + assert(isPredicateRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; + assert((ureg >= 0) && (ureg <= 15)); + return ureg << 5; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'P' register used in '3' thru '1' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_P_3_to_1(regNumber reg) +{ + assert(isPredicateRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; + assert((ureg >= 0) && (ureg <= 15)); + return ureg << 1; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'P' register used in '2' thru '0' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_P_2_to_0(regNumber reg) +{ + assert(isPredicateRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; + assert((ureg >= 0) && (ureg <= 15)); + return ureg << 0; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'V' register used in '19' thru '17' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_V_19_to_17(regNumber reg) +{ + assert(isVectorRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0; + assert((ureg >= 0) && (ureg <= 32)); + return ureg << 17; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'V' register used in '20' thru '17' position. 
+ */ + +/*static*/ emitter::code_t emitter::insEncodeReg_V_20_to_17(regNumber reg) +{ + assert(isVectorRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0; + assert((ureg >= 0) && (ureg <= 32)); + return ureg << 17; +} + +/***************************************************************************** + * + * Return an encoding for the specified 'V' register used in '9' thru '6' position. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_V_9_to_6(regNumber reg) +{ + assert(isVectorRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0; + assert((ureg >= 0) && (ureg <= 32)); + return ureg << 6; } /***************************************************************************** diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 24f2d1199a4c88..e5a156b1bfdb53 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -316,14 +316,59 @@ static code_t insEncodeReg_Vm(regNumber reg); // Returns an encoding for the specified register used in the 'Va' position static code_t insEncodeReg_Va(regNumber reg); -// Returns an encoding for the specified register used in the 'Pd' position -static code_t insEncodeReg_Pd(regNumber reg); +// Return an encoding for the specified register used in '4' thru '0' position. +static code_t insEncodeReg_V_4_to_0(regNumber reg); -// Returns an encoding for the specified register used in the 'Pn' position -static code_t insEncodeReg_Pn(regNumber reg); +// Return an encoding for the specified register used in '9' thru '5' position. +static code_t insEncodeReg_V_9_to_5(regNumber reg); -// Returns an encoding for the specified register used in the 'Pm' position -static code_t insEncodeReg_Pm(regNumber reg); +// Return an encoding for the specified register used in '12' thru '10' position. +static code_t insEncodeReg_P_12_to_10(regNumber reg); + +// Return an encoding for the specified register used in '21' thru '17' position. 
+static code_t insEncodeReg_V_21_to_17(regNumber reg); + +// Return an encoding for the specified register used in '21' thru '17' position. +static code_t insEncodeReg_R_21_to_17(regNumber reg); + +// Return an encoding for the specified register used in '9' thru '5' position. +static code_t insEncodeReg_R_9_to_5(regNumber reg); + +// Return an encoding for the specified register used in '4' thru '0' position. +static code_t insEncodeReg_R_4_to_0(regNumber reg); + +// Return an encoding for the specified register used in '20' thru '17' position. +static code_t insEncodeReg_P_20_to_17(regNumber reg); + +// Return an encoding for the specified register used in '3' thru '0' position. +static code_t insEncodeReg_P_3_to_0(regNumber reg); + +// Return an encoding for the specified register used in '8' thru '5' position. +static code_t insEncodeReg_P_8_to_5(regNumber reg); + +// Return an encoding for the specified register used in '13' thru '10' position. +static code_t insEncodeReg_P_13_to_10(regNumber reg); + +// Return an encoding for the specified register used in '18' thru '17' position. +static code_t insEncodeReg_R_18_to_17(regNumber reg); + +// Return an encoding for the specified register used in '7' thru '5' position. +static code_t insEncodeReg_P_7_to_5(regNumber reg); + +// Return an encoding for the specified register used in '3' thru '1' position. +static code_t insEncodeReg_P_3_to_1(regNumber reg); + +// Return an encoding for the specified register used in '2' thru '0' position. +static code_t insEncodeReg_P_2_to_0(regNumber reg); + +// Return an encoding for the specified register used in '19' thru '17' position. +static code_t insEncodeReg_V_19_to_17(regNumber reg); + +// Return an encoding for the specified register used in '20' thru '17' position. +static code_t insEncodeReg_V_20_to_17(regNumber reg); + +// Return an encoding for the specified register used in '9' thru '6' position. 
+static code_t insEncodeReg_V_9_to_6(regNumber reg); // Returns an encoding for the imm which represents the condition code. static code_t insEncodeCond(insCond cond); From 90fb3de2e57215a2a4fc84cd92b8d6159be4a5ed Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 20 Nov 2023 14:27:57 +0000 Subject: [PATCH 02/32] Add Arm64 encodings for 3A groups --- src/coreclr/jit/codegenarm64.cpp | 30 +++- src/coreclr/jit/emit.cpp | 2 + src/coreclr/jit/emit.h | 3 + src/coreclr/jit/emitarm64.cpp | 237 ++++++++++++++++++++++++++----- src/coreclr/jit/emitarm64.h | 27 +++- src/coreclr/jit/targetarm64.h | 2 + 6 files changed, 264 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 4d1a2fd36f43fd..8ced7da21d4a42 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5433,7 +5433,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) // Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here. // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. 
-//#define ALL_ARM64_EMITTER_UNIT_TESTS +#define ALL_ARM64_EMITTER_UNIT_TESTS #if defined(DEBUG) void CodeGen::genArm64EmitterUnitTests() @@ -7304,8 +7304,8 @@ void CodeGen::genArm64EmitterUnitTests() genDefineTempLabel(genCreateTempLabel()); - theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0); - theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535); + // theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0); + // theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535); theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD); theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST); @@ -10104,6 +10104,30 @@ void CodeGen::genArm64EmitterUnitTests() #endif +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS + // + // R_R_R SVE operations, one dest, two source + // + + // TODO-SVE: Fix once we add Z and predicate registers + + theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B); // AND ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H); // BIC ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V14, REG_V5, REG_V16, INS_OPTS_SCALABLE_S); // EOR ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V29, REG_V7, REG_V31, INS_OPTS_SCALABLE_D); // ORR ., /M, ., . + + theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V5, REG_V6, REG_V7, INS_OPTS_SCALABLE_B); // ADD ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V15, REG_V7, REG_V29, INS_OPTS_SCALABLE_H); // SUB ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_subr, EA_SCALABLE, REG_V2, REG_V0, REG_V13, INS_OPTS_SCALABLE_S); // SUBR ., /M, ., . + + theEmitter->emitIns_R_R_R(INS_sve_sdiv, EA_SCALABLE, REG_V3, REG_V2, REG_V9, INS_OPTS_SCALABLE_S); // SDIV ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_sdivr, EA_SCALABLE, REG_V31, REG_V3, REG_V29, INS_OPTS_SCALABLE_D); // SDIVR ., /M, ., . 
+ theEmitter->emitIns_R_R_R(INS_sve_udiv, EA_SCALABLE, REG_V1, REG_V0, REG_V0, INS_OPTS_SCALABLE_S); // UDIV ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_udivr, EA_SCALABLE, REG_V13, REG_V7, REG_V15, INS_OPTS_SCALABLE_D); // UDIVR ., /M, ., . + + +#endif // ALL_ARM64_EMITTER_UNIT_TESTS + #ifdef ALL_ARM64_EMITTER_UNIT_TESTS BasicBlock* label = genCreateTempLabel(); diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index b18451d9d1fe88..49875c459927e1 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -2730,6 +2730,8 @@ const emitAttr emitter::emitSizeDecode[emitter::OPSZ_COUNT] = { EA_1BYTE, EA_2BYTE, EA_4BYTE, EA_8BYTE, EA_16BYTE, #if defined(TARGET_XARCH) EA_32BYTE, EA_64BYTE, +#elif defined(TARGET_ARM64) + EA_SCALABLE, #endif // TARGET_XARCH }; diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 5471cc0bc0467e..8ca70a92b9cdc8 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -505,6 +505,9 @@ class emitter OPSZ32 = 5, OPSZ64 = 6, OPSZ_COUNT = 7, +#elif defined(TARGET_ARM64) + OPSZ_SCALABLE = 5, + OPSZ_COUNT = 6, #else OPSZ_COUNT = 5, #endif diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 1e011d5be06262..3db3e533739a77 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -943,6 +943,39 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(datasize == EA_8BYTE); break; + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_AO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + elemsize = id->idOpSize(); + assert(insOptsScalable(id->idInsOpt())); + assert(isSveRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); // xx + break; + + case IF_SVE_AC_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer divide vectors (predicated) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isSveRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); // xx + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -1477,7 +1510,7 @@ const char* emitter::emitPredicateRegName(regNumber reg) int index = (int)reg - (int)REG_P0; - return vRegNames[index]; + return pRegNames[index]; } /***************************************************************************** @@ -3769,59 +3802,62 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt) assert(encoding_found); + // The ins enum includes all the Arm64 instructions. But, the insCodes tables start from the first SVE instruction. + unsigned sve_ins_offset = ((unsigned)ins - INS_sve_invalid); + switch (index) { case 0: - assert(ins < ArrLen(insCodes1)); - code = insCodes1[ins]; + assert(sve_ins_offset < ArrLen(insCodes1)); + code = insCodes1[sve_ins_offset]; break; case 1: - assert(ins < ArrLen(insCodes2)); - code = insCodes2[ins]; + assert(sve_ins_offset < ArrLen(insCodes2)); + code = insCodes2[sve_ins_offset]; break; case 2: - assert(ins < ArrLen(insCodes3)); - code = insCodes3[ins]; + assert(sve_ins_offset < ArrLen(insCodes3)); + code = insCodes3[sve_ins_offset]; break; case 3: - assert(ins < ArrLen(insCodes4)); - code = insCodes4[ins]; + assert(sve_ins_offset < ArrLen(insCodes4)); + code = insCodes4[sve_ins_offset]; break; case 4: - assert(ins < ArrLen(insCodes5)); - code = insCodes5[ins]; + assert(sve_ins_offset < ArrLen(insCodes5)); + code = insCodes5[sve_ins_offset]; break; case 5: - assert(ins < ArrLen(insCodes6)); - code = insCodes6[ins]; + assert(sve_ins_offset < ArrLen(insCodes6)); + code = insCodes6[sve_ins_offset]; break; case 6: - assert(ins < 
ArrLen(insCodes7)); - code = insCodes7[ins]; + assert(sve_ins_offset < ArrLen(insCodes7)); + code = insCodes7[sve_ins_offset]; break; case 7: - assert(ins < ArrLen(insCodes8)); - code = insCodes8[ins]; + assert(sve_ins_offset < ArrLen(insCodes8)); + code = insCodes8[sve_ins_offset]; break; case 8: - assert(ins < ArrLen(insCodes9)); - code = insCodes9[ins]; + assert(sve_ins_offset < ArrLen(insCodes9)); + code = insCodes9[sve_ins_offset]; break; case 9: - assert(ins < ArrLen(insCodes10)); - code = insCodes10[ins]; + assert(sve_ins_offset < ArrLen(insCodes10)); + code = insCodes10[sve_ins_offset]; break; case 10: - assert(ins < ArrLen(insCodes11)); - code = insCodes11[ins]; + assert(sve_ins_offset < ArrLen(insCodes11)); + code = insCodes11[sve_ins_offset]; break; case 11: - assert(ins < ArrLen(insCodes12)); - code = insCodes12[ins]; + assert(sve_ins_offset < ArrLen(insCodes12)); + code = insCodes12[sve_ins_offset]; break; case 12: - assert(ins < ArrLen(insCodes13)); - code = insCodes13[ins]; + assert(sve_ins_offset < ArrLen(insCodes13)); + code = insCodes13[sve_ins_offset]; break; } @@ -8047,6 +8083,38 @@ void emitter::emitIns_R_R_R( fmt = IF_DV_3A; break; + case INS_sve_and: + case INS_sve_bic: + case INS_sve_eor: + case INS_sve_orr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalable(opt)); + fmt = IF_SVE_AA_3A; + break; + + case INS_sve_add: + case INS_sve_sub: + case INS_sve_subr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalable(opt)); + fmt = IF_SVE_AB_3A; + break; + + case INS_sve_sdiv: + case INS_sve_sdivr: + case INS_sve_udiv: + case INS_sve_udivr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableWords(opt)); + fmt = IF_SVE_AC_3A; + break; + default: unreached(); break; @@ -10664,7 +10732,7 @@ void 
emitter::emitIns_Call(EmitCallType callType, /*static*/ emitter::code_t emitter::insEncodeReg_P_12_to_10(regNumber reg) { - assert(isPredicateRegister(reg)); + assert(isLowPredicateRegister(reg)); emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; assert((ureg >= 0) && (ureg <= 15)); return ureg << 10; @@ -10794,7 +10862,7 @@ void emitter::emitIns_Call(EmitCallType callType, /*static*/ emitter::code_t emitter::insEncodeReg_P_7_to_5(regNumber reg) { - assert(isPredicateRegister(reg)); + assert(isLowPredicateRegister(reg)); emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; assert((ureg >= 0) && (ureg <= 15)); return ureg << 5; @@ -10807,7 +10875,7 @@ void emitter::emitIns_Call(EmitCallType callType, /*static*/ emitter::code_t emitter::insEncodeReg_P_3_to_1(regNumber reg) { - assert(isPredicateRegister(reg)); + assert(isLowPredicateRegister(reg)); emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; assert((ureg >= 0) && (ureg <= 15)); return ureg << 1; @@ -11658,6 +11726,30 @@ void emitter::emitIns_Call(EmitCallType callType, } } +/***************************************************************************** + * + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize(insOpts opt) +{ + if (opt == INS_OPTS_SCALABLE_D) + { + return 0x00C00000; // set the bit at location 23 and 22 + } + else if (opt == INS_OPTS_SCALABLE_S) + { + return 0x00800000; // set the bit at location 23 + } + else if (opt == INS_OPTS_SCALABLE_H) + { + return 0x00400000; // set the bit at location 22 + } + assert(opt == INS_OPTS_SCALABLE_B); + return 0x00000000; +} + + BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id) { instruction ins = id->idIns(); @@ -13605,6 +13697,31 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + 
case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(id->idInsOpt()); // xx + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -14067,7 +14184,7 @@ void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma) void emitter::emitDispSveReg(regNumber reg, insOpts opt, bool addComma) { assert(insOptsScalable(opt)); - assert(isVectorRegister(reg)); + assert(isSveRegister(reg)); printf(emitSveRegName(reg)); emitDispArrangement(opt); @@ -14160,11 +14277,21 @@ void emitter::emitDispVectorElemList( //------------------------------------------------------------------------ // emitDispPredicateReg: Display a predicate register name with with an arrangement suffix // -void emitter::emitDispPredicateReg(regNumber reg, insOpts opt, bool addComma) +void emitter::emitDispPredicateReg(regNumber reg, bool merge, bool addComma) { assert(isPredicateRegister(reg)); printf(emitPredicateRegName(reg)); - emitDispArrangement(opt); + + // TODO-SVE: Some instructions have a bit to indicate Zero or Merge. This will probably + // need encoding in opts or similar. + if (merge) + { + printf("/m"); + } + else + { + printf("/z"); + } if (addComma) emitDispComma(); @@ -15777,6 +15904,29 @@ void emitter::emitDispInsHelp( } break; + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), true, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -17962,6 +18112,27 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_1C; + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index e5a156b1bfdb53..7d02065940bfbd 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -45,7 +45,7 @@ void emitDispVectorReg(regNumber reg, insOpts opt, bool addComma); void emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma); void emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); void emitDispVectorElemList(regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma); -void emitDispPredicateReg(regNumber reg, insOpts opt, bool addComma); +void emitDispPredicateReg(regNumber reg, bool merge, bool addComma); void emitDispArrangement(insOpts opt); void emitDispElemsize(emitAttr elemsize); void emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr); @@ -452,6 +452,9 @@ static code_t insEncodeExtendScale(ssize_t imm); // Returns the encoding to have the Rm register be auto scaled by the ld/st size static code_t insEncodeReg3Scale(bool isScaled); +// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction +static code_t insEncodeSveElemsize(insOpts opt); + // Returns true if 'reg' represents an integer register. 
static bool isIntegerRegister(regNumber reg) { @@ -702,6 +705,11 @@ inline static bool isValidVectorElemsizeFloat(emitAttr size) return (size == EA_8BYTE) || (size == EA_4BYTE); } +inline static bool isScalableVectorSize(emitAttr size) +{ + return (size == EA_SCALABLE); +} + inline static bool isGeneralRegister(regNumber reg) { return (reg >= REG_INT_FIRST) && (reg <= REG_LR); @@ -727,11 +735,22 @@ inline static bool isFloatReg(regNumber reg) return isVectorRegister(reg); } +inline static bool isSveRegister(regNumber reg) +{ + // TODO-SVE: Fix once we add Z registers + return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST); +} + inline static bool isPredicateRegister(regNumber reg) { return (reg >= REG_PREDICATE_FIRST && reg <= REG_PREDICATE_LAST); } +inline static bool isLowPredicateRegister(regNumber reg) +{ + return (reg >= REG_PREDICATE_LOW_FIRST && reg <= REG_PREDICATE_LOW_LAST); +} + inline static bool insOptsNone(insOpts opt) { return (opt == INS_OPTS_NONE); @@ -834,6 +853,12 @@ inline static bool insOptsScalable(insOpts opt) opt == INS_OPTS_SCALABLE_D)); } +inline static bool insOptsScalableWords(insOpts opt) +{ + // TODO-SVE: Maybe this function needs a better name. + return ((opt == INS_OPTS_SCALABLE_S || opt == INS_OPTS_SCALABLE_D)); +} + static bool isValidImmCond(ssize_t imm); static bool isValidImmCondFlags(ssize_t imm); static bool isValidImmCondFlagsImm5(ssize_t imm); diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 61b3f3245ee22a..44054f0f0c41e2 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -52,6 +52,8 @@ #define LAST_FP_ARGREG REG_V15 #define REG_PREDICATE_FIRST REG_P0 #define REG_PREDICATE_LAST REG_P15 + #define REG_PREDICATE_LOW_FIRST REG_P0 + #define REG_PREDICATE_LOW_LAST REG_P7 // Some instructions can only use the first half of the predicate registers. 
#define REGNUM_BITS 6 // number of bits in a REG_* #define REGSIZE_BYTES 8 // number of bytes in one general purpose register From 15ee2fc01c585cce60beb427968f58873cff14e3 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 22 Nov 2023 16:09:06 +0000 Subject: [PATCH 03/32] Remove isSveRegister --- src/coreclr/jit/emitarm64.cpp | 6 +++--- src/coreclr/jit/emitarm64.h | 6 ------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 3db3e533739a77..d911f2655a2af3 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -961,7 +961,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) elemsize = id->idOpSize(); assert(insOptsScalable(id->idInsOpt())); - assert(isSveRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg1())); // ddddd assert(isLowPredicateRegister(id->idReg2())); // ggg assert(isVectorRegister(id->idReg3())); // mmmmm assert(isScalableVectorSize(elemsize)); // xx @@ -970,7 +970,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_AC_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer divide vectors (predicated) elemsize = id->idOpSize(); assert(insOptsScalableWords(id->idInsOpt())); - assert(isSveRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg1())); // ddddd assert(isLowPredicateRegister(id->idReg2())); // ggg assert(isVectorRegister(id->idReg3())); // mmmmm assert(isScalableVectorSize(elemsize)); // xx @@ -14184,7 +14184,7 @@ void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma) void emitter::emitDispSveReg(regNumber reg, insOpts opt, bool addComma) { assert(insOptsScalable(opt)); - assert(isSveRegister(reg)); + assert(isVectorRegister(reg)); printf(emitSveRegName(reg)); emitDispArrangement(opt); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 7d02065940bfbd..48b338ad9f48b3 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -735,12 +735,6 @@ inline static bool isFloatReg(regNumber reg) return isVectorRegister(reg); } -inline static bool isSveRegister(regNumber reg) -{ - // TODO-SVE: Fix once we add Z registers - return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST); -} - inline static bool isPredicateRegister(regNumber reg) { return (reg >= REG_PREDICATE_FIRST && reg <= REG_PREDICATE_LAST); From b7490a0c6500325391e210908111c59507c59f9a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 22 Nov 2023 16:35:59 +0000 Subject: [PATCH 04/32] Split ALL_ARM64_EMITTER_UNIT_TESTS into subsets --- src/coreclr/jit/codegenarm64.cpp | 193 ++++++++++++++++--------------- 1 file changed, 99 insertions(+), 94 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 8ced7da21d4a42..a1f89f50b454ff 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5431,9 +5431,12 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) * disassembler thinks the instructions as the same as we do. 
*/ -// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here. +// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" and one or more of the category defines below to run the unit tests in those categories. // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. #define ALL_ARM64_EMITTER_UNIT_TESTS +// #define ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL +// #define ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD +#define ALL_ARM64_EMITTER_UNIT_TESTS_SVE #if defined(DEBUG) void CodeGen::genArm64EmitterUnitTests() @@ -5450,7 +5453,7 @@ void CodeGen::genArm64EmitterUnitTests() emitter* theEmitter = GetEmitter(); #endif // ALL_ARM64_EMITTER_UNIT_TESTS -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // We use this: // genDefineTempLabel(genCreateTempLabel()); // to create artificial labels to help separate groups of tests. @@ -5555,9 +5558,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_stlxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14); theEmitter->emitIns_R_R_R(INS_stlxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -5770,9 +5773,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_tbx_4regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_tbx_4regs, EA_16BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -5953,9 +5956,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_ld4r, EA_8BYTE, REG_V4, REG_R8, REG_R9, INS_OPTS_1D); 
theEmitter->emitIns_R_R_R(INS_ld4r, EA_16BYTE, REG_V10, REG_R14, REG_R15, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -6136,9 +6139,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_ld4r, EA_8BYTE, REG_V30, REG_R2, 32, INS_OPTS_1D); theEmitter->emitIns_R_R_I(INS_ld4r, EA_16BYTE, REG_V3, REG_R7, 32, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -6193,9 +6196,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_st4, EA_4BYTE, REG_V10, REG_R14, 0); theEmitter->emitIns_R_R_I(INS_st4, EA_8BYTE, REG_V15, REG_R19, 1); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -6250,9 +6253,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_st4, EA_4BYTE, REG_V12, REG_R16, REG_R17, 0, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_st4, EA_8BYTE, REG_V18, REG_R22, REG_R23, 1, INS_OPTS_POST_INDEX); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -6307,9 +6310,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I_I(INS_st4, EA_4BYTE, REG_V10, REG_R14, 0, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I_I(INS_st4, EA_8BYTE, REG_V15, REG_R19, 1, 32, 
INS_OPTS_POST_INDEX); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Compares // @@ -6341,9 +6344,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // R_R // @@ -6362,9 +6365,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5); theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_I // @@ -6415,9 +6418,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R // @@ -6452,9 +6455,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_Mov(INS_uxtb, EA_4BYTE, REG_R3, REG_R13, /* canSkip */ false); theEmitter->emitIns_Mov(INS_uxth, EA_4BYTE, REG_R2, REG_R14, /* canSkip */ false); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_I_I // @@ -6473,9 +6476,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL); -#endif // 
ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_I // @@ -6583,9 +6586,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_I cmp/txt // @@ -6647,9 +6650,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_R // @@ -6702,9 +6705,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // ARMv8.1 LSE Atomics // @@ -6771,9 +6774,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_staddl, EA_4BYTE, REG_R8, REG_R10); theEmitter->emitIns_R_R(INS_stadd, EA_8BYTE, REG_R8, REG_R10); theEmitter->emitIns_R_R(INS_staddl, EA_8BYTE, REG_R8, REG_R10); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_I_I // @@ -6804,9 +6807,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9); 
theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_R_I // @@ -6913,7 +6916,7 @@ void CodeGen::genArm64EmitterUnitTests() -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_R_I -- load/store pair // @@ -6959,9 +6962,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_R_Ext -- load/store shifted/extend // @@ -7086,9 +7089,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_R_R // @@ -7109,9 +7112,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23); theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // R_COND // @@ -7147,9 +7150,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, 
INS_COND_LE); // le -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // R_R_COND // @@ -7171,9 +7174,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // R_R_R_COND // @@ -7196,9 +7199,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LE); // le -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // R_R_FLAGS_COND // @@ -7282,9 +7285,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Branch to register // @@ -7295,9 +7298,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8); theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Misc // @@ -7323,9 +7326,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST); 
theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD //////////////////////////////////////////////////////////////////////////////// // // SIMD and Floating point @@ -7499,9 +7502,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R mov and aliases for mov // @@ -7587,9 +7590,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_ext, EA_16BYTE, REG_V8, REG_V9, REG_V10, 11, INS_OPTS_16B); theEmitter->emitIns_R_R_R_I(INS_ext, EA_16BYTE, REG_V12, REG_V13, REG_V14, 15, INS_OPTS_16B); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_I movi and mvni // @@ -7662,9 +7665,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16 -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_I orr/bic vector immediate // @@ -7699,9 +7702,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD 
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_F cmp/fmov immediate // @@ -7740,9 +7743,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0); theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R cmeq/fmov/fcmp/fcvt // @@ -7784,9 +7787,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H); theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R floating point conversions // @@ -7981,9 +7984,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R floating point operations, one dest, one source // @@ -8106,7 +8109,7 @@ void CodeGen::genArm64EmitterUnitTests() #endif -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD genDefineTempLabel(genCreateTempLabel()); // abs scalar @@ -8391,9 +8394,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_xtn2, EA_16BYTE, REG_V4, REG_V10, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_xtn2, EA_16BYTE, REG_V5, REG_V11, INS_OPTS_4S); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R floating point 
round to int, one dest, one source // @@ -8461,9 +8464,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_R floating point operations, one dest, two source // @@ -8589,9 +8592,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_I vector operations, one dest, one source reg, one immed // @@ -9026,9 +9029,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_uqshrn2, EA_16BYTE, REG_V20, REG_V21, 17, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_uqshrn2, EA_16BYTE, REG_V22, REG_V23, 32, INS_OPTS_4S); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_R vector operations, one dest, two source // @@ -9140,9 +9143,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // sdot vector theEmitter->emitIns_R_R_R(INS_sdot, EA_8BYTE, REG_V0, REG_V1, 
REG_V2, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_sdot, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4S); @@ -9288,9 +9291,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_fcmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // trn1 vector theEmitter->emitIns_R_R_R(INS_trn1, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_trn1, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); @@ -9344,9 +9347,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_zip2, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_zip2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_zip2, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // srshl scalar theEmitter->emitIns_R_R_R(INS_srshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE); @@ -9794,9 +9797,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_usubw2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_usubw2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_usubw2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_R vector multiply // @@ -9854,9 +9857,9 @@ void CodeGen::genArm64EmitterUnitTests() 
theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // pmull vector theEmitter->emitIns_R_R_R(INS_pmull, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_pmull, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_1D); @@ -10056,9 +10059,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_umull2, EA_16BYTE, REG_V6, REG_V7, REG_V8, 7, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_umull2, EA_16BYTE, REG_V9, REG_V10, REG_V11, 3, INS_OPTS_4S); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_R floating point operations, one source/dest, and two source // @@ -10085,9 +10088,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_R_R floating point operations, one dest, and three source // @@ -10104,11 +10107,13 @@ void CodeGen::genArm64EmitterUnitTests() #endif -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE // // R_R_R SVE operations, one dest, two source // + genDefineTempLabel(genCreateTempLabel()); + // TODO-SVE: Fix once we add Z and predicate registers theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_V1, REG_V2, 
INS_OPTS_SCALABLE_B); // AND ., /M, ., . @@ -10126,7 +10131,7 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_udivr, EA_SCALABLE, REG_V13, REG_V7, REG_V15, INS_OPTS_SCALABLE_D); // UDIVR ., /M, ., . -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE #ifdef ALL_ARM64_EMITTER_UNIT_TESTS From 473483c5cf182b423d3da15a4830e5edc3aee09d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 22 Nov 2023 17:15:54 +0000 Subject: [PATCH 05/32] AD, AE, AN --- src/coreclr/jit/codegenarm64.cpp | 45 +++++++++++++++++++--------- src/coreclr/jit/emitarm64.cpp | 50 ++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index a1f89f50b454ff..4b62a3cf599e44 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10116,20 +10116,37 @@ void CodeGen::genArm64EmitterUnitTests() // TODO-SVE: Fix once we add Z and predicate registers - theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B); // AND ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H); // BIC ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V14, REG_V5, REG_V16, INS_OPTS_SCALABLE_S); // EOR ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V29, REG_V7, REG_V31, INS_OPTS_SCALABLE_D); // ORR ., /M, ., . - - theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V5, REG_V6, REG_V7, INS_OPTS_SCALABLE_B); // ADD ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V15, REG_V7, REG_V29, INS_OPTS_SCALABLE_H); // SUB ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_subr, EA_SCALABLE, REG_V2, REG_V0, REG_V13, INS_OPTS_SCALABLE_S); // SUBR ., /M, ., . - - theEmitter->emitIns_R_R_R(INS_sve_sdiv, EA_SCALABLE, REG_V3, REG_V2, REG_V9, INS_OPTS_SCALABLE_S); // SDIV ., /M, ., . 
- theEmitter->emitIns_R_R_R(INS_sve_sdivr, EA_SCALABLE, REG_V31, REG_V3, REG_V29, INS_OPTS_SCALABLE_D); // SDIVR ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_udiv, EA_SCALABLE, REG_V1, REG_V0, REG_V0, INS_OPTS_SCALABLE_S); // UDIV ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_udivr, EA_SCALABLE, REG_V13, REG_V7, REG_V15, INS_OPTS_SCALABLE_D); // UDIVR ., /M, ., . - + theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_P1, REG_V2, INS_OPTS_SCALABLE_B); // AND ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_P4, REG_V5, INS_OPTS_SCALABLE_H); // BIC ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V14, REG_P5, REG_V16, INS_OPTS_SCALABLE_S); // EOR ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V29, REG_P7, REG_V31, INS_OPTS_SCALABLE_D); // ORR ., /M, ., . + + theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V5, REG_P6, REG_V7, INS_OPTS_SCALABLE_B); // ADD ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V15, REG_P7, REG_V29, INS_OPTS_SCALABLE_H); // SUB ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_subr, EA_SCALABLE, REG_V2, REG_P0, REG_V13, INS_OPTS_SCALABLE_S); // SUBR ., /M, ., . + + theEmitter->emitIns_R_R_R(INS_sve_sdiv, EA_SCALABLE, REG_V3, REG_P2, REG_V9, INS_OPTS_SCALABLE_S); // SDIV ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_sdivr, EA_SCALABLE, REG_V31, REG_P3, REG_V29, INS_OPTS_SCALABLE_D); // SDIVR ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_udiv, EA_SCALABLE, REG_V1, REG_P0, REG_V0, INS_OPTS_SCALABLE_S); // UDIV ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_udivr, EA_SCALABLE, REG_V13, REG_P7, REG_V15, INS_OPTS_SCALABLE_D); // UDIVR ., /M, ., . + + theEmitter->emitIns_R_R_R(INS_sve_smax, EA_SCALABLE, REG_V24, REG_P0, REG_V2, INS_OPTS_SCALABLE_B); // SMAX ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_smin, EA_SCALABLE, REG_V9, REG_P1, REG_V27, INS_OPTS_SCALABLE_H); // SMIN ., /M, ., . 
+ theEmitter->emitIns_R_R_R(INS_sve_sabd, EA_SCALABLE, REG_V5, REG_P2, REG_V6, INS_OPTS_SCALABLE_B); // SABD ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_uabd, EA_SCALABLE, REG_V23, REG_P3, REG_V9, INS_OPTS_SCALABLE_S); // UABD ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_umax, EA_SCALABLE, REG_V15, REG_P4, REG_V2, INS_OPTS_SCALABLE_S); // UMAX ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_umin, EA_SCALABLE, REG_V12, REG_P7, REG_V0, INS_OPTS_SCALABLE_D); // UMIN ., /M, ., . + + theEmitter->emitIns_R_R_R(INS_sve_mul, EA_SCALABLE, REG_V5, REG_P1, REG_V3, INS_OPTS_SCALABLE_D); // IF_SVE_AE_3A /* MUL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_smulh, EA_SCALABLE, REG_V17, REG_P5, REG_V5, INS_OPTS_SCALABLE_S); // IF_SVE_AE_3A /* SMULH ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V12, REG_P2, REG_V24, INS_OPTS_SCALABLE_B); // IF_SVE_AE_3A /* UMULH ., /M, ., . */ + + theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V5, REG_P0, REG_V21, INS_OPTS_SCALABLE_S); // IF_SVE_AN_3A /* ASR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_asrr, EA_SCALABLE, REG_V1, REG_P7, REG_V20, INS_OPTS_SCALABLE_B); // IF_SVE_AN_3A /* ASRR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_P2, REG_V0, INS_OPTS_SCALABLE_H); // IF_SVE_AN_3A /* LSL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_lslr, EA_SCALABLE, REG_V27, REG_P6, REG_V31, INS_OPTS_SCALABLE_D); // IF_SVE_AN_3A /* LSLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V5, REG_P5, REG_V6, INS_OPTS_SCALABLE_B); // IF_SVE_AN_3A /* LSR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_lsrr, EA_SCALABLE, REG_V15, REG_P4, REG_V17, INS_OPTS_SCALABLE_S); // IF_SVE_AN_3A /* LSRR ., /M, ., . 
*/ #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index d911f2655a2af3..729b284a35233f 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8115,6 +8115,56 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_AC_3A; break; + case INS_sve_sabd: + case INS_sve_smax: + case INS_sve_smin: + case INS_sve_uabd: + case INS_sve_umax: + case INS_sve_umin: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalable(opt)); + fmt = IF_SVE_AD_3A; + break; + + case INS_sve_mul: + case INS_sve_smulh: + case INS_sve_umulh: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalable(opt)); + fmt = IF_SVE_AE_3A; + break; + + case INS_sve_asr: + case INS_sve_asrr: + case INS_sve_lsl: + case INS_sve_lslr: + case INS_sve_lsr: + case INS_sve_lsrr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalable(opt)); + fmt = IF_SVE_AN_3A; + break; + + //TODO in this PR.... + // case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + // case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + // case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + // case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + // case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + // case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + // case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + // case IF_SVE_EU_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) + // case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + // case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + // case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + + default: unreached(); break; From c5d82cdd5f859c379d4a3981883993985e73594d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 23 Nov 2023 10:33:09 +0000 Subject: [PATCH 06/32] Remove REG_PREDICATE_LOW_FIRST --- src/coreclr/jit/emitarm64.h | 2 +- src/coreclr/jit/targetarm64.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 22a71d75a0e9d3..77791bccaf7c90 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -742,7 +742,7 @@ inline static bool isPredicateRegister(regNumber reg) inline static bool isLowPredicateRegister(regNumber reg) { - return (reg >= REG_PREDICATE_LOW_FIRST && reg <= REG_PREDICATE_LOW_LAST); + return (reg >= REG_PREDICATE_FIRST && reg <= REG_PREDICATE_LOW_LAST); } inline static bool insOptsNone(insOpts opt) diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 44054f0f0c41e2..961862e5184d7e 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -52,7 +52,6 @@ #define LAST_FP_ARGREG REG_V15 #define REG_PREDICATE_FIRST REG_P0 #define REG_PREDICATE_LAST REG_P15 - #define REG_PREDICATE_LOW_FIRST REG_P0 #define REG_PREDICATE_LOW_LAST REG_P7 // Some instructions can only use the first half of the predicate registers. 
#define REGNUM_BITS 6 // number of bits in a REG_* From 4461ecbe2b9be80d40e9dd8e8a5a6e4e1e2dc722 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 23 Nov 2023 10:54:41 +0000 Subject: [PATCH 07/32] Fix sve_ins_offset --- src/coreclr/jit/emitarm64.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index b4d6bb84b87bf3..1a1cc4920aeea5 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -3803,9 +3803,6 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt) assert(encoding_found); const unsigned sve_ins_offset = ((unsigned)ins - INS_sve_invalid); - // The ins enum includes all the Arm64 instructions. But, the insCodes tables start from the first SVE instruction. - unsigned sve_ins_offset = ((unsigned)ins - INS_sve_invalid); - switch (index) { case 0: From f93a4f698b6a3d171fb1440ed4ff45569090bf0a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 23 Nov 2023 11:48:07 +0000 Subject: [PATCH 08/32] Add IF_SVE_AO_3A, including INS_OPTS_SCALABLE_WIDE_ --- src/coreclr/jit/codegenarm64.cpp | 4 +++ src/coreclr/jit/emitarm64.cpp | 50 +++++++++++++++++++++++++------- src/coreclr/jit/emitarm64.h | 5 ++++ src/coreclr/jit/instr.h | 4 +++ 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 4b62a3cf599e44..5013d39b3d4f65 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10148,6 +10148,10 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V5, REG_P5, REG_V6, INS_OPTS_SCALABLE_B); // IF_SVE_AN_3A /* LSR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_lsrr, EA_SCALABLE, REG_V15, REG_P4, REG_V17, INS_OPTS_SCALABLE_S); // IF_SVE_AN_3A /* LSRR ., /M, ., . 
*/ + theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V4, REG_P3, REG_V24, INS_OPTS_SCALABLE_WIDE_B); // IF_SVE_AO_3A /* ASR ., /M, ., .D */ + theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_P7, REG_V3, INS_OPTS_SCALABLE_WIDE_H); // IF_SVE_AO_3A /* LSL ., /M, ., .D */ + theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_P0, REG_V0, INS_OPTS_SCALABLE_WIDE_S); // IF_SVE_AO_3A /* LSR ., /M, ., .D */ + #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 1a1cc4920aeea5..cac6100e33c4ca 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -948,7 +948,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) - case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register @@ -976,6 +975,15 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); // xx break; + case IF_SVE_AO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + elemsize = id->idOpSize(); + assert(insOptsScalableWide(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); // xx + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -8136,11 +8144,8 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_AE_3A; break; - case INS_sve_asr: case INS_sve_asrr: - case INS_sve_lsl: case INS_sve_lslr: - case INS_sve_lsr: case INS_sve_lsrr: assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); @@ -8149,8 +8154,24 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_AN_3A; break; + case INS_sve_asr: + case INS_sve_lsl: + case INS_sve_lsr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + if (insOptsScalable(opt)) + { + fmt = IF_SVE_AN_3A; + } + else + { + assert(insOptsScalableWide(opt)); + fmt = IF_SVE_AO_3A; + } + break; + //TODO in this PR.... - // case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) // case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector // case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar // case IF_SVE_CO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally extract element to general register @@ -11785,15 +11806,15 @@ void emitter::emitIns_Call(EmitCallType callType, { return 0x00C00000; // set the bit at location 23 and 22 } - else if (opt == INS_OPTS_SCALABLE_S) + else if (opt == INS_OPTS_SCALABLE_S || opt == INS_OPTS_SCALABLE_WIDE_S) { return 0x00800000; // set the bit at location 23 } - else if (opt == INS_OPTS_SCALABLE_H) + else if (opt == INS_OPTS_SCALABLE_H || opt == INS_OPTS_SCALABLE_WIDE_H) { return 0x00400000; // set the bit at location 22 } - assert(opt == INS_OPTS_SCALABLE_B); + assert(opt == INS_OPTS_SCALABLE_B || opt == INS_OPTS_SCALABLE_WIDE_B); return 0x00000000; } @@ -14231,7 +14252,7 @@ void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma) // void emitter::emitDispSveReg(regNumber reg, insOpts opt, bool addComma) { - assert(insOptsScalable(opt)); + assert(insOptsScalable(opt) || insOptsScalableWide(opt)); assert(isVectorRegister(reg)); printf(emitSveRegName(reg)); emitDispArrangement(opt); @@ -14361,6 +14382,7 @@ void emitter::emitDispArrangement(insOpts opt) str = "16b"; break; case INS_OPTS_SCALABLE_B: + case INS_OPTS_SCALABLE_WIDE_B: str = "b"; break; case INS_OPTS_4H: @@ -14370,6 +14392,7 @@ void emitter::emitDispArrangement(insOpts opt) str = "8h"; break; case INS_OPTS_SCALABLE_H: + case INS_OPTS_SCALABLE_WIDE_H: str = "h"; break; case INS_OPTS_2S: @@ -14379,6 +14402,7 @@ void emitter::emitDispArrangement(insOpts opt) str = "4s"; break; case INS_OPTS_SCALABLE_S: + case INS_OPTS_SCALABLE_WIDE_S: str = "s"; break; case INS_OPTS_1D: @@ -15958,7 +15982,6 @@ void emitter::emitDispInsHelp( case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) - case IF_SVE_AO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register @@ -15975,6 +15998,13 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), true, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 77791bccaf7c90..3e39fb93723f46 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -853,6 +853,11 @@ inline static bool insOptsScalableWords(insOpts opt) return ((opt == INS_OPTS_SCALABLE_S || opt == INS_OPTS_SCALABLE_D)); } +inline static bool insOptsScalableWide(insOpts opt) +{ + return ((opt == INS_OPTS_SCALABLE_WIDE_B || opt == INS_OPTS_SCALABLE_WIDE_H || opt == INS_OPTS_SCALABLE_WIDE_S)); +} + static bool isValidImmCond(ssize_t imm); static bool isValidImmCondFlags(ssize_t imm); static bool isValidImmCondFlagsImm5(ssize_t imm); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index a3a70ab92107bb..4bea1eec27af79 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -273,6 +273,10 @@ enum insOpts : unsigned INS_OPTS_SCALABLE_S, INS_OPTS_SCALABLE_D, + INS_OPTS_SCALABLE_WIDE_B, + INS_OPTS_SCALABLE_WIDE_H, + INS_OPTS_SCALABLE_WIDE_S, + INS_OPTS_MSL, // Vector 
Immediate (shifting ones variant) INS_OPTS_S_TO_4BYTE, // Single to INT32 From d755ab7e2a1b3811903de66baa879bc522488b9a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 23 Nov 2023 11:55:50 +0000 Subject: [PATCH 09/32] Add IF_SVE_CM_3A --- src/coreclr/jit/codegenarm64.cpp | 18 ++++++++++++++++++ src/coreclr/jit/emitarm64.cpp | 20 ++++++++------------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 5013d39b3d4f65..5c3a3d721adf53 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10152,6 +10152,24 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_P7, REG_V3, INS_OPTS_SCALABLE_WIDE_H); // IF_SVE_AO_3A /* LSL ., /M, ., .D */ theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_P0, REG_V0, INS_OPTS_SCALABLE_WIDE_S); // IF_SVE_AO_3A /* LSR ., /M, ., .D */ + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_SCALABLE, REG_V31, REG_P7, REG_V31, INS_OPTS_SCALABLE_B); // IF_SVE_CM_3A /* CLASTA ., , ., . */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V30, REG_P6, REG_V30, INS_OPTS_SCALABLE_D); // IF_SVE_CM_3A /* CLASTB ., , ., . */ + + //TODO in this PR.... + // case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + // case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + // case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + // case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + // case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + // case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + // case IF_SVE_EU_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) + // case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + // case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + // case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + + + + #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index cac6100e33c4ca..4024d945706506 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8171,18 +8171,14 @@ void emitter::emitIns_R_R_R( } break; - //TODO in this PR.... - // case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - // case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - // case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register - // case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) - // case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic - // case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract - // case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) - // case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations - // case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - // case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - + case INS_sve_clasta: + case INS_sve_clastb: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalable(opt)); + fmt = IF_SVE_CM_3A; + break; default: unreached(); From 2dcd8e625346a3fccf37bb6e6bc9434c614b327c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 23 Nov 2023 14:01:18 +0000 Subject: [PATCH 10/32] Add IF_SVE_CN_3A, including INS_OPTS_SCALABLE_TO_SIMD_ --- src/coreclr/jit/codegenarm64.cpp | 5 +- src/coreclr/jit/emitarm64.cpp | 95 ++++++++++++++++++++++---------- src/coreclr/jit/emitarm64.h | 13 ++++- src/coreclr/jit/instr.h | 5 ++ 4 files changed, 86 insertions(+), 32 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 5c3a3d721adf53..a8078d7fb3b1fe 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10155,9 +10155,10 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_SCALABLE, REG_V31, REG_P7, REG_V31, INS_OPTS_SCALABLE_B); // IF_SVE_CM_3A /* CLASTA ., , ., . */ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V30, REG_P6, REG_V30, INS_OPTS_SCALABLE_D); // IF_SVE_CM_3A /* CLASTB ., , ., . */ + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_2BYTE, REG_V12, REG_P1, REG_V15, INS_OPTS_SCALABLE_TO_SIMD_H); // IF_SVE_CN_3A /* CLASTA , , , . */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_V13, REG_P2, REG_V16, INS_OPTS_SCALABLE_TO_SIMD_S); // IF_SVE_CN_3A /* CLASTB , , , . */ + //TODO in this PR.... - // case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - // case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar // case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register // case IF_SVE_EP_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) // case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 4024d945706506..e692d9ce7c7037 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -949,7 +949,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic @@ -959,29 +958,38 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) elemsize = id->idOpSize(); - assert(insOptsScalable(id->idInsOpt())); + assert(insOptsScalableSimple(id->idInsOpt())); // xx assert(isVectorRegister(id->idReg1())); // ddddd assert(isLowPredicateRegister(id->idReg2())); // ggg assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); // xx + assert(isScalableVectorSize(elemsize)); break; case IF_SVE_AC_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer divide vectors (predicated) elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); + assert(insOptsScalableWords(id->idInsOpt())); // xx assert(isVectorRegister(id->idReg1())); // ddddd assert(isLowPredicateRegister(id->idReg2())); // ggg assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); // xx + assert(isScalableVectorSize(elemsize)); break; case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) elemsize = id->idOpSize(); - assert(insOptsScalableWide(id->idInsOpt())); + assert(insOptsScalableWide(id->idInsOpt())); // xx assert(isVectorRegister(id->idReg1())); // ddddd assert(isLowPredicateRegister(id->idReg2())); // ggg assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); // xx + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + elemsize = id->idOpSize(); + assert(insOptsScalableToSimd(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(elemsize)); break; default: @@ -8096,7 +8104,7 @@ void emitter::emitIns_R_R_R( assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); - assert(insOptsScalable(opt)); + assert(insOptsScalableSimple(opt)); fmt = IF_SVE_AA_3A; break; @@ -8106,7 +8114,7 @@ void emitter::emitIns_R_R_R( assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); - assert(insOptsScalable(opt)); + assert(insOptsScalableSimple(opt)); fmt = IF_SVE_AB_3A; break; @@ -8130,7 +8138,7 @@ void emitter::emitIns_R_R_R( assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); - 
assert(insOptsScalable(opt)); + assert(insOptsScalableSimple(opt)); fmt = IF_SVE_AD_3A; break; @@ -8140,7 +8148,7 @@ void emitter::emitIns_R_R_R( assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); - assert(insOptsScalable(opt)); + assert(insOptsScalableSimple(opt)); fmt = IF_SVE_AE_3A; break; @@ -8150,7 +8158,7 @@ void emitter::emitIns_R_R_R( assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); - assert(insOptsScalable(opt)); + assert(insOptsScalableSimple(opt)); fmt = IF_SVE_AN_3A; break; @@ -8160,7 +8168,7 @@ void emitter::emitIns_R_R_R( assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); - if (insOptsScalable(opt)) + if (insOptsScalableSimple(opt)) { fmt = IF_SVE_AN_3A; } @@ -8176,8 +8184,16 @@ void emitter::emitIns_R_R_R( assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); - assert(insOptsScalable(opt)); - fmt = IF_SVE_CM_3A; + if (insOptsScalableSimple(opt)) + { + fmt = IF_SVE_CM_3A; + } + else + { + assert(insOptsScalableToSimd(opt)); + assert(isValidVectorElemsize(size)); + fmt = IF_SVE_CN_3A; + } break; default: @@ -11798,20 +11814,31 @@ void emitter::emitIns_Call(EmitCallType callType, /*static*/ emitter::code_t emitter::insEncodeSveElemsize(insOpts opt) { - if (opt == INS_OPTS_SCALABLE_D) - { - return 0x00C00000; // set the bit at location 23 and 22 - } - else if (opt == INS_OPTS_SCALABLE_S || opt == INS_OPTS_SCALABLE_WIDE_S) - { - return 0x00800000; // set the bit at location 23 - } - else if (opt == INS_OPTS_SCALABLE_H || opt == INS_OPTS_SCALABLE_WIDE_H) + switch (opt) { - return 0x00400000; // set the bit at location 22 + case INS_OPTS_SCALABLE_B: + case INS_OPTS_SCALABLE_WIDE_B: + case INS_OPTS_SCALABLE_TO_SIMD_B: + return 0x00000000; + + case INS_OPTS_SCALABLE_H: + case INS_OPTS_SCALABLE_WIDE_H: + case INS_OPTS_SCALABLE_TO_SIMD_H: + return 0x00400000; // 
set the bit at location 22 + + case INS_OPTS_SCALABLE_S: + case INS_OPTS_SCALABLE_WIDE_S: + case INS_OPTS_SCALABLE_TO_SIMD_S: + return 0x00800000; // set the bit at location 23 + + case INS_OPTS_SCALABLE_D: + case INS_OPTS_SCALABLE_TO_SIMD_D: + return 0x00C00000; // set the bit at location 23 and 22 + + default: + assert(!"Invalid insOpt for vector register"); } - assert(opt == INS_OPTS_SCALABLE_B || opt == INS_OPTS_SCALABLE_WIDE_B); - return 0x00000000; + return 0; } @@ -14248,7 +14275,7 @@ void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma) // void emitter::emitDispSveReg(regNumber reg, insOpts opt, bool addComma) { - assert(insOptsScalable(opt) || insOptsScalableWide(opt)); + assert(insOptsScalable(opt)); assert(isVectorRegister(reg)); printf(emitSveRegName(reg)); emitDispArrangement(opt); @@ -14379,6 +14406,7 @@ void emitter::emitDispArrangement(insOpts opt) break; case INS_OPTS_SCALABLE_B: case INS_OPTS_SCALABLE_WIDE_B: + case INS_OPTS_SCALABLE_TO_SIMD_B: str = "b"; break; case INS_OPTS_4H: @@ -14389,6 +14417,7 @@ void emitter::emitDispArrangement(insOpts opt) break; case INS_OPTS_SCALABLE_H: case INS_OPTS_SCALABLE_WIDE_H: + case INS_OPTS_SCALABLE_TO_SIMD_H: str = "h"; break; case INS_OPTS_2S: @@ -14399,6 +14428,7 @@ void emitter::emitDispArrangement(insOpts opt) break; case INS_OPTS_SCALABLE_S: case INS_OPTS_SCALABLE_WIDE_S: + case INS_OPTS_SCALABLE_TO_SIMD_S: str = "s"; break; case INS_OPTS_1D: @@ -14408,6 +14438,7 @@ void emitter::emitDispArrangement(insOpts opt) str = "2d"; break; case INS_OPTS_SCALABLE_D: + case INS_OPTS_SCALABLE_TO_SIMD_D: str = "d"; break; @@ -15979,7 +16010,6 @@ void emitter::emitDispInsHelp( case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) case IF_SVE_CM_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally broadcast element to vector - case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic @@ -16001,6 +16031,13 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm break; + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispPredicateReg(id->idReg2(), true, true); // ggg + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 3e39fb93723f46..0057fe3fd100d6 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -842,6 +842,11 @@ inline static bool insOptsConvertIntToFloat(insOpts opt) } inline static bool insOptsScalable(insOpts opt) +{ + return insOptsScalableSimple(opt) || insOptsScalableWide(opt) || insOptsScalableToSimd(opt); +} + +inline static bool insOptsScalableSimple(insOpts opt) { return ((opt == INS_OPTS_SCALABLE_B || opt == INS_OPTS_SCALABLE_H || opt == INS_OPTS_SCALABLE_S || opt == INS_OPTS_SCALABLE_D)); @@ -849,7 +854,6 @@ inline static bool insOptsScalable(insOpts opt) inline static bool insOptsScalableWords(insOpts opt) { - // TODO-SVE: Maybe this function needs a better name. 
return ((opt == INS_OPTS_SCALABLE_S || opt == INS_OPTS_SCALABLE_D)); } @@ -858,6 +862,13 @@ inline static bool insOptsScalableWide(insOpts opt) return ((opt == INS_OPTS_SCALABLE_WIDE_B || opt == INS_OPTS_SCALABLE_WIDE_H || opt == INS_OPTS_SCALABLE_WIDE_S)); } +inline static bool insOptsScalableToSimd(insOpts opt) +{ + return ((opt == INS_OPTS_SCALABLE_TO_SIMD_B || opt == INS_OPTS_SCALABLE_TO_SIMD_H || opt == INS_OPTS_SCALABLE_TO_SIMD_S || + opt == INS_OPTS_SCALABLE_TO_SIMD_D)); +} + + static bool isValidImmCond(ssize_t imm); static bool isValidImmCondFlags(ssize_t imm); static bool isValidImmCondFlagsImm5(ssize_t imm); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 4bea1eec27af79..65bbd123be55d4 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -277,6 +277,11 @@ enum insOpts : unsigned INS_OPTS_SCALABLE_WIDE_H, INS_OPTS_SCALABLE_WIDE_S, + INS_OPTS_SCALABLE_TO_SIMD_B, + INS_OPTS_SCALABLE_TO_SIMD_H, + INS_OPTS_SCALABLE_TO_SIMD_S, + INS_OPTS_SCALABLE_TO_SIMD_D, + INS_OPTS_MSL, // Vector Immediate (shifting ones variant) INS_OPTS_S_TO_4BYTE, // Single to INT32 From ba577825a9fb9ccfd84f634290c2db6c84f754a8 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 23 Nov 2023 16:24:57 +0000 Subject: [PATCH 11/32] IF_SVE_CO_3A and INS_OPTS_SCALABLE_n_TO_SCALAR --- src/coreclr/jit/codegenarm64.cpp | 7 +++++- src/coreclr/jit/emitarm64.cpp | 41 +++++++++++++++++++++++++++----- src/coreclr/jit/emitarm64.h | 9 ++++++- src/coreclr/jit/instr.h | 5 ++++ 4 files changed, 54 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index a8078d7fb3b1fe..6f76d0e46d9e14 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10158,8 +10158,13 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_2BYTE, REG_V12, REG_P1, REG_V15, INS_OPTS_SCALABLE_TO_SIMD_H); // IF_SVE_CN_3A /* CLASTA , , , . 
*/ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_V13, REG_P2, REG_V16, INS_OPTS_SCALABLE_TO_SIMD_S); // IF_SVE_CN_3A /* CLASTB , , , . */ + //Note: EA_4BYTE used for B and H (destination register is W) + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R0, REG_P0, REG_V0, INS_OPTS_SCALABLE_B_TO_SCALAR); // IF_SVE_CO_3A /* CLASTA , , , . */ + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R1, REG_P2, REG_V3, INS_OPTS_SCALABLE_H_TO_SCALAR); // IF_SVE_CO_3A /* CLASTA , , , . */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_R23, REG_P5, REG_V12, INS_OPTS_SCALABLE_S_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , . */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_R3, REG_P6, REG_V9, INS_OPTS_SCALABLE_D_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , . */ + //TODO in this PR.... - // case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register // case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) // case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic // case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index e692d9ce7c7037..76f1b0bd71a12f 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -949,7 +949,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register case IF_SVE_EP_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract @@ -992,6 +991,15 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidVectorElemsize(elemsize)); break; + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + elemsize = id->idOpSize(); + assert(insOptsScalableToScalar(id->idInsOpt())); // xx + assert(isGeneralRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidScalarDatasize(elemsize)); + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -8181,19 +8189,24 @@ void emitter::emitIns_R_R_R( case INS_sve_clasta: case INS_sve_clastb: - assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); if (insOptsScalableSimple(opt)) { + assert(isVectorRegister(reg1)); fmt = IF_SVE_CM_3A; } - else + else if (insOptsScalableToSimd(opt)) { - assert(insOptsScalableToSimd(opt)); assert(isValidVectorElemsize(size)); fmt = IF_SVE_CN_3A; } + else + { + assert(insOptsScalableToScalar(opt)); + assert(isValidScalarDatasize(size)); + fmt = IF_SVE_CO_3A; + } break; default: @@ -11819,20 +11832,24 @@ void emitter::emitIns_Call(EmitCallType callType, case INS_OPTS_SCALABLE_B: case INS_OPTS_SCALABLE_WIDE_B: case INS_OPTS_SCALABLE_TO_SIMD_B: + case INS_OPTS_SCALABLE_B_TO_SCALAR: return 0x00000000; case INS_OPTS_SCALABLE_H: case INS_OPTS_SCALABLE_WIDE_H: case INS_OPTS_SCALABLE_TO_SIMD_H: + case INS_OPTS_SCALABLE_H_TO_SCALAR: return 0x00400000; // set the bit at location 22 case INS_OPTS_SCALABLE_S: case INS_OPTS_SCALABLE_WIDE_S: case INS_OPTS_SCALABLE_TO_SIMD_S: + case INS_OPTS_SCALABLE_S_TO_SCALAR: return 0x00800000; // 
set the bit at location 23 case INS_OPTS_SCALABLE_D: case INS_OPTS_SCALABLE_TO_SIMD_D: + case INS_OPTS_SCALABLE_D_TO_SCALAR: return 0x00C00000; // set the bit at location 23 and 22 default: @@ -13798,7 +13815,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract @@ -13814,6 +13830,15 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_CO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally extract element to general register + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(id->idInsOpt()); // xx + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -14407,6 +14432,7 @@ void emitter::emitDispArrangement(insOpts opt) case INS_OPTS_SCALABLE_B: case INS_OPTS_SCALABLE_WIDE_B: case INS_OPTS_SCALABLE_TO_SIMD_B: + case INS_OPTS_SCALABLE_B_TO_SCALAR: str = "b"; break; case INS_OPTS_4H: @@ -14418,6 +14444,7 @@ void emitter::emitDispArrangement(insOpts opt) case INS_OPTS_SCALABLE_H: case INS_OPTS_SCALABLE_WIDE_H: case INS_OPTS_SCALABLE_TO_SIMD_H: + case INS_OPTS_SCALABLE_H_TO_SCALAR: str = "h"; break; case INS_OPTS_2S: @@ -14429,6 +14456,7 @@ void emitter::emitDispArrangement(insOpts opt) case INS_OPTS_SCALABLE_S: case INS_OPTS_SCALABLE_WIDE_S: case INS_OPTS_SCALABLE_TO_SIMD_S: + case INS_OPTS_SCALABLE_S_TO_SCALAR: str = "s"; break; case INS_OPTS_1D: @@ -14439,6 +14467,7 @@ void emitter::emitDispArrangement(insOpts opt) break; case INS_OPTS_SCALABLE_D: case INS_OPTS_SCALABLE_TO_SIMD_D: + case INS_OPTS_SCALABLE_D_TO_SCALAR: str = "d"; break; @@ -16010,7 +16039,6 @@ void emitter::emitDispInsHelp( case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) case IF_SVE_ER_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 integer pairwise arithmetic case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract @@ -16032,6 +16060,7 @@ void emitter::emitDispInsHelp( break; case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register emitDispReg(id->idReg1(), size, true); // ddddd emitDispPredicateReg(id->idReg2(), true, true); // ggg emitDispReg(id->idReg1(), size, true); // ddddd diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 0057fe3fd100d6..e2eb5f3f13953e 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -843,7 +843,8 @@ inline static bool insOptsConvertIntToFloat(insOpts opt) inline static bool insOptsScalable(insOpts opt) { - return insOptsScalableSimple(opt) || insOptsScalableWide(opt) || insOptsScalableToSimd(opt); + // Opt is any of the scalable types. 
+ return insOptsScalableSimple(opt) || insOptsScalableWide(opt) || insOptsScalableToSimd(opt) || insOptsScalableToScalar(opt); } inline static bool insOptsScalableSimple(insOpts opt) @@ -868,6 +869,12 @@ inline static bool insOptsScalableToSimd(insOpts opt) opt == INS_OPTS_SCALABLE_TO_SIMD_D)); } +inline static bool insOptsScalableToScalar(insOpts opt) +{ + return ((opt == INS_OPTS_SCALABLE_B_TO_SCALAR || opt == INS_OPTS_SCALABLE_H_TO_SCALAR || opt == INS_OPTS_SCALABLE_S_TO_SCALAR || + opt == INS_OPTS_SCALABLE_D_TO_SCALAR)); +} + static bool isValidImmCond(ssize_t imm); static bool isValidImmCondFlags(ssize_t imm); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 65bbd123be55d4..76af921ac694f5 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -282,6 +282,11 @@ enum insOpts : unsigned INS_OPTS_SCALABLE_TO_SIMD_S, INS_OPTS_SCALABLE_TO_SIMD_D, + INS_OPTS_SCALABLE_B_TO_SCALAR, + INS_OPTS_SCALABLE_H_TO_SCALAR, + INS_OPTS_SCALABLE_S_TO_SCALAR, + INS_OPTS_SCALABLE_D_TO_SCALAR, + INS_OPTS_MSL, // Vector Immediate (shifting ones variant) INS_OPTS_S_TO_4BYTE, // Single to INT32 From 0d958e015eb38bd2e8c200d25c49d9f02bfe4317 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 23 Nov 2023 16:33:35 +0000 Subject: [PATCH 12/32] Rename INS_OPTS_SCALABLE_TO_SIMD_ --- src/coreclr/jit/codegenarm64.cpp | 4 ++-- src/coreclr/jit/emitarm64.cpp | 16 ++++++++-------- src/coreclr/jit/emitarm64.h | 4 ++-- src/coreclr/jit/instr.h | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 6f76d0e46d9e14..173fe7b0e8f31b 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10155,8 +10155,8 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_SCALABLE, REG_V31, REG_P7, REG_V31, INS_OPTS_SCALABLE_B); // IF_SVE_CM_3A /* CLASTA ., , ., . 
*/ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V30, REG_P6, REG_V30, INS_OPTS_SCALABLE_D); // IF_SVE_CM_3A /* CLASTB ., , ., . */ - theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_2BYTE, REG_V12, REG_P1, REG_V15, INS_OPTS_SCALABLE_TO_SIMD_H); // IF_SVE_CN_3A /* CLASTA , , , . */ - theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_V13, REG_P2, REG_V16, INS_OPTS_SCALABLE_TO_SIMD_S); // IF_SVE_CN_3A /* CLASTB , , , . */ + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_2BYTE, REG_V12, REG_P1, REG_V15, INS_OPTS_SCALABLE_H_TO_SIMD); // IF_SVE_CN_3A /* CLASTA , , , . */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_V13, REG_P2, REG_V16, INS_OPTS_SCALABLE_S_TO_SIMD); // IF_SVE_CN_3A /* CLASTB , , , . */ //Note: EA_4BYTE used for B and H (destination register is W) theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R0, REG_P0, REG_V0, INS_OPTS_SCALABLE_B_TO_SCALAR); // IF_SVE_CO_3A /* CLASTA , , , . */ diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 76f1b0bd71a12f..1428b72668f7cc 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -11831,24 +11831,24 @@ void emitter::emitIns_Call(EmitCallType callType, { case INS_OPTS_SCALABLE_B: case INS_OPTS_SCALABLE_WIDE_B: - case INS_OPTS_SCALABLE_TO_SIMD_B: + case INS_OPTS_SCALABLE_B_TO_SIMD: case INS_OPTS_SCALABLE_B_TO_SCALAR: return 0x00000000; case INS_OPTS_SCALABLE_H: case INS_OPTS_SCALABLE_WIDE_H: - case INS_OPTS_SCALABLE_TO_SIMD_H: + case INS_OPTS_SCALABLE_H_TO_SIMD: case INS_OPTS_SCALABLE_H_TO_SCALAR: return 0x00400000; // set the bit at location 22 case INS_OPTS_SCALABLE_S: case INS_OPTS_SCALABLE_WIDE_S: - case INS_OPTS_SCALABLE_TO_SIMD_S: + case INS_OPTS_SCALABLE_S_TO_SIMD: case INS_OPTS_SCALABLE_S_TO_SCALAR: return 0x00800000; // set the bit at location 23 case INS_OPTS_SCALABLE_D: - case INS_OPTS_SCALABLE_TO_SIMD_D: + case INS_OPTS_SCALABLE_D_TO_SIMD: case INS_OPTS_SCALABLE_D_TO_SCALAR: return 0x00C00000; // set the bit at 
location 23 and 22 @@ -14431,7 +14431,7 @@ void emitter::emitDispArrangement(insOpts opt) break; case INS_OPTS_SCALABLE_B: case INS_OPTS_SCALABLE_WIDE_B: - case INS_OPTS_SCALABLE_TO_SIMD_B: + case INS_OPTS_SCALABLE_B_TO_SIMD: case INS_OPTS_SCALABLE_B_TO_SCALAR: str = "b"; break; @@ -14443,7 +14443,7 @@ void emitter::emitDispArrangement(insOpts opt) break; case INS_OPTS_SCALABLE_H: case INS_OPTS_SCALABLE_WIDE_H: - case INS_OPTS_SCALABLE_TO_SIMD_H: + case INS_OPTS_SCALABLE_H_TO_SIMD: case INS_OPTS_SCALABLE_H_TO_SCALAR: str = "h"; break; @@ -14455,7 +14455,7 @@ void emitter::emitDispArrangement(insOpts opt) break; case INS_OPTS_SCALABLE_S: case INS_OPTS_SCALABLE_WIDE_S: - case INS_OPTS_SCALABLE_TO_SIMD_S: + case INS_OPTS_SCALABLE_S_TO_SIMD: case INS_OPTS_SCALABLE_S_TO_SCALAR: str = "s"; break; @@ -14466,7 +14466,7 @@ void emitter::emitDispArrangement(insOpts opt) str = "2d"; break; case INS_OPTS_SCALABLE_D: - case INS_OPTS_SCALABLE_TO_SIMD_D: + case INS_OPTS_SCALABLE_D_TO_SIMD: case INS_OPTS_SCALABLE_D_TO_SCALAR: str = "d"; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index e2eb5f3f13953e..522880eca2668c 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -865,8 +865,8 @@ inline static bool insOptsScalableWide(insOpts opt) inline static bool insOptsScalableToSimd(insOpts opt) { - return ((opt == INS_OPTS_SCALABLE_TO_SIMD_B || opt == INS_OPTS_SCALABLE_TO_SIMD_H || opt == INS_OPTS_SCALABLE_TO_SIMD_S || - opt == INS_OPTS_SCALABLE_TO_SIMD_D)); + return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD || opt == INS_OPTS_SCALABLE_H_TO_SIMD || opt == INS_OPTS_SCALABLE_S_TO_SIMD || + opt == INS_OPTS_SCALABLE_D_TO_SIMD)); } inline static bool insOptsScalableToScalar(insOpts opt) diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 76af921ac694f5..0a5e3c291489b7 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -277,10 +277,10 @@ enum insOpts : unsigned INS_OPTS_SCALABLE_WIDE_H, 
INS_OPTS_SCALABLE_WIDE_S, - INS_OPTS_SCALABLE_TO_SIMD_B, - INS_OPTS_SCALABLE_TO_SIMD_H, - INS_OPTS_SCALABLE_TO_SIMD_S, - INS_OPTS_SCALABLE_TO_SIMD_D, + INS_OPTS_SCALABLE_B_TO_SIMD, + INS_OPTS_SCALABLE_H_TO_SIMD, + INS_OPTS_SCALABLE_S_TO_SIMD, + INS_OPTS_SCALABLE_D_TO_SIMD, INS_OPTS_SCALABLE_B_TO_SCALAR, INS_OPTS_SCALABLE_H_TO_SCALAR, From f0bf5ca0fb2160178d1a506e5b3c3db3a782bfe5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 23 Nov 2023 16:48:30 +0000 Subject: [PATCH 13/32] Add IF_SVE_EP_3A --- src/coreclr/jit/codegenarm64.cpp | 10 +++++++++- src/coreclr/jit/emitarm64.cpp | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 173fe7b0e8f31b..67c43b3b3bb931 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10164,8 +10164,16 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_R23, REG_P5, REG_V12, INS_OPTS_SCALABLE_S_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , . */ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_R3, REG_P6, REG_V9, INS_OPTS_SCALABLE_D_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , . */ + theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SHADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_shsub, EA_SCALABLE, REG_V16, REG_P1, REG_V11, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SHSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_shsubr, EA_SCALABLE, REG_V17, REG_P2, REG_V12, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SHSUBR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_srhadd, EA_SCALABLE, REG_V18, REG_P3, REG_V13, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SRHADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uhadd, EA_SCALABLE, REG_V19, REG_P4, REG_V14, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* UHADD ., /M, ., . 
*/ + theEmitter->emitIns_R_R_R(INS_sve_uhsub, EA_SCALABLE, REG_V20, REG_P5, REG_V15, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* UHSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uhsubr, EA_SCALABLE, REG_V21, REG_P6, REG_V16, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* UHSUBR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_urhadd, EA_SCALABLE, REG_V22, REG_P7, REG_V17, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* URHADD ., /M, ., . */ + //TODO in this PR.... - // case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) // case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic // case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract // case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 1428b72668f7cc..e446df47134fe9 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8209,6 +8209,21 @@ void emitter::emitIns_R_R_R( } break; + case INS_sve_shadd: + case INS_sve_shsub: + case INS_sve_shsubr: + case INS_sve_srhadd: + case INS_sve_uhadd: + case INS_sve_uhsub: + case INS_sve_uhsubr: + case INS_sve_urhadd: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_EP_3A; + break; + default: unreached(); break; From 0477f12d3b84f056fbc13913117b049ac629710c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 23 Nov 2023 16:54:50 +0000 Subject: [PATCH 14/32] Add IF_SVE_ER_3A --- src/coreclr/jit/codegenarm64.cpp | 20 +++++++++++++------- src/coreclr/jit/emitarm64.cpp | 12 ++++++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 67c43b3b3bb931..e35e815543f5bb 100644 --- 
a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10152,6 +10152,7 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_P7, REG_V3, INS_OPTS_SCALABLE_WIDE_H); // IF_SVE_AO_3A /* LSL ., /M, ., .D */ theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_P0, REG_V0, INS_OPTS_SCALABLE_WIDE_S); // IF_SVE_AO_3A /* LSR ., /M, ., .D */ + //TODO-SVE: Currently, these are all printed with /M on the predicate. There should be no predicate extension on these. theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_SCALABLE, REG_V31, REG_P7, REG_V31, INS_OPTS_SCALABLE_B); // IF_SVE_CM_3A /* CLASTA ., , ., . */ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V30, REG_P6, REG_V30, INS_OPTS_SCALABLE_D); // IF_SVE_CM_3A /* CLASTB ., , ., . */ @@ -10165,16 +10166,21 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_R3, REG_P6, REG_V9, INS_OPTS_SCALABLE_D_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , . */ theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SHADD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_shsub, EA_SCALABLE, REG_V16, REG_P1, REG_V11, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SHSUB ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_shsubr, EA_SCALABLE, REG_V17, REG_P2, REG_V12, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SHSUBR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_srhadd, EA_SCALABLE, REG_V18, REG_P3, REG_V13, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SRHADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_shsub, EA_SCALABLE, REG_V16, REG_P1, REG_V11, INS_OPTS_SCALABLE_H); // IF_SVE_EP_3A /* SHSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_shsubr, EA_SCALABLE, REG_V17, REG_P2, REG_V12, INS_OPTS_SCALABLE_S); // IF_SVE_EP_3A /* SHSUBR ., /M, ., . 
*/ + theEmitter->emitIns_R_R_R(INS_sve_srhadd, EA_SCALABLE, REG_V18, REG_P3, REG_V13, INS_OPTS_SCALABLE_D); // IF_SVE_EP_3A /* SRHADD ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uhadd, EA_SCALABLE, REG_V19, REG_P4, REG_V14, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* UHADD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uhsub, EA_SCALABLE, REG_V20, REG_P5, REG_V15, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* UHSUB ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uhsubr, EA_SCALABLE, REG_V21, REG_P6, REG_V16, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* UHSUBR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_urhadd, EA_SCALABLE, REG_V22, REG_P7, REG_V17, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* URHADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uhsub, EA_SCALABLE, REG_V20, REG_P5, REG_V15, INS_OPTS_SCALABLE_H); // IF_SVE_EP_3A /* UHSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uhsubr, EA_SCALABLE, REG_V21, REG_P6, REG_V16, INS_OPTS_SCALABLE_S); // IF_SVE_EP_3A /* UHSUBR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_urhadd, EA_SCALABLE, REG_V22, REG_P7, REG_V17, INS_OPTS_SCALABLE_D); // IF_SVE_EP_3A /* URHADD ., /M, ., . */ + + theEmitter->emitIns_R_R_R(INS_sve_addp, EA_SCALABLE, REG_V23, REG_P6, REG_V18, INS_OPTS_SCALABLE_B); // IF_SVE_ER_3A /* ADDP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_smaxp, EA_SCALABLE, REG_V24, REG_P5, REG_V19, INS_OPTS_SCALABLE_H); // IF_SVE_ER_3A /* SMAXP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sminp, EA_SCALABLE, REG_V25, REG_P4, REG_V20, INS_OPTS_SCALABLE_S); // IF_SVE_ER_3A /* SMINP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_umaxp, EA_SCALABLE, REG_V26, REG_P3, REG_V21, INS_OPTS_SCALABLE_D); // IF_SVE_ER_3A /* UMAXP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uminp, EA_SCALABLE, REG_V27, REG_P2, REG_V22, INS_OPTS_SCALABLE_B); // IF_SVE_ER_3A /* UMINP ., /M, ., . */ //TODO in this PR.... - // case IF_SVE_ER_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 integer pairwise arithmetic // case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract // case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) // case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index e446df47134fe9..0160ee1f0cbd3d 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8224,6 +8224,18 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_EP_3A; break; + case INS_sve_addp: + case INS_sve_smaxp: + case INS_sve_sminp: + case INS_sve_umaxp: + case INS_sve_uminp: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_ER_3A; + break; + default: unreached(); break; From 64ac5a616c69e0380a51a944ca9dac9093e3d26c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 23 Nov 2023 17:13:22 +0000 Subject: [PATCH 15/32] Add IF_SVE_ET_3A --- src/coreclr/jit/codegenarm64.cpp | 10 +++++++++- src/coreclr/jit/emitarm64.cpp | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index e35e815543f5bb..d83fdb47f3f294 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10180,8 +10180,16 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_umaxp, EA_SCALABLE, REG_V26, REG_P3, REG_V21, INS_OPTS_SCALABLE_D); // IF_SVE_ER_3A /* UMAXP ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uminp, EA_SCALABLE, REG_V27, REG_P2, REG_V22, INS_OPTS_SCALABLE_B); // IF_SVE_ER_3A /* UMINP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqadd, EA_SCALABLE, REG_V28, REG_P1, REG_V23, INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* SQADD ., /M, ., . 
*/ + theEmitter->emitIns_R_R_R(INS_sve_sqsub, EA_SCALABLE, REG_V29, REG_P0, REG_V24, INS_OPTS_SCALABLE_H); // IF_SVE_ET_3A /* SQSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqsubr, EA_SCALABLE, REG_V30, REG_P1, REG_V25, INS_OPTS_SCALABLE_H); // IF_SVE_ET_3A /* SQSUBR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_suqadd, EA_SCALABLE, REG_V31, REG_P2, REG_V26, INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* SUQADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqadd, EA_SCALABLE, REG_V0, REG_P3, REG_V27, INS_OPTS_SCALABLE_S); // IF_SVE_ET_3A /* UQADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqsub, EA_SCALABLE, REG_V1, REG_P4, REG_V28, INS_OPTS_SCALABLE_D); // IF_SVE_ET_3A /* UQSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqsubr, EA_SCALABLE, REG_V2, REG_P5, REG_V29, INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* UQSUBR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_usqadd, EA_SCALABLE, REG_V3, REG_P6, REG_V30, INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* USQADD ., /M, ., . */ + //TODO in this PR.... - // case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract // case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) // case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations // case IF_SVE_HJ_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 0160ee1f0cbd3d..e10c9e020d743c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8236,6 +8236,21 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_ER_3A; break; + case INS_sve_sqadd: + case INS_sve_sqsub: + case INS_sve_sqsubr: + case INS_sve_suqadd: + case INS_sve_uqadd: + case INS_sve_uqsub: + case INS_sve_uqsubr: + case INS_sve_usqadd: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_ET_3A; + break; + default: unreached(); break; From d88891ac3eb629edc77d5b7a819635c2086bd251 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 24 Nov 2023 09:13:06 +0000 Subject: [PATCH 16/32] Fix formatting --- src/coreclr/jit/codegenarm64.cpp | 227 ++++++++++++++++++++----------- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/emitarm64.cpp | 199 ++++++++++++++------------- src/coreclr/jit/emitarm64.h | 12 +- 4 files changed, 254 insertions(+), 186 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index d83fdb47f3f294..f82137b04972e1 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10116,87 +10116,152 @@ void CodeGen::genArm64EmitterUnitTests() // TODO-SVE: Fix once we add Z and predicate registers - theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_P1, REG_V2, INS_OPTS_SCALABLE_B); // AND ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_P4, REG_V5, INS_OPTS_SCALABLE_H); // BIC ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V14, REG_P5, REG_V16, INS_OPTS_SCALABLE_S); // EOR ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V29, REG_P7, REG_V31, INS_OPTS_SCALABLE_D); // ORR ., /M, ., . 
- - theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V5, REG_P6, REG_V7, INS_OPTS_SCALABLE_B); // ADD ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V15, REG_P7, REG_V29, INS_OPTS_SCALABLE_H); // SUB ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_subr, EA_SCALABLE, REG_V2, REG_P0, REG_V13, INS_OPTS_SCALABLE_S); // SUBR ., /M, ., . - - theEmitter->emitIns_R_R_R(INS_sve_sdiv, EA_SCALABLE, REG_V3, REG_P2, REG_V9, INS_OPTS_SCALABLE_S); // SDIV ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_sdivr, EA_SCALABLE, REG_V31, REG_P3, REG_V29, INS_OPTS_SCALABLE_D); // SDIVR ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_udiv, EA_SCALABLE, REG_V1, REG_P0, REG_V0, INS_OPTS_SCALABLE_S); // UDIV ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_udivr, EA_SCALABLE, REG_V13, REG_P7, REG_V15, INS_OPTS_SCALABLE_D); // UDIVR ., /M, ., . - - theEmitter->emitIns_R_R_R(INS_sve_smax, EA_SCALABLE, REG_V24, REG_P0, REG_V2, INS_OPTS_SCALABLE_B); // SMAX ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_smin, EA_SCALABLE, REG_V9, REG_P1, REG_V27, INS_OPTS_SCALABLE_H); // SMIN ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_sabd, EA_SCALABLE, REG_V5, REG_P2, REG_V6, INS_OPTS_SCALABLE_B); // SABD ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_uabd, EA_SCALABLE, REG_V23, REG_P3, REG_V9, INS_OPTS_SCALABLE_S); // UABD ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_umax, EA_SCALABLE, REG_V15, REG_P4, REG_V2, INS_OPTS_SCALABLE_S); // UMAX ., /M, ., . - theEmitter->emitIns_R_R_R(INS_sve_umin, EA_SCALABLE, REG_V12, REG_P7, REG_V0, INS_OPTS_SCALABLE_D); // UMIN ., /M, ., . - - theEmitter->emitIns_R_R_R(INS_sve_mul, EA_SCALABLE, REG_V5, REG_P1, REG_V3, INS_OPTS_SCALABLE_D); // IF_SVE_AE_3A /* MUL ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_smulh, EA_SCALABLE, REG_V17, REG_P5, REG_V5, INS_OPTS_SCALABLE_S); // IF_SVE_AE_3A /* SMULH ., /M, ., . 
*/ - theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V12, REG_P2, REG_V24, INS_OPTS_SCALABLE_B); // IF_SVE_AE_3A /* UMULH ., /M, ., . */ - - theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V5, REG_P0, REG_V21, INS_OPTS_SCALABLE_S); // IF_SVE_AN_3A /* ASR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_asrr, EA_SCALABLE, REG_V1, REG_P7, REG_V20, INS_OPTS_SCALABLE_B); // IF_SVE_AN_3A /* ASRR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_P2, REG_V0, INS_OPTS_SCALABLE_H); // IF_SVE_AN_3A /* LSL ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_lslr, EA_SCALABLE, REG_V27, REG_P6, REG_V31, INS_OPTS_SCALABLE_D); // IF_SVE_AN_3A /* LSLR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V5, REG_P5, REG_V6, INS_OPTS_SCALABLE_B); // IF_SVE_AN_3A /* LSR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_lsrr, EA_SCALABLE, REG_V15, REG_P4, REG_V17, INS_OPTS_SCALABLE_S); // IF_SVE_AN_3A /* LSRR ., /M, ., . */ - - theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V4, REG_P3, REG_V24, INS_OPTS_SCALABLE_WIDE_B); // IF_SVE_AO_3A /* ASR ., /M, ., .D */ - theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_P7, REG_V3, INS_OPTS_SCALABLE_WIDE_H); // IF_SVE_AO_3A /* LSL ., /M, ., .D */ - theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_P0, REG_V0, INS_OPTS_SCALABLE_WIDE_S); // IF_SVE_AO_3A /* LSR ., /M, ., .D */ - - //TODO-SVE: Currently, these are all printed with /M on the predicate. There should be no predicate extension on these. - theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_SCALABLE, REG_V31, REG_P7, REG_V31, INS_OPTS_SCALABLE_B); // IF_SVE_CM_3A /* CLASTA ., , ., . */ - theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V30, REG_P6, REG_V30, INS_OPTS_SCALABLE_D); // IF_SVE_CM_3A /* CLASTB ., , ., . */ - - theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_2BYTE, REG_V12, REG_P1, REG_V15, INS_OPTS_SCALABLE_H_TO_SIMD); // IF_SVE_CN_3A /* CLASTA , , , . 
*/ - theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_V13, REG_P2, REG_V16, INS_OPTS_SCALABLE_S_TO_SIMD); // IF_SVE_CN_3A /* CLASTB , , , . */ - - //Note: EA_4BYTE used for B and H (destination register is W) - theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R0, REG_P0, REG_V0, INS_OPTS_SCALABLE_B_TO_SCALAR); // IF_SVE_CO_3A /* CLASTA , , , . */ - theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R1, REG_P2, REG_V3, INS_OPTS_SCALABLE_H_TO_SCALAR); // IF_SVE_CO_3A /* CLASTA , , , . */ - theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_R23, REG_P5, REG_V12, INS_OPTS_SCALABLE_S_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , . */ - theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_R3, REG_P6, REG_V9, INS_OPTS_SCALABLE_D_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , . */ - - theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SHADD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_shsub, EA_SCALABLE, REG_V16, REG_P1, REG_V11, INS_OPTS_SCALABLE_H); // IF_SVE_EP_3A /* SHSUB ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_shsubr, EA_SCALABLE, REG_V17, REG_P2, REG_V12, INS_OPTS_SCALABLE_S); // IF_SVE_EP_3A /* SHSUBR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_srhadd, EA_SCALABLE, REG_V18, REG_P3, REG_V13, INS_OPTS_SCALABLE_D); // IF_SVE_EP_3A /* SRHADD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uhadd, EA_SCALABLE, REG_V19, REG_P4, REG_V14, INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* UHADD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uhsub, EA_SCALABLE, REG_V20, REG_P5, REG_V15, INS_OPTS_SCALABLE_H); // IF_SVE_EP_3A /* UHSUB ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uhsubr, EA_SCALABLE, REG_V21, REG_P6, REG_V16, INS_OPTS_SCALABLE_S); // IF_SVE_EP_3A /* UHSUBR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_urhadd, EA_SCALABLE, REG_V22, REG_P7, REG_V17, INS_OPTS_SCALABLE_D); // IF_SVE_EP_3A /* URHADD ., /M, ., . 
*/ - - theEmitter->emitIns_R_R_R(INS_sve_addp, EA_SCALABLE, REG_V23, REG_P6, REG_V18, INS_OPTS_SCALABLE_B); // IF_SVE_ER_3A /* ADDP ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_smaxp, EA_SCALABLE, REG_V24, REG_P5, REG_V19, INS_OPTS_SCALABLE_H); // IF_SVE_ER_3A /* SMAXP ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_sminp, EA_SCALABLE, REG_V25, REG_P4, REG_V20, INS_OPTS_SCALABLE_S); // IF_SVE_ER_3A /* SMINP ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_umaxp, EA_SCALABLE, REG_V26, REG_P3, REG_V21, INS_OPTS_SCALABLE_D); // IF_SVE_ER_3A /* UMAXP ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uminp, EA_SCALABLE, REG_V27, REG_P2, REG_V22, INS_OPTS_SCALABLE_B); // IF_SVE_ER_3A /* UMINP ., /M, ., . */ - - theEmitter->emitIns_R_R_R(INS_sve_sqadd, EA_SCALABLE, REG_V28, REG_P1, REG_V23, INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* SQADD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_sqsub, EA_SCALABLE, REG_V29, REG_P0, REG_V24, INS_OPTS_SCALABLE_H); // IF_SVE_ET_3A /* SQSUB ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_sqsubr, EA_SCALABLE, REG_V30, REG_P1, REG_V25, INS_OPTS_SCALABLE_H); // IF_SVE_ET_3A /* SQSUBR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_suqadd, EA_SCALABLE, REG_V31, REG_P2, REG_V26, INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* SUQADD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uqadd, EA_SCALABLE, REG_V0, REG_P3, REG_V27, INS_OPTS_SCALABLE_S); // IF_SVE_ET_3A /* UQADD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uqsub, EA_SCALABLE, REG_V1, REG_P4, REG_V28, INS_OPTS_SCALABLE_D); // IF_SVE_ET_3A /* UQSUB ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uqsubr, EA_SCALABLE, REG_V2, REG_P5, REG_V29, INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* UQSUBR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_usqadd, EA_SCALABLE, REG_V3, REG_P6, REG_V30, INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* USQADD ., /M, ., . */ - - //TODO in this PR.... - // case IF_SVE_EU_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) - // case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations - // case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - // case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - - - + theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_P1, REG_V2, + INS_OPTS_SCALABLE_B); // AND ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_P4, REG_V5, + INS_OPTS_SCALABLE_H); // BIC ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V14, REG_P5, REG_V16, + INS_OPTS_SCALABLE_S); // EOR ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V29, REG_P7, REG_V31, + INS_OPTS_SCALABLE_D); // ORR ., /M, ., . + + theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V5, REG_P6, REG_V7, + INS_OPTS_SCALABLE_B); // ADD ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V15, REG_P7, REG_V29, + INS_OPTS_SCALABLE_H); // SUB ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_subr, EA_SCALABLE, REG_V2, REG_P0, REG_V13, + INS_OPTS_SCALABLE_S); // SUBR ., /M, ., . + + theEmitter->emitIns_R_R_R(INS_sve_sdiv, EA_SCALABLE, REG_V3, REG_P2, REG_V9, + INS_OPTS_SCALABLE_S); // SDIV ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_sdivr, EA_SCALABLE, REG_V31, REG_P3, REG_V29, + INS_OPTS_SCALABLE_D); // SDIVR ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_udiv, EA_SCALABLE, REG_V1, REG_P0, REG_V0, + INS_OPTS_SCALABLE_S); // UDIV ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_udivr, EA_SCALABLE, REG_V13, REG_P7, REG_V15, + INS_OPTS_SCALABLE_D); // UDIVR ., /M, ., . + + theEmitter->emitIns_R_R_R(INS_sve_smax, EA_SCALABLE, REG_V24, REG_P0, REG_V2, + INS_OPTS_SCALABLE_B); // SMAX ., /M, ., . 
+ theEmitter->emitIns_R_R_R(INS_sve_smin, EA_SCALABLE, REG_V9, REG_P1, REG_V27, + INS_OPTS_SCALABLE_H); // SMIN ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_sabd, EA_SCALABLE, REG_V5, REG_P2, REG_V6, + INS_OPTS_SCALABLE_B); // SABD ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_uabd, EA_SCALABLE, REG_V23, REG_P3, REG_V9, + INS_OPTS_SCALABLE_S); // UABD ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_umax, EA_SCALABLE, REG_V15, REG_P4, REG_V2, + INS_OPTS_SCALABLE_S); // UMAX ., /M, ., . + theEmitter->emitIns_R_R_R(INS_sve_umin, EA_SCALABLE, REG_V12, REG_P7, REG_V0, + INS_OPTS_SCALABLE_D); // UMIN ., /M, ., . + + theEmitter->emitIns_R_R_R(INS_sve_mul, EA_SCALABLE, REG_V5, REG_P1, REG_V3, + INS_OPTS_SCALABLE_D); // IF_SVE_AE_3A /* MUL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_smulh, EA_SCALABLE, REG_V17, REG_P5, REG_V5, + INS_OPTS_SCALABLE_S); // IF_SVE_AE_3A /* SMULH ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V12, REG_P2, REG_V24, + INS_OPTS_SCALABLE_B); // IF_SVE_AE_3A /* UMULH ., /M, ., . */ + + theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V5, REG_P0, REG_V21, + INS_OPTS_SCALABLE_S); // IF_SVE_AN_3A /* ASR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_asrr, EA_SCALABLE, REG_V1, REG_P7, REG_V20, + INS_OPTS_SCALABLE_B); // IF_SVE_AN_3A /* ASRR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_P2, REG_V0, + INS_OPTS_SCALABLE_H); // IF_SVE_AN_3A /* LSL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_lslr, EA_SCALABLE, REG_V27, REG_P6, REG_V31, + INS_OPTS_SCALABLE_D); // IF_SVE_AN_3A /* LSLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V5, REG_P5, REG_V6, + INS_OPTS_SCALABLE_B); // IF_SVE_AN_3A /* LSR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_lsrr, EA_SCALABLE, REG_V15, REG_P4, REG_V17, + INS_OPTS_SCALABLE_S); // IF_SVE_AN_3A /* LSRR ., /M, ., . 
*/ + + theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V4, REG_P3, REG_V24, + INS_OPTS_SCALABLE_WIDE_B); // IF_SVE_AO_3A /* ASR ., /M, ., .D + // */ + theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_P7, REG_V3, + INS_OPTS_SCALABLE_WIDE_H); // IF_SVE_AO_3A /* LSL ., /M, ., .D + // */ + theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_P0, REG_V0, + INS_OPTS_SCALABLE_WIDE_S); // IF_SVE_AO_3A /* LSR ., /M, ., .D + // */ + + // TODO-SVE: Currently, these are all printed with /M on the predicate. There should be no predicate extension on + // these. + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_SCALABLE, REG_V31, REG_P7, REG_V31, + INS_OPTS_SCALABLE_B); // IF_SVE_CM_3A /* CLASTA ., , ., . */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V30, REG_P6, REG_V30, + INS_OPTS_SCALABLE_D); // IF_SVE_CM_3A /* CLASTB ., , ., . */ + + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_2BYTE, REG_V12, REG_P1, REG_V15, + INS_OPTS_SCALABLE_H_TO_SIMD); // IF_SVE_CN_3A /* CLASTA , , , . + // */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_V13, REG_P2, REG_V16, + INS_OPTS_SCALABLE_S_TO_SIMD); // IF_SVE_CN_3A /* CLASTB , , , . + // */ + + // Note: EA_4BYTE used for B and H (destination register is W) + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R0, REG_P0, REG_V0, + INS_OPTS_SCALABLE_B_TO_SCALAR); // IF_SVE_CO_3A /* CLASTA , , , + // . */ + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R1, REG_P2, REG_V3, + INS_OPTS_SCALABLE_H_TO_SCALAR); // IF_SVE_CO_3A /* CLASTA , , , + // . */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_R23, REG_P5, REG_V12, + INS_OPTS_SCALABLE_S_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , + // . */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_R3, REG_P6, REG_V9, + INS_OPTS_SCALABLE_D_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , + // . 
*/ + + theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10, + INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SHADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_shsub, EA_SCALABLE, REG_V16, REG_P1, REG_V11, + INS_OPTS_SCALABLE_H); // IF_SVE_EP_3A /* SHSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_shsubr, EA_SCALABLE, REG_V17, REG_P2, REG_V12, + INS_OPTS_SCALABLE_S); // IF_SVE_EP_3A /* SHSUBR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_srhadd, EA_SCALABLE, REG_V18, REG_P3, REG_V13, + INS_OPTS_SCALABLE_D); // IF_SVE_EP_3A /* SRHADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uhadd, EA_SCALABLE, REG_V19, REG_P4, REG_V14, + INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* UHADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uhsub, EA_SCALABLE, REG_V20, REG_P5, REG_V15, + INS_OPTS_SCALABLE_H); // IF_SVE_EP_3A /* UHSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uhsubr, EA_SCALABLE, REG_V21, REG_P6, REG_V16, + INS_OPTS_SCALABLE_S); // IF_SVE_EP_3A /* UHSUBR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_urhadd, EA_SCALABLE, REG_V22, REG_P7, REG_V17, + INS_OPTS_SCALABLE_D); // IF_SVE_EP_3A /* URHADD ., /M, ., . */ + + theEmitter->emitIns_R_R_R(INS_sve_addp, EA_SCALABLE, REG_V23, REG_P6, REG_V18, + INS_OPTS_SCALABLE_B); // IF_SVE_ER_3A /* ADDP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_smaxp, EA_SCALABLE, REG_V24, REG_P5, REG_V19, + INS_OPTS_SCALABLE_H); // IF_SVE_ER_3A /* SMAXP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sminp, EA_SCALABLE, REG_V25, REG_P4, REG_V20, + INS_OPTS_SCALABLE_S); // IF_SVE_ER_3A /* SMINP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_umaxp, EA_SCALABLE, REG_V26, REG_P3, REG_V21, + INS_OPTS_SCALABLE_D); // IF_SVE_ER_3A /* UMAXP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uminp, EA_SCALABLE, REG_V27, REG_P2, REG_V22, + INS_OPTS_SCALABLE_B); // IF_SVE_ER_3A /* UMINP ., /M, ., . 
*/ + + theEmitter->emitIns_R_R_R(INS_sve_sqadd, EA_SCALABLE, REG_V28, REG_P1, REG_V23, + INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* SQADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqsub, EA_SCALABLE, REG_V29, REG_P0, REG_V24, + INS_OPTS_SCALABLE_H); // IF_SVE_ET_3A /* SQSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqsubr, EA_SCALABLE, REG_V30, REG_P1, REG_V25, + INS_OPTS_SCALABLE_H); // IF_SVE_ET_3A /* SQSUBR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_suqadd, EA_SCALABLE, REG_V31, REG_P2, REG_V26, + INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* SUQADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqadd, EA_SCALABLE, REG_V0, REG_P3, REG_V27, + INS_OPTS_SCALABLE_S); // IF_SVE_ET_3A /* UQADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqsub, EA_SCALABLE, REG_V1, REG_P4, REG_V28, + INS_OPTS_SCALABLE_D); // IF_SVE_ET_3A /* UQSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqsubr, EA_SCALABLE, REG_V2, REG_P5, REG_V29, + INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* UQSUBR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_usqadd, EA_SCALABLE, REG_V3, REG_P6, REG_V30, + INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* USQADD ., /M, ., . */ + +// TODO in this PR.... +// case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) +// case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations +// case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) +// case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 8ca70a92b9cdc8..e24ba91ba94f53 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -515,7 +515,7 @@ class emitter #ifdef TARGET_AMD64 OPSZP = OPSZ8, #else - OPSZP = OPSZ4, + OPSZP = OPSZ4, #endif }; diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index e10c9e020d743c..a854bd76c697fe 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -943,60 +943,61 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(datasize == EA_8BYTE); break; - case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) - case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) - case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) - case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) - case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) - case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic - case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract - case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) - case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations - case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) elemsize = id->idOpSize(); assert(insOptsScalableSimple(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm assert(isScalableVectorSize(elemsize)); break; - case IF_SVE_AC_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) elemsize = id->idOpSize(); assert(insOptsScalableWords(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg1())); // ddddd assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isVectorRegister(id->idReg3())); // mmmmm assert(isScalableVectorSize(elemsize)); break; - case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) elemsize = id->idOpSize(); - assert(insOptsScalableWide(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd + assert(insOptsScalableWide(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isVectorRegister(id->idReg3())); // mmmmm assert(isScalableVectorSize(elemsize)); break; - case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar elemsize = id->idOpSize(); assert(insOptsScalableToSimd(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm assert(isValidVectorElemsize(elemsize)); break; - case IF_SVE_CO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register elemsize = id->idOpSize(); assert(insOptsScalableToScalar(id->idInsOpt())); // xx - assert(isGeneralRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isGeneralRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm assert(isValidScalarDatasize(elemsize)); break; @@ -11900,7 +11901,6 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } - BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id) { instruction ins = id->idIns(); @@ -13848,36 +13848,37 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) - case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) - case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) - case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) - case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) - case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) - case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - case IF_SVE_EP_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) - case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic - case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract - case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) - case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations - case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) code = emitInsCodeSve(ins, fmt); - code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg - code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm - code |= insEncodeSveElemsize(id->idInsOpt()); // xx + code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(id->idInsOpt()); // xx dst += emitOutput_Instr(dst, code); break; - case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register code = emitInsCodeSve(ins, fmt); - code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rd(id->idReg1()); // ddddd code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg - code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm - code |= insEncodeSveElemsize(id->idInsOpt()); // xx + code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(id->idInsOpt()); // xx dst += emitOutput_Instr(dst, code); break; @@ -16074,38 +16075,39 @@ void emitter::emitDispInsHelp( } break; - case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) - case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) - case IF_SVE_AD_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer min/max/difference (predicated) - case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) - case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) - case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) - case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic - case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract - case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) - case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations - case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), true, true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_CM_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), true, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; - case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), true, true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), true, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm break; - case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - case IF_SVE_CO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally extract element to general register - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), true, true); // ggg - emitDispReg(id->idReg1(), size, true); // ddddd + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispPredicateReg(id->idReg2(), true, true); // ggg + emitDispReg(id->idReg1(), size, true); // ddddd emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; @@ -18294,23 +18296,24 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) - case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) - case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) - case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) - case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) - case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) - case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register - case IF_SVE_EP_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) - case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic - case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract - case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) - case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations - case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_1C; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 522880eca2668c..5aaf55348ac17a 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -844,7 +844,8 @@ inline static bool insOptsConvertIntToFloat(insOpts opt) inline static bool insOptsScalable(insOpts opt) { // Opt is any of the scalable types. - return insOptsScalableSimple(opt) || insOptsScalableWide(opt) || insOptsScalableToSimd(opt) || insOptsScalableToScalar(opt); + return insOptsScalableSimple(opt) || insOptsScalableWide(opt) || insOptsScalableToSimd(opt) || + insOptsScalableToScalar(opt); } inline static bool insOptsScalableSimple(insOpts opt) @@ -865,17 +866,16 @@ inline static bool insOptsScalableWide(insOpts opt) inline static bool insOptsScalableToSimd(insOpts opt) { - return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD || opt == INS_OPTS_SCALABLE_H_TO_SIMD || opt == INS_OPTS_SCALABLE_S_TO_SIMD || - opt == INS_OPTS_SCALABLE_D_TO_SIMD)); + return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD || opt == INS_OPTS_SCALABLE_H_TO_SIMD || + opt == INS_OPTS_SCALABLE_S_TO_SIMD || opt == INS_OPTS_SCALABLE_D_TO_SIMD)); } inline static bool insOptsScalableToScalar(insOpts opt) { - return ((opt == INS_OPTS_SCALABLE_B_TO_SCALAR || opt == INS_OPTS_SCALABLE_H_TO_SCALAR || opt == INS_OPTS_SCALABLE_S_TO_SCALAR || - opt == INS_OPTS_SCALABLE_D_TO_SCALAR)); + return ((opt == INS_OPTS_SCALABLE_B_TO_SCALAR || opt == 
INS_OPTS_SCALABLE_H_TO_SCALAR || + opt == INS_OPTS_SCALABLE_S_TO_SCALAR || opt == INS_OPTS_SCALABLE_D_TO_SCALAR)); } - static bool isValidImmCond(ssize_t imm); static bool isValidImmCondFlags(ssize_t imm); static bool isValidImmCondFlagsImm5(ssize_t imm); From 89e8ccdf6fbe88f831d9d06405492dbbf28d57e1 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 24 Nov 2023 09:26:23 +0000 Subject: [PATCH 17/32] Add IF_SVE_EU_3A --- src/coreclr/jit/codegenarm64.cpp | 14 +++++++++++++- src/coreclr/jit/emitarm64.cpp | 19 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index f82137b04972e1..dde213588ed1f7 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10257,8 +10257,20 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_usqadd, EA_SCALABLE, REG_V3, REG_P6, REG_V30, INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* USQADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqrshl, EA_SCALABLE, REG_V4, REG_P7, REG_V31, INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* SQRSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqrshlr, EA_SCALABLE, REG_V5, REG_P0, REG_V30, INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* SQRSHLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqshl, EA_SCALABLE, REG_V6, REG_P1, REG_V29, INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* SQSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqshlr, EA_SCALABLE, REG_V7, REG_P2, REG_V28, INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* SQSHLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_srshl, EA_SCALABLE, REG_V8, REG_P3, REG_V27, INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* SRSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_srshlr, EA_SCALABLE, REG_V9, REG_P4, REG_V26, INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* SRSHLR ., /M, ., . 
*/ + theEmitter->emitIns_R_R_R(INS_sve_uqrshl, EA_SCALABLE, REG_V10, REG_P5, REG_V25, INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* UQRSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqrshlr, EA_SCALABLE, REG_V11, REG_P6, REG_V24, INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* UQRSHLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqshl, EA_SCALABLE, REG_V12, REG_P7, REG_V23, INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* UQSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqshlr, EA_SCALABLE, REG_V13, REG_P0, REG_V22, INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* UQSHLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_urshl, EA_SCALABLE, REG_V14, REG_P1, REG_V21, INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* URSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_urshlr, EA_SCALABLE, REG_V15, REG_P2, REG_V20, INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* URSHLR ., /M, ., . */ + // TODO in this PR.... -// case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) // case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations // case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) // case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index a854bd76c697fe..73bc9b9a1bae2a 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8252,6 +8252,25 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_ET_3A; break; + case INS_sve_sqrshl: + case INS_sve_sqrshlr: + case INS_sve_sqshl: + case INS_sve_sqshlr: + case INS_sve_srshl: + case INS_sve_srshlr: + case INS_sve_uqrshl: + case INS_sve_uqrshlr: + case INS_sve_uqshl: + case INS_sve_uqshlr: + case INS_sve_urshl: + case INS_sve_urshlr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_EU_3A; + break; + default: unreached(); break; From 947e76c967e7447b0eeacfef6995a0e977160fa1 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 24 Nov 2023 09:41:09 +0000 Subject: [PATCH 18/32] IF_SVE_EU_3A --- src/coreclr/jit/codegenarm64.cpp | 7 ++++++- src/coreclr/jit/emitarm64.cpp | 22 +++++++++++++++++++++- src/coreclr/jit/emitarm64.h | 9 +++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index dde213588ed1f7..9f621dc9714ad2 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10270,8 +10270,13 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_urshl, EA_SCALABLE, REG_V14, REG_P1, REG_V21, INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* URSHL ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_urshlr, EA_SCALABLE, REG_V15, REG_P2, REG_V20, INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* URSHLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_faddp, EA_SCALABLE, REG_V16, REG_P3, REG_V19, INS_OPTS_SCALABLE_H); // IF_SVE_GR_3A /* FADDP ., /M, ., . 
*/ + theEmitter->emitIns_R_R_R(INS_sve_fmaxnmp, EA_SCALABLE, REG_V17, REG_P4, REG_V18, INS_OPTS_SCALABLE_S); // IF_SVE_GR_3A /* FMAXNMP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmaxp, EA_SCALABLE, REG_V18, REG_P5, REG_V17, INS_OPTS_SCALABLE_D); // IF_SVE_GR_3A /* FMAXP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fminnmp, EA_SCALABLE, REG_V19, REG_P6, REG_V16, INS_OPTS_SCALABLE_S); // IF_SVE_GR_3A /* FMINNMP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fminp, EA_SCALABLE, REG_V20, REG_P7, REG_V15, INS_OPTS_SCALABLE_H); // IF_SVE_GR_3A /* FMINP ., /M, ., . */ + // TODO in this PR.... -// case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations // case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) // case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 73bc9b9a1bae2a..a3ec2b8452e816 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -954,7 +954,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left // (predicated) - case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) elemsize = id->idOpSize(); @@ -1001,6 +1000,15 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidScalarDatasize(elemsize)); break; + case IF_SVE_GR_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 floating-point pairwise operations + elemsize = id->idOpSize(); + assert(insOptsScalableFloat(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -8271,6 +8279,18 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_EU_3A; break; + case INS_sve_faddp: + case INS_sve_fmaxnmp: + case INS_sve_fmaxp: + case INS_sve_fminnmp: + case INS_sve_fminp: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableFloat(opt)); + fmt = IF_SVE_GR_3A; + break; + default: unreached(); break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 5aaf55348ac17a..6f663c7a658fa5 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -850,15 +850,24 @@ inline static bool insOptsScalable(insOpts opt) inline static bool insOptsScalableSimple(insOpts opt) { + // Opt is any of the standard scalable types. return ((opt == INS_OPTS_SCALABLE_B || opt == INS_OPTS_SCALABLE_H || opt == INS_OPTS_SCALABLE_S || opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableWords(insOpts opt) { + // Opt is any of the standard word and above scalable types. return ((opt == INS_OPTS_SCALABLE_S || opt == INS_OPTS_SCALABLE_D)); } +inline static bool insOptsScalableFloat(insOpts opt) +{ + // Opt is any of the standard scalable types that are valid for FP (H, S or D, but not B).
+ return ((opt == INS_OPTS_SCALABLE_H || opt == INS_OPTS_SCALABLE_S || + opt == INS_OPTS_SCALABLE_D)); +} + inline static bool insOptsScalableWide(insOpts opt) { return ((opt == INS_OPTS_SCALABLE_WIDE_B || opt == INS_OPTS_SCALABLE_WIDE_H || opt == INS_OPTS_SCALABLE_WIDE_S)); From 60567909c06bbf790b0a6e5dfcce286287499b7a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 24 Nov 2023 10:05:23 +0000 Subject: [PATCH 19/32] Add IF_SVE_HJ_3A --- src/coreclr/jit/codegenarm64.cpp | 5 ++++- src/coreclr/jit/emitarm64.cpp | 17 ++++++++++++++--- src/coreclr/jit/emitarm64.h | 12 ++++++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 9f621dc9714ad2..641c3ad9962b1d 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10276,8 +10276,11 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_fminnmp, EA_SCALABLE, REG_V19, REG_P6, REG_V16, INS_OPTS_SCALABLE_S); // IF_SVE_GR_3A /* FMINNMP ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fminp, EA_SCALABLE, REG_V20, REG_P7, REG_V15, INS_OPTS_SCALABLE_H); // IF_SVE_GR_3A /* FMINP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_2BYTE, REG_V21, REG_P6, REG_V14, INS_OPTS_SCALABLE_H_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . */ + theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_4BYTE, REG_V22, REG_P5, REG_V13, INS_OPTS_SCALABLE_S_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . */ + theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_8BYTE, REG_V23, REG_P4, REG_V12, INS_OPTS_SCALABLE_D_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . */ + // TODO in this PR.... -// case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) // case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index a3ec2b8452e816..ff2cba04035bd3 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -954,7 +954,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left // (predicated) - case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) elemsize = id->idOpSize(); assert(insOptsScalableSimple(id->idInsOpt())); // xx @@ -983,12 +982,13 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) elemsize = id->idOpSize(); assert(insOptsScalableToSimd(id->idInsOpt())); // xx assert(isVectorRegister(id->idReg1())); // ddddd assert(isLowPredicateRegister(id->idReg2())); // ggg assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidVectorElemsize(elemsize)); + assert(isValidVectorElemsizeSveFloat(elemsize)); break; case IF_SVE_CO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally extract element to general register @@ -8207,12 +8207,14 @@ void emitter::emitIns_R_R_R( } else if (insOptsScalableToSimd(opt)) { + assert(isFloatReg(reg1)); assert(isValidVectorElemsize(size)); fmt = IF_SVE_CN_3A; } else { assert(insOptsScalableToScalar(opt)); + assert(isGeneralRegister(reg1)); assert(isValidScalarDatasize(size)); fmt = IF_SVE_CO_3A; } @@ -8291,6 +8293,15 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_GR_3A; break; + case INS_sve_fadda: + assert(isFloatReg(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableToSimdFloat(opt)); + assert(isValidVectorElemsizeSveFloat(size)); + fmt = IF_SVE_HJ_3A; + break; + default: unreached(); break; @@ -16127,7 +16138,6 @@ void emitter::emitDispInsHelp( case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left // (predicated) case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations - case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispPredicateReg(id->idReg2(), true, true); // ggg @@ -16144,6 +16154,7 @@ void emitter::emitDispInsHelp( case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_HJ_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) emitDispReg(id->idReg1(), size, true); // ddddd emitDispPredicateReg(id->idReg2(), true, true); // ggg emitDispReg(id->idReg1(), size, true); // ddddd diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 6f663c7a658fa5..9acf13ac294473 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -705,6 +705,11 @@ inline static bool isValidVectorElemsizeFloat(emitAttr size) return (size == EA_8BYTE) || (size == EA_4BYTE); } +inline static bool isValidVectorElemsizeSveFloat(emitAttr size) +{ + return (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE); +} + inline static bool isScalableVectorSize(emitAttr size) { return (size == EA_SCALABLE); @@ -879,6 +884,13 @@ inline static bool insOptsScalableToSimd(insOpts opt) opt == INS_OPTS_SCALABLE_S_TO_SIMD || opt == INS_OPTS_SCALABLE_D_TO_SIMD)); } +inline static bool insOptsScalableToSimdFloat(insOpts opt) +{ + // Opt is any of the SIMD scable types that are valid for FP. 
+ return ((opt == INS_OPTS_SCALABLE_H_TO_SIMD || opt == INS_OPTS_SCALABLE_S_TO_SIMD || + opt == INS_OPTS_SCALABLE_D_TO_SIMD)); +} + inline static bool insOptsScalableToScalar(insOpts opt) { return ((opt == INS_OPTS_SCALABLE_B_TO_SCALAR || opt == INS_OPTS_SCALABLE_H_TO_SCALAR || From af7b818f716105115eed4826b27e52bcc29bb0f5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 24 Nov 2023 14:02:06 +0000 Subject: [PATCH 20/32] Add IF_SVE_HJ_3A --- src/coreclr/jit/codegenarm64.cpp | 17 +++++++++++++++-- src/coreclr/jit/emitarm64.cpp | 26 +++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 641c3ad9962b1d..ca2a96624e1dc9 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10280,8 +10280,21 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_4BYTE, REG_V22, REG_P5, REG_V13, INS_OPTS_SCALABLE_S_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . */ theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_8BYTE, REG_V23, REG_P4, REG_V12, INS_OPTS_SCALABLE_D_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . */ -// TODO in this PR.... -// case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + theEmitter->emitIns_R_R_R(INS_sve_fabd, EA_SCALABLE, REG_V24, REG_P3, REG_V11, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FABD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V25, REG_P2, REG_V10, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_famax, EA_SCALABLE, REG_V26, REG_P1, REG_V9, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FAMAX ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_famin, EA_SCALABLE, REG_V27, REG_P0, REG_V8, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FAMIN ., /M, ., . 
*/ + theEmitter->emitIns_R_R_R(INS_sve_fdiv, EA_SCALABLE, REG_V28, REG_P0, REG_V7, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FDIV ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fdivr, EA_SCALABLE, REG_V29, REG_P1, REG_V6, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FDIVR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmax, EA_SCALABLE, REG_V30, REG_P2, REG_V5, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FMAX ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmaxnm, EA_SCALABLE, REG_V31, REG_P3, REG_V4, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FMAXNM ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmin, EA_SCALABLE, REG_V0, REG_P4, REG_V3, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FMIN ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fminnm, EA_SCALABLE, REG_V1, REG_P5, REG_V2, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FMINNM ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmul, EA_SCALABLE, REG_V2, REG_P6, REG_V1, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FMUL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmulx, EA_SCALABLE, REG_V3, REG_P7, REG_V0, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FMULX ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fscale, EA_SCALABLE, REG_V4, REG_P6, REG_V31, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FSCALE ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fsub, EA_SCALABLE, REG_V5, REG_P5, REG_V30, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P4, REG_V29, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FSUBR ., /M, ., . */ #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index ff2cba04035bd3..7d02215f4b0660 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -954,7 +954,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract case IF_SVE_EU_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left // (predicated) - case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) elemsize = id->idOpSize(); assert(insOptsScalableSimple(id->idInsOpt())); // xx assert(isVectorRegister(id->idReg1())); // ddddd @@ -1001,6 +1000,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) elemsize = id->idOpSize(); assert(insOptsScalableFloat(id->idInsOpt())); // xx assert(isVectorRegister(id->idReg1())); // ddddd @@ -8302,6 +8302,30 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_HJ_3A; break; + + + case INS_sve_fabd: + case INS_sve_fadd: + case INS_sve_famax: + case INS_sve_famin: + case INS_sve_fdiv: + case INS_sve_fdivr: + case INS_sve_fmax: + case INS_sve_fmaxnm: + case INS_sve_fmin: + case INS_sve_fminnm: + case INS_sve_fmul: + case INS_sve_fmulx: + case INS_sve_fscale: + case INS_sve_fsub: + case INS_sve_fsubr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableFloat(opt)); + fmt = IF_SVE_HL_3A; + break; + default: unreached(); break; From 819ff135f411ced98358d1898098e89e1fa37fea Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 24 Nov 2023 14:27:29 +0000 Subject: [PATCH 21/32] Add PredicateType --- src/coreclr/jit/emitarm64.cpp | 34 +++++++++++++++++++++++++--------- src/coreclr/jit/emitarm64.h | 9 ++++++++- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 7d02215f4b0660..f6e4fa66ba4b2c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -14511,18 +14511,16 @@ void emitter::emitDispVectorElemList( 
//------------------------------------------------------------------------ // emitDispPredicateReg: Display a predicate register name with with an arrangement suffix // -void emitter::emitDispPredicateReg(regNumber reg, bool merge, bool addComma) +void emitter::emitDispPredicateReg(regNumber reg, PredicateType ptype, bool addComma) { assert(isPredicateRegister(reg)); printf(emitPredicateRegName(reg)); - // TODO-SVE: Some instructions have a bit to indicate Zero or Merge. This will probably - // need encoding in opts or similar. - if (merge) + if (ptype == PREDICATE_MERGE) { printf("/m"); } - else + else if (ptype == PREDICATE_ZERO) { printf("/z"); } @@ -16155,7 +16153,6 @@ void emitter::emitDispInsHelp( case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) - case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract @@ -16164,23 +16161,42 @@ void emitter::emitDispInsHelp( case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), true, true); // ggg + emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), true, true); // ggg + emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm break; + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + case IF_SVE_HJ_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), true, true); // ggg + emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg emitDispReg(id->idReg1(), size, true); // ddddd emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 9acf13ac294473..b0b902e7e6c4af 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -19,6 +19,13 @@ static bool strictArmAsm; /* Debug-only routines to display instructions */ /************************************************************************/ +enum PredicateType +{ + PREDICATE_NONE = 0, + PREDICATE_MERGE, + PREDICATE_ZERO, +}; + const char* emitSveRegName(regNumber reg); const char* emitVectorRegName(regNumber reg); const char* emitPredicateRegName(regNumber reg); @@ -45,7 +52,7 @@ void emitDispVectorReg(regNumber reg, insOpts opt, bool addComma); void emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma); void emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); void emitDispVectorElemList(regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma); -void emitDispPredicateReg(regNumber reg, bool merge, bool addComma); +void emitDispPredicateReg(regNumber reg, PredicateType ptype, bool addComma); void emitDispArrangement(insOpts opt); void emitDispElemsize(emitAttr elemsize); void emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr); From 9c06fe72a70674f2e26deb970e1cb4609ac3d6ca Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 24 Nov 2023 14:43:15 +0000 Subject: [PATCH 22/32] Remove tests unsupported by capstone --- src/coreclr/jit/codegenarm64.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp 
b/src/coreclr/jit/codegenarm64.cpp index ca2a96624e1dc9..30316558db8708 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10114,8 +10114,6 @@ void CodeGen::genArm64EmitterUnitTests() genDefineTempLabel(genCreateTempLabel()); - // TODO-SVE: Fix once we add Z and predicate registers - theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_P1, REG_V2, INS_OPTS_SCALABLE_B); // AND ., /M, ., . theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_P4, REG_V5, @@ -10184,8 +10182,6 @@ void CodeGen::genArm64EmitterUnitTests() INS_OPTS_SCALABLE_WIDE_S); // IF_SVE_AO_3A /* LSR ., /M, ., .D // */ - // TODO-SVE: Currently, these are all printed with /M on the predicate. There should be no predicate extension on - // these. theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_SCALABLE, REG_V31, REG_P7, REG_V31, INS_OPTS_SCALABLE_B); // IF_SVE_CM_3A /* CLASTA ., , ., . */ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V30, REG_P6, REG_V30, @@ -10282,8 +10278,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_fabd, EA_SCALABLE, REG_V24, REG_P3, REG_V11, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FABD ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V25, REG_P2, REG_V10, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FADD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_famax, EA_SCALABLE, REG_V26, REG_P1, REG_V9, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FAMAX ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_famin, EA_SCALABLE, REG_V27, REG_P0, REG_V8, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FAMIN ., /M, ., . */ + // These are not yet supported by capstone. + // theEmitter->emitIns_R_R_R(INS_sve_famax, EA_SCALABLE, REG_V26, REG_P1, REG_V9, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FAMAX ., /M, ., . */ + // theEmitter->emitIns_R_R_R(INS_sve_famin, EA_SCALABLE, REG_V27, REG_P0, REG_V8, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FAMIN ., /M, ., . 
*/ theEmitter->emitIns_R_R_R(INS_sve_fdiv, EA_SCALABLE, REG_V28, REG_P0, REG_V7, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FDIV ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fdivr, EA_SCALABLE, REG_V29, REG_P1, REG_V6, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FDIVR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fmax, EA_SCALABLE, REG_V30, REG_P2, REG_V5, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FMAX ., /M, ., . */ From 7b74dc6682f2bccd95dbceecefad91d9f173fb1f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 24 Nov 2023 14:46:01 +0000 Subject: [PATCH 23/32] Fix formatting --- src/coreclr/jit/codegenarm64.cpp | 114 ++++++++++++++++++++----------- src/coreclr/jit/emitarm64.cpp | 58 ++++++++-------- 2 files changed, 104 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 30316558db8708..f79722bcdf2f54 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10253,45 +10253,83 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_usqadd, EA_SCALABLE, REG_V3, REG_P6, REG_V30, INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* USQADD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_sqrshl, EA_SCALABLE, REG_V4, REG_P7, REG_V31, INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* SQRSHL ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_sqrshlr, EA_SCALABLE, REG_V5, REG_P0, REG_V30, INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* SQRSHLR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_sqshl, EA_SCALABLE, REG_V6, REG_P1, REG_V29, INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* SQSHL ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_sqshlr, EA_SCALABLE, REG_V7, REG_P2, REG_V28, INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* SQSHLR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_srshl, EA_SCALABLE, REG_V8, REG_P3, REG_V27, INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* SRSHL ., /M, ., . 
*/ - theEmitter->emitIns_R_R_R(INS_sve_srshlr, EA_SCALABLE, REG_V9, REG_P4, REG_V26, INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* SRSHLR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uqrshl, EA_SCALABLE, REG_V10, REG_P5, REG_V25, INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* UQRSHL ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uqrshlr, EA_SCALABLE, REG_V11, REG_P6, REG_V24, INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* UQRSHLR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uqshl, EA_SCALABLE, REG_V12, REG_P7, REG_V23, INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* UQSHL ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_uqshlr, EA_SCALABLE, REG_V13, REG_P0, REG_V22, INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* UQSHLR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_urshl, EA_SCALABLE, REG_V14, REG_P1, REG_V21, INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* URSHL ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_urshlr, EA_SCALABLE, REG_V15, REG_P2, REG_V20, INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* URSHLR ., /M, ., . */ - - theEmitter->emitIns_R_R_R(INS_sve_faddp, EA_SCALABLE, REG_V16, REG_P3, REG_V19, INS_OPTS_SCALABLE_H); // IF_SVE_GR_3A /* FADDP ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fmaxnmp, EA_SCALABLE, REG_V17, REG_P4, REG_V18, INS_OPTS_SCALABLE_S); // IF_SVE_GR_3A /* FMAXNMP ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fmaxp, EA_SCALABLE, REG_V18, REG_P5, REG_V17, INS_OPTS_SCALABLE_D); // IF_SVE_GR_3A /* FMAXP ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fminnmp, EA_SCALABLE, REG_V19, REG_P6, REG_V16, INS_OPTS_SCALABLE_S); // IF_SVE_GR_3A /* FMINNMP ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fminp, EA_SCALABLE, REG_V20, REG_P7, REG_V15, INS_OPTS_SCALABLE_H); // IF_SVE_GR_3A /* FMINP ., /M, ., . */ - - theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_2BYTE, REG_V21, REG_P6, REG_V14, INS_OPTS_SCALABLE_H_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . 
*/ - theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_4BYTE, REG_V22, REG_P5, REG_V13, INS_OPTS_SCALABLE_S_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . */ - theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_8BYTE, REG_V23, REG_P4, REG_V12, INS_OPTS_SCALABLE_D_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . */ - - theEmitter->emitIns_R_R_R(INS_sve_fabd, EA_SCALABLE, REG_V24, REG_P3, REG_V11, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FABD ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V25, REG_P2, REG_V10, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FADD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqrshl, EA_SCALABLE, REG_V4, REG_P7, REG_V31, + INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* SQRSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqrshlr, EA_SCALABLE, REG_V5, REG_P0, REG_V30, + INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* SQRSHLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqshl, EA_SCALABLE, REG_V6, REG_P1, REG_V29, + INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* SQSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_sqshlr, EA_SCALABLE, REG_V7, REG_P2, REG_V28, + INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* SQSHLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_srshl, EA_SCALABLE, REG_V8, REG_P3, REG_V27, + INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* SRSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_srshlr, EA_SCALABLE, REG_V9, REG_P4, REG_V26, + INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* SRSHLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqrshl, EA_SCALABLE, REG_V10, REG_P5, REG_V25, + INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* UQRSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqrshlr, EA_SCALABLE, REG_V11, REG_P6, REG_V24, + INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* UQRSHLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_uqshl, EA_SCALABLE, REG_V12, REG_P7, REG_V23, + INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* UQSHL ., /M, ., . 
*/ + theEmitter->emitIns_R_R_R(INS_sve_uqshlr, EA_SCALABLE, REG_V13, REG_P0, REG_V22, + INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* UQSHLR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_urshl, EA_SCALABLE, REG_V14, REG_P1, REG_V21, + INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* URSHL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_urshlr, EA_SCALABLE, REG_V15, REG_P2, REG_V20, + INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* URSHLR ., /M, ., . */ + + theEmitter->emitIns_R_R_R(INS_sve_faddp, EA_SCALABLE, REG_V16, REG_P3, REG_V19, + INS_OPTS_SCALABLE_H); // IF_SVE_GR_3A /* FADDP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmaxnmp, EA_SCALABLE, REG_V17, REG_P4, REG_V18, + INS_OPTS_SCALABLE_S); // IF_SVE_GR_3A /* FMAXNMP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmaxp, EA_SCALABLE, REG_V18, REG_P5, REG_V17, + INS_OPTS_SCALABLE_D); // IF_SVE_GR_3A /* FMAXP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fminnmp, EA_SCALABLE, REG_V19, REG_P6, REG_V16, + INS_OPTS_SCALABLE_S); // IF_SVE_GR_3A /* FMINNMP ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fminp, EA_SCALABLE, REG_V20, REG_P7, REG_V15, + INS_OPTS_SCALABLE_H); // IF_SVE_GR_3A /* FMINP ., /M, ., . */ + + theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_2BYTE, REG_V21, REG_P6, REG_V14, + INS_OPTS_SCALABLE_H_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . + // */ + theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_4BYTE, REG_V22, REG_P5, REG_V13, + INS_OPTS_SCALABLE_S_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . + // */ + theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_8BYTE, REG_V23, REG_P4, REG_V12, + INS_OPTS_SCALABLE_D_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . + // */ + + theEmitter->emitIns_R_R_R(INS_sve_fabd, EA_SCALABLE, REG_V24, REG_P3, REG_V11, + INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FABD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V25, REG_P2, REG_V10, + INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FADD ., /M, ., . */ // These are not yet supported by capstone. 
- // theEmitter->emitIns_R_R_R(INS_sve_famax, EA_SCALABLE, REG_V26, REG_P1, REG_V9, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FAMAX ., /M, ., . */ - // theEmitter->emitIns_R_R_R(INS_sve_famin, EA_SCALABLE, REG_V27, REG_P0, REG_V8, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FAMIN ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fdiv, EA_SCALABLE, REG_V28, REG_P0, REG_V7, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FDIV ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fdivr, EA_SCALABLE, REG_V29, REG_P1, REG_V6, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FDIVR ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fmax, EA_SCALABLE, REG_V30, REG_P2, REG_V5, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FMAX ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fmaxnm, EA_SCALABLE, REG_V31, REG_P3, REG_V4, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FMAXNM ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fmin, EA_SCALABLE, REG_V0, REG_P4, REG_V3, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FMIN ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fminnm, EA_SCALABLE, REG_V1, REG_P5, REG_V2, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FMINNM ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fmul, EA_SCALABLE, REG_V2, REG_P6, REG_V1, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FMUL ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fmulx, EA_SCALABLE, REG_V3, REG_P7, REG_V0, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FMULX ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fscale, EA_SCALABLE, REG_V4, REG_P6, REG_V31, INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FSCALE ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fsub, EA_SCALABLE, REG_V5, REG_P5, REG_V30, INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FSUB ., /M, ., . */ - theEmitter->emitIns_R_R_R(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P4, REG_V29, INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FSUBR ., /M, ., . 
*/ + // theEmitter->emitIns_R_R_R(INS_sve_famax, EA_SCALABLE, REG_V26, REG_P1, REG_V9, INS_OPTS_SCALABLE_D); // + // IF_SVE_HL_3A /* FAMAX ., /M, ., . */ + // theEmitter->emitIns_R_R_R(INS_sve_famin, EA_SCALABLE, REG_V27, REG_P0, REG_V8, INS_OPTS_SCALABLE_H); // + // IF_SVE_HL_3A /* FAMIN ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fdiv, EA_SCALABLE, REG_V28, REG_P0, REG_V7, + INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FDIV ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fdivr, EA_SCALABLE, REG_V29, REG_P1, REG_V6, + INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FDIVR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmax, EA_SCALABLE, REG_V30, REG_P2, REG_V5, + INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FMAX ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmaxnm, EA_SCALABLE, REG_V31, REG_P3, REG_V4, + INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FMAXNM ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmin, EA_SCALABLE, REG_V0, REG_P4, REG_V3, + INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FMIN ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fminnm, EA_SCALABLE, REG_V1, REG_P5, REG_V2, + INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FMINNM ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmul, EA_SCALABLE, REG_V2, REG_P6, REG_V1, + INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FMUL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmulx, EA_SCALABLE, REG_V3, REG_P7, REG_V0, + INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FMULX ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fscale, EA_SCALABLE, REG_V4, REG_P6, REG_V31, + INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FSCALE ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fsub, EA_SCALABLE, REG_V5, REG_P5, REG_V30, + INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P4, REG_V29, + INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FSUBR ., /M, ., . 
*/ #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f6e4fa66ba4b2c..809ac781c4d6bd 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1002,10 +1002,10 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) elemsize = id->idOpSize(); - assert(insOptsScalableFloat(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm + assert(insOptsScalableFloat(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm assert(isScalableVectorSize(elemsize)); break; @@ -8302,8 +8302,6 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_HJ_3A; break; - - case INS_sve_fabd: case INS_sve_fadd: case INS_sve_famax: @@ -16160,45 +16158,45 @@ void emitter::emitDispInsHelp( // (predicated) case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; case IF_SVE_AO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm break; case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; case IF_SVE_CO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally extract element to general register - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; default: From 5e9dad8b7ecb82011190dcee914a708cb1a3166f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 24 Nov 2023 15:56:11 +0000 Subject: [PATCH 24/32] Comment out unit test define --- src/coreclr/jit/codegenarm64.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index f79722bcdf2f54..20b791348d0cea 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5436,7 +5436,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) #define ALL_ARM64_EMITTER_UNIT_TESTS // #define ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // #define ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#define ALL_ARM64_EMITTER_UNIT_TESTS_SVE +// #define ALL_ARM64_EMITTER_UNIT_TESTS_SVE #if defined(DEBUG) void CodeGen::genArm64EmitterUnitTests() @@ -7307,8 +7307,8 @@ void 
CodeGen::genArm64EmitterUnitTests() genDefineTempLabel(genCreateTempLabel()); - // theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0); - // theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535); + theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0); + theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535); theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD); theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST); From 889c9092d27083ee3af5987a5a4eccd4c1d225cd Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 24 Nov 2023 17:17:35 +0000 Subject: [PATCH 25/32] Add parentheses --- src/coreclr/jit/emitarm64.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index b0b902e7e6c4af..c46dd9b5172e3a 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -863,45 +863,45 @@ inline static bool insOptsScalable(insOpts opt) inline static bool insOptsScalableSimple(insOpts opt) { // Opt is any of the standard scable types. - return ((opt == INS_OPTS_SCALABLE_B || opt == INS_OPTS_SCALABLE_H || opt == INS_OPTS_SCALABLE_S || - opt == INS_OPTS_SCALABLE_D)); + return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || + (opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableWords(insOpts opt) { // Opt is any of the standard word and above scable types. - return ((opt == INS_OPTS_SCALABLE_S || opt == INS_OPTS_SCALABLE_D)); + return ((opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableFloat(insOpts opt) { // Opt is any of the standard scable types that are valid for FP. 
- return ((opt == INS_OPTS_SCALABLE_B || opt == INS_OPTS_SCALABLE_H || opt == INS_OPTS_SCALABLE_S || - opt == INS_OPTS_SCALABLE_D)); + return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || + (opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableWide(insOpts opt) { - return ((opt == INS_OPTS_SCALABLE_WIDE_B || opt == INS_OPTS_SCALABLE_WIDE_H || opt == INS_OPTS_SCALABLE_WIDE_S)); + return ((opt == INS_OPTS_SCALABLE_WIDE_B) || (opt == INS_OPTS_SCALABLE_WIDE_H) || (opt == INS_OPTS_SCALABLE_WIDE_S)); } inline static bool insOptsScalableToSimd(insOpts opt) { - return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD || opt == INS_OPTS_SCALABLE_H_TO_SIMD || - opt == INS_OPTS_SCALABLE_S_TO_SIMD || opt == INS_OPTS_SCALABLE_D_TO_SIMD)); + return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD) || (opt == INS_OPTS_SCALABLE_H_TO_SIMD) || + (opt == INS_OPTS_SCALABLE_S_TO_SIMD) || (opt == INS_OPTS_SCALABLE_D_TO_SIMD)); } inline static bool insOptsScalableToSimdFloat(insOpts opt) { // Opt is any of the SIMD scable types that are valid for FP. 
- return ((opt == INS_OPTS_SCALABLE_H_TO_SIMD || opt == INS_OPTS_SCALABLE_S_TO_SIMD || - opt == INS_OPTS_SCALABLE_D_TO_SIMD)); + return ((opt == INS_OPTS_SCALABLE_H_TO_SIMD) || (opt == INS_OPTS_SCALABLE_S_TO_SIMD) || + (opt == INS_OPTS_SCALABLE_D_TO_SIMD)); } inline static bool insOptsScalableToScalar(insOpts opt) { - return ((opt == INS_OPTS_SCALABLE_B_TO_SCALAR || opt == INS_OPTS_SCALABLE_H_TO_SCALAR || - opt == INS_OPTS_SCALABLE_S_TO_SCALAR || opt == INS_OPTS_SCALABLE_D_TO_SCALAR)); + return ((opt == INS_OPTS_SCALABLE_B_TO_SCALAR) || (opt == INS_OPTS_SCALABLE_H_TO_SCALAR) || + (opt == INS_OPTS_SCALABLE_S_TO_SCALAR) || (opt == INS_OPTS_SCALABLE_D_TO_SCALAR)); } static bool isValidImmCond(ssize_t imm); From 1e3e98ec0a7c84f34093199f1b2caf1d56260f37 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 27 Nov 2023 09:09:29 +0000 Subject: [PATCH 26/32] Fix fadda predicate and comment typos --- src/coreclr/jit/emitarm64.cpp | 2 +- src/coreclr/jit/emitarm64.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 809ac781c4d6bd..8180e6db36d4a4 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16194,7 +16194,7 @@ void emitter::emitDispInsHelp( case IF_SVE_HJ_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg emitDispReg(id->idReg1(), size, true); // ddddd emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index c46dd9b5172e3a..09b973d6ef0e4c 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -862,20 +862,20 @@ inline static bool insOptsScalable(insOpts opt) inline static bool insOptsScalableSimple(insOpts opt) { - // Opt is any of the standard scable types. + // Opt is any of the standard scalable types. return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableWords(insOpts opt) { - // Opt is any of the standard word and above scable types. + // Opt is any of the standard word and above scalable types. return ((opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableFloat(insOpts opt) { - // Opt is any of the standard scable types that are valid for FP. + // Opt is any of the standard scalable types that are valid for FP. return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } @@ -893,7 +893,7 @@ inline static bool insOptsScalableToSimd(insOpts opt) inline static bool insOptsScalableToSimdFloat(insOpts opt) { - // Opt is any of the SIMD scable types that are valid for FP. + // Opt is any of the SIMD scalable types that are valid for FP. 
return ((opt == INS_OPTS_SCALABLE_H_TO_SIMD) || (opt == INS_OPTS_SCALABLE_S_TO_SIMD) || (opt == INS_OPTS_SCALABLE_D_TO_SIMD)); } From f2c714b14fd38aa2de4ef6eb992beb838ac2ea40 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 27 Nov 2023 09:15:08 +0000 Subject: [PATCH 27/32] Fix formatting --- src/coreclr/jit/emitarm64.cpp | 6 +++--- src/coreclr/jit/emitarm64.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 8180e6db36d4a4..228e6d4dabdde3 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16193,10 +16193,10 @@ void emitter::emitDispInsHelp( break; case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - emitDispReg(id->idReg1(), size, true); // ddddd + emitDispReg(id->idReg1(), size, true); // ddddd emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; default: diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 09b973d6ef0e4c..441ed8b2d7b240 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -882,7 +882,8 @@ inline static bool insOptsScalableFloat(insOpts opt) inline static bool insOptsScalableWide(insOpts opt) { - return ((opt == INS_OPTS_SCALABLE_WIDE_B) || (opt == INS_OPTS_SCALABLE_WIDE_H) || (opt == INS_OPTS_SCALABLE_WIDE_S)); + return ((opt == INS_OPTS_SCALABLE_WIDE_B) || (opt == INS_OPTS_SCALABLE_WIDE_H) || + (opt == INS_OPTS_SCALABLE_WIDE_S)); } inline static bool insOptsScalableToSimd(insOpts opt) From 96271ae2f321a3ff0141eafdd7f943c32cb0cc33 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 27 Nov 2023 12:05:55 +0000 Subject: [PATCH 28/32] Better commenting + fix up errors found --- 
src/coreclr/jit/codegenarm64.cpp | 188 ++++++++++++++++--------------- src/coreclr/jit/emitarm64.cpp | 36 +++--- src/coreclr/jit/emitarm64.h | 3 +- 3 files changed, 121 insertions(+), 106 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 20b791348d0cea..6be622511ab72f 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10114,6 +10114,7 @@ void CodeGen::genArm64EmitterUnitTests() genDefineTempLabel(genCreateTempLabel()); + // IF_SVE_AA_3A theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_P1, REG_V2, INS_OPTS_SCALABLE_B); // AND ., /M, ., . theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_P4, REG_V5, @@ -10123,6 +10124,7 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V29, REG_P7, REG_V31, INS_OPTS_SCALABLE_D); // ORR ., /M, ., . + // IF_SVE_AB_3A theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V5, REG_P6, REG_V7, INS_OPTS_SCALABLE_B); // ADD ., /M, ., . theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V15, REG_P7, REG_V29, @@ -10130,6 +10132,7 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_subr, EA_SCALABLE, REG_V2, REG_P0, REG_V13, INS_OPTS_SCALABLE_S); // SUBR ., /M, ., . + // IF_SVE_AC_3A theEmitter->emitIns_R_R_R(INS_sve_sdiv, EA_SCALABLE, REG_V3, REG_P2, REG_V9, INS_OPTS_SCALABLE_S); // SDIV ., /M, ., . theEmitter->emitIns_R_R_R(INS_sve_sdivr, EA_SCALABLE, REG_V31, REG_P3, REG_V29, @@ -10139,6 +10142,7 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_udivr, EA_SCALABLE, REG_V13, REG_P7, REG_V15, INS_OPTS_SCALABLE_D); // UDIVR ., /M, ., . + // IF_SVE_AD_3A theEmitter->emitIns_R_R_R(INS_sve_smax, EA_SCALABLE, REG_V24, REG_P0, REG_V2, INS_OPTS_SCALABLE_B); // SMAX ., /M, ., . 
theEmitter->emitIns_R_R_R(INS_sve_smin, EA_SCALABLE, REG_V9, REG_P1, REG_V27, @@ -10152,184 +10156,186 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_umin, EA_SCALABLE, REG_V12, REG_P7, REG_V0, INS_OPTS_SCALABLE_D); // UMIN ., /M, ., . + // IF_SVE_AE_3A theEmitter->emitIns_R_R_R(INS_sve_mul, EA_SCALABLE, REG_V5, REG_P1, REG_V3, - INS_OPTS_SCALABLE_D); // IF_SVE_AE_3A /* MUL ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* MUL ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_smulh, EA_SCALABLE, REG_V17, REG_P5, REG_V5, - INS_OPTS_SCALABLE_S); // IF_SVE_AE_3A /* SMULH ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* SMULH ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V12, REG_P2, REG_V24, - INS_OPTS_SCALABLE_B); // IF_SVE_AE_3A /* UMULH ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* UMULH ., /M, ., . */ + // IF_SVE_AN_3A theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V5, REG_P0, REG_V21, - INS_OPTS_SCALABLE_S); // IF_SVE_AN_3A /* ASR ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* ASR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_asrr, EA_SCALABLE, REG_V1, REG_P7, REG_V20, - INS_OPTS_SCALABLE_B); // IF_SVE_AN_3A /* ASRR ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* ASRR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_P2, REG_V0, - INS_OPTS_SCALABLE_H); // IF_SVE_AN_3A /* LSL ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* LSL ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_lslr, EA_SCALABLE, REG_V27, REG_P6, REG_V31, - INS_OPTS_SCALABLE_D); // IF_SVE_AN_3A /* LSLR ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* LSLR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V5, REG_P5, REG_V6, - INS_OPTS_SCALABLE_B); // IF_SVE_AN_3A /* LSR ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* LSR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_lsrr, EA_SCALABLE, REG_V15, REG_P4, REG_V17, - INS_OPTS_SCALABLE_S); // IF_SVE_AN_3A /* LSRR ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* LSRR ., /M, ., . 
*/ + // IF_SVE_AO_3A theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V4, REG_P3, REG_V24, - INS_OPTS_SCALABLE_WIDE_B); // IF_SVE_AO_3A /* ASR ., /M, ., .D - // */ + INS_OPTS_SCALABLE_WIDE_B); /* ASR ., /M, ., .D */ theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_P7, REG_V3, - INS_OPTS_SCALABLE_WIDE_H); // IF_SVE_AO_3A /* LSL ., /M, ., .D - // */ + INS_OPTS_SCALABLE_WIDE_H); /* LSL ., /M, ., .D */ theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_P0, REG_V0, - INS_OPTS_SCALABLE_WIDE_S); // IF_SVE_AO_3A /* LSR ., /M, ., .D - // */ + INS_OPTS_SCALABLE_WIDE_S); /* LSR ., /M, ., .D */ + // IF_SVE_CM_3A theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_SCALABLE, REG_V31, REG_P7, REG_V31, - INS_OPTS_SCALABLE_B); // IF_SVE_CM_3A /* CLASTA ., , ., . */ + INS_OPTS_SCALABLE_B); /* CLASTA ., , ., . */ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V30, REG_P6, REG_V30, - INS_OPTS_SCALABLE_D); // IF_SVE_CM_3A /* CLASTB ., , ., . */ + INS_OPTS_SCALABLE_D); /* CLASTB ., , ., . */ + // IF_SVE_CN_3A theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_2BYTE, REG_V12, REG_P1, REG_V15, - INS_OPTS_SCALABLE_H_TO_SIMD); // IF_SVE_CN_3A /* CLASTA , , , . - // */ + INS_OPTS_SCALABLE_H_TO_SIMD); /* CLASTA , , , . */ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_V13, REG_P2, REG_V16, - INS_OPTS_SCALABLE_S_TO_SIMD); // IF_SVE_CN_3A /* CLASTB , , , . - // */ + INS_OPTS_SCALABLE_S_TO_SIMD); /* CLASTB , , , . */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_V14, REG_P0, REG_V17, + INS_OPTS_SCALABLE_D_TO_SIMD); /* CLASTB , , , . */ + // IF_SVE_CO_3A // Note: EA_4BYTE used for B and H (destination register is W) theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R0, REG_P0, REG_V0, - INS_OPTS_SCALABLE_B_TO_SCALAR); // IF_SVE_CO_3A /* CLASTA , , , - // . */ + INS_OPTS_SCALABLE_B_TO_SCALAR); /* CLASTA , , , . 
*/ theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R1, REG_P2, REG_V3, - INS_OPTS_SCALABLE_H_TO_SCALAR); // IF_SVE_CO_3A /* CLASTA , , , - // . */ + INS_OPTS_SCALABLE_H_TO_SCALAR); /* CLASTA , , , . */ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_R23, REG_P5, REG_V12, - INS_OPTS_SCALABLE_S_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , - // . */ + INS_OPTS_SCALABLE_S_TO_SCALAR); /* CLASTB , , , . */ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_R3, REG_P6, REG_V9, - INS_OPTS_SCALABLE_D_TO_SCALAR); // IF_SVE_CO_3A /* CLASTB , , , - // . */ + INS_OPTS_SCALABLE_D_TO_SCALAR); /* CLASTB , , , . */ + // IF_SVE_EP_3A theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10, - INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* SHADD ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* SHADD ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_shsub, EA_SCALABLE, REG_V16, REG_P1, REG_V11, - INS_OPTS_SCALABLE_H); // IF_SVE_EP_3A /* SHSUB ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* SHSUB ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_shsubr, EA_SCALABLE, REG_V17, REG_P2, REG_V12, - INS_OPTS_SCALABLE_S); // IF_SVE_EP_3A /* SHSUBR ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* SHSUBR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_srhadd, EA_SCALABLE, REG_V18, REG_P3, REG_V13, - INS_OPTS_SCALABLE_D); // IF_SVE_EP_3A /* SRHADD ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* SRHADD ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uhadd, EA_SCALABLE, REG_V19, REG_P4, REG_V14, - INS_OPTS_SCALABLE_B); // IF_SVE_EP_3A /* UHADD ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* UHADD ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uhsub, EA_SCALABLE, REG_V20, REG_P5, REG_V15, - INS_OPTS_SCALABLE_H); // IF_SVE_EP_3A /* UHSUB ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* UHSUB ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uhsubr, EA_SCALABLE, REG_V21, REG_P6, REG_V16, - INS_OPTS_SCALABLE_S); // IF_SVE_EP_3A /* UHSUBR ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* UHSUBR ., /M, ., . 
*/ theEmitter->emitIns_R_R_R(INS_sve_urhadd, EA_SCALABLE, REG_V22, REG_P7, REG_V17, - INS_OPTS_SCALABLE_D); // IF_SVE_EP_3A /* URHADD ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* URHADD ., /M, ., . */ + // IF_SVE_ER_3A theEmitter->emitIns_R_R_R(INS_sve_addp, EA_SCALABLE, REG_V23, REG_P6, REG_V18, - INS_OPTS_SCALABLE_B); // IF_SVE_ER_3A /* ADDP ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* ADDP ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_smaxp, EA_SCALABLE, REG_V24, REG_P5, REG_V19, - INS_OPTS_SCALABLE_H); // IF_SVE_ER_3A /* SMAXP ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* SMAXP ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_sminp, EA_SCALABLE, REG_V25, REG_P4, REG_V20, - INS_OPTS_SCALABLE_S); // IF_SVE_ER_3A /* SMINP ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* SMINP ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_umaxp, EA_SCALABLE, REG_V26, REG_P3, REG_V21, - INS_OPTS_SCALABLE_D); // IF_SVE_ER_3A /* UMAXP ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* UMAXP ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uminp, EA_SCALABLE, REG_V27, REG_P2, REG_V22, - INS_OPTS_SCALABLE_B); // IF_SVE_ER_3A /* UMINP ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* UMINP ., /M, ., . */ + // IF_SVE_ET_3A theEmitter->emitIns_R_R_R(INS_sve_sqadd, EA_SCALABLE, REG_V28, REG_P1, REG_V23, - INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* SQADD ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* SQADD ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_sqsub, EA_SCALABLE, REG_V29, REG_P0, REG_V24, - INS_OPTS_SCALABLE_H); // IF_SVE_ET_3A /* SQSUB ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* SQSUB ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_sqsubr, EA_SCALABLE, REG_V30, REG_P1, REG_V25, - INS_OPTS_SCALABLE_H); // IF_SVE_ET_3A /* SQSUBR ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* SQSUBR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_suqadd, EA_SCALABLE, REG_V31, REG_P2, REG_V26, - INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* SUQADD ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* SUQADD ., /M, ., . 
*/ theEmitter->emitIns_R_R_R(INS_sve_uqadd, EA_SCALABLE, REG_V0, REG_P3, REG_V27, - INS_OPTS_SCALABLE_S); // IF_SVE_ET_3A /* UQADD ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* UQADD ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uqsub, EA_SCALABLE, REG_V1, REG_P4, REG_V28, - INS_OPTS_SCALABLE_D); // IF_SVE_ET_3A /* UQSUB ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* UQSUB ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uqsubr, EA_SCALABLE, REG_V2, REG_P5, REG_V29, - INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* UQSUBR ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* UQSUBR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_usqadd, EA_SCALABLE, REG_V3, REG_P6, REG_V30, - INS_OPTS_SCALABLE_B); // IF_SVE_ET_3A /* USQADD ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* USQADD ., /M, ., . */ + // IF_SVE_EU_3A theEmitter->emitIns_R_R_R(INS_sve_sqrshl, EA_SCALABLE, REG_V4, REG_P7, REG_V31, - INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* SQRSHL ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* SQRSHL ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_sqrshlr, EA_SCALABLE, REG_V5, REG_P0, REG_V30, - INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* SQRSHLR ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* SQRSHLR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_sqshl, EA_SCALABLE, REG_V6, REG_P1, REG_V29, - INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* SQSHL ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* SQSHL ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_sqshlr, EA_SCALABLE, REG_V7, REG_P2, REG_V28, - INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* SQSHLR ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* SQSHLR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_srshl, EA_SCALABLE, REG_V8, REG_P3, REG_V27, - INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* SRSHL ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* SRSHL ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_srshlr, EA_SCALABLE, REG_V9, REG_P4, REG_V26, - INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* SRSHLR ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* SRSHLR ., /M, ., . 
*/ theEmitter->emitIns_R_R_R(INS_sve_uqrshl, EA_SCALABLE, REG_V10, REG_P5, REG_V25, - INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* UQRSHL ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* UQRSHL ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uqrshlr, EA_SCALABLE, REG_V11, REG_P6, REG_V24, - INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* UQRSHLR ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* UQRSHLR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uqshl, EA_SCALABLE, REG_V12, REG_P7, REG_V23, - INS_OPTS_SCALABLE_B); // IF_SVE_EU_3A /* UQSHL ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* UQSHL ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_uqshlr, EA_SCALABLE, REG_V13, REG_P0, REG_V22, - INS_OPTS_SCALABLE_H); // IF_SVE_EU_3A /* UQSHLR ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* UQSHLR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_urshl, EA_SCALABLE, REG_V14, REG_P1, REG_V21, - INS_OPTS_SCALABLE_S); // IF_SVE_EU_3A /* URSHL ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* URSHL ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_urshlr, EA_SCALABLE, REG_V15, REG_P2, REG_V20, - INS_OPTS_SCALABLE_D); // IF_SVE_EU_3A /* URSHLR ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* URSHLR ., /M, ., . */ + // IF_SVE_GR_3A theEmitter->emitIns_R_R_R(INS_sve_faddp, EA_SCALABLE, REG_V16, REG_P3, REG_V19, - INS_OPTS_SCALABLE_H); // IF_SVE_GR_3A /* FADDP ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* FADDP ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fmaxnmp, EA_SCALABLE, REG_V17, REG_P4, REG_V18, - INS_OPTS_SCALABLE_S); // IF_SVE_GR_3A /* FMAXNMP ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* FMAXNMP ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fmaxp, EA_SCALABLE, REG_V18, REG_P5, REG_V17, - INS_OPTS_SCALABLE_D); // IF_SVE_GR_3A /* FMAXP ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* FMAXP ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fminnmp, EA_SCALABLE, REG_V19, REG_P6, REG_V16, - INS_OPTS_SCALABLE_S); // IF_SVE_GR_3A /* FMINNMP ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* FMINNMP ., /M, ., . 
*/ theEmitter->emitIns_R_R_R(INS_sve_fminp, EA_SCALABLE, REG_V20, REG_P7, REG_V15, - INS_OPTS_SCALABLE_H); // IF_SVE_GR_3A /* FMINP ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* FMINP ., /M, ., . */ + // IF_SVE_HJ_3A theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_2BYTE, REG_V21, REG_P6, REG_V14, - INS_OPTS_SCALABLE_H_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . - // */ + INS_OPTS_SCALABLE_H_TO_SIMD); /* FADDA , , , . */ theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_4BYTE, REG_V22, REG_P5, REG_V13, - INS_OPTS_SCALABLE_S_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . - // */ + INS_OPTS_SCALABLE_S_TO_SIMD); /* FADDA , , , . */ theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_8BYTE, REG_V23, REG_P4, REG_V12, - INS_OPTS_SCALABLE_D_TO_SIMD); // IF_SVE_HJ_3A /* FADDA , , , . - // */ - + INS_OPTS_SCALABLE_D_TO_SIMD); /* FADDA , , , . */ + // IF_SVE_HL_3A theEmitter->emitIns_R_R_R(INS_sve_fabd, EA_SCALABLE, REG_V24, REG_P3, REG_V11, - INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FABD ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* FABD ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V25, REG_P2, REG_V10, - INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FADD ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* FADD ., /M, ., . */ // These are not yet supported by capstone. - // theEmitter->emitIns_R_R_R(INS_sve_famax, EA_SCALABLE, REG_V26, REG_P1, REG_V9, INS_OPTS_SCALABLE_D); // - // IF_SVE_HL_3A /* FAMAX ., /M, ., . */ - // theEmitter->emitIns_R_R_R(INS_sve_famin, EA_SCALABLE, REG_V27, REG_P0, REG_V8, INS_OPTS_SCALABLE_H); // - // IF_SVE_HL_3A /* FAMIN ., /M, ., . */ + // theEmitter->emitIns_R_R_R(INS_sve_famax, EA_SCALABLE, REG_V26, REG_P1, REG_V9, INS_OPTS_SCALABLE_D); + /* FAMAX ., /M, ., . */ + // theEmitter->emitIns_R_R_R(INS_sve_famin, EA_SCALABLE, REG_V27, REG_P0, REG_V8, INS_OPTS_SCALABLE_H); + /* FAMIN ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fdiv, EA_SCALABLE, REG_V28, REG_P0, REG_V7, - INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FDIV ., /M, ., . 
*/ + INS_OPTS_SCALABLE_S); /* FDIV ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fdivr, EA_SCALABLE, REG_V29, REG_P1, REG_V6, - INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FDIVR ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* FDIVR ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fmax, EA_SCALABLE, REG_V30, REG_P2, REG_V5, - INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FMAX ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* FMAX ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fmaxnm, EA_SCALABLE, REG_V31, REG_P3, REG_V4, - INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FMAXNM ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* FMAXNM ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fmin, EA_SCALABLE, REG_V0, REG_P4, REG_V3, - INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FMIN ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* FMIN ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fminnm, EA_SCALABLE, REG_V1, REG_P5, REG_V2, - INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FMINNM ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* FMINNM ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fmul, EA_SCALABLE, REG_V2, REG_P6, REG_V1, - INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FMUL ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* FMUL ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fmulx, EA_SCALABLE, REG_V3, REG_P7, REG_V0, - INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FMULX ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* FMULX ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fscale, EA_SCALABLE, REG_V4, REG_P6, REG_V31, - INS_OPTS_SCALABLE_H); // IF_SVE_HL_3A /* FSCALE ., /M, ., . */ + INS_OPTS_SCALABLE_H); /* FSCALE ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fsub, EA_SCALABLE, REG_V5, REG_P5, REG_V30, - INS_OPTS_SCALABLE_S); // IF_SVE_HL_3A /* FSUB ., /M, ., . */ + INS_OPTS_SCALABLE_S); /* FSUB ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P4, REG_V29, - INS_OPTS_SCALABLE_D); // IF_SVE_HL_3A /* FSUBR ., /M, ., . */ + INS_OPTS_SCALABLE_D); /* FSUBR ., /M, ., . 
*/ #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 228e6d4dabdde3..fe3c9231bb7205 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -943,6 +943,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(datasize == EA_8BYTE); break; + // Scalable. case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) @@ -962,6 +963,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + // Scalable, .S or .D. case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) elemsize = id->idOpSize(); assert(insOptsScalableWords(id->idInsOpt())); // xx @@ -971,6 +973,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + // Scalable Wide. case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) elemsize = id->idOpSize(); assert(insOptsScalableWide(id->idInsOpt())); // xx @@ -980,16 +983,27 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + // Scalable to Simd. case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) elemsize = id->idOpSize(); assert(insOptsScalableToSimd(id->idInsOpt())); // xx assert(isVectorRegister(id->idReg1())); // ddddd assert(isLowPredicateRegister(id->idReg2())); // ggg assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(elemsize)); + break; + + // Scalable to FP Simd. 
+ case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + elemsize = id->idOpSize(); + assert(insOptsScalableToSimdFloat(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm assert(isValidVectorElemsizeSveFloat(elemsize)); break; + // Scalable to general register. case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register elemsize = id->idOpSize(); assert(insOptsScalableToScalar(id->idInsOpt())); // xx @@ -999,6 +1013,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidScalarDatasize(elemsize)); break; + // Scalable FP. case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) elemsize = id->idOpSize(); @@ -13920,6 +13935,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + // Scalable. case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) @@ -13945,6 +13961,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + // Scalable to general register. case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_Rd(id->idReg1()); // ddddd @@ -16145,6 +16162,7 @@ void emitter::emitDispInsHelp( } break; + // Scalable. case IF_SVE_AA_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise logical operations (predicated) case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) @@ -16164,6 +16182,7 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; + // Scalable. Reg3 is .D. case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg @@ -16171,6 +16190,7 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm break; + // Scalable. No predicate type. case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg @@ -16178,20 +16198,9 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; + // Scalable to general register or SIMD. No predicate type. case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm - break; - case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm - break; - case IF_SVE_HJ_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) emitDispReg(id->idReg1(), size, true); // ddddd emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg @@ -18384,6 +18393,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + // Scalable. case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 441ed8b2d7b240..0926a95abb5bb4 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -876,8 +876,7 @@ inline static bool insOptsScalableWords(insOpts opt) inline static bool insOptsScalableFloat(insOpts opt) { // Opt is any of the standard scalable types that are valid for FP. - return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || - (opt == INS_OPTS_SCALABLE_D)); + return ((opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableWide(insOpts opt) From 2f763f19f7815438e89db62b7eae1adbbdf76022 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 27 Nov 2023 15:26:00 +0000 Subject: [PATCH 29/32] Better descriptions for insOpts functions --- src/coreclr/jit/emitarm64.cpp | 2 +- src/coreclr/jit/emitarm64.h | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index fe3c9231bb7205..6f4c2c9f8bfac3 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16182,7 +16182,7 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; - // Scalable. Reg3 is .D. + // Scalable. 
Reg3 has elements of size 8 bytes. case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 0926a95abb5bb4..7e084f2408c2c7 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -856,50 +856,53 @@ inline static bool insOptsConvertIntToFloat(insOpts opt) inline static bool insOptsScalable(insOpts opt) { // Opt is any of the scalable types. - return insOptsScalableSimple(opt) || insOptsScalableWide(opt) || insOptsScalableToSimd(opt) || - insOptsScalableToScalar(opt); + return ((insOptsScalableSimple(opt)) || (insOptsScalableWide(opt)) || (insOptsScalableToSimd(opt)) || + (insOptsScalableToScalar(opt))); } inline static bool insOptsScalableSimple(insOpts opt) { - // Opt is any of the standard scalable types. + // `opt` is any of the standard scalable types. return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableWords(insOpts opt) { - // Opt is any of the standard word and above scalable types. + // `opt` is any of the standard word and above scalable types. return ((opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableFloat(insOpts opt) { - // Opt is any of the standard scalable types that are valid for FP. + // `opt` is any of the standard scalable types that are valid for FP. return ((opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableWide(insOpts opt) { + // `opt` is any of the scalable types that are valid for widening to size D. 
return ((opt == INS_OPTS_SCALABLE_WIDE_B) || (opt == INS_OPTS_SCALABLE_WIDE_H) || (opt == INS_OPTS_SCALABLE_WIDE_S)); } inline static bool insOptsScalableToSimd(insOpts opt) { + // `opt` is any of the scalable types that are valid for conversion to a scalar in a SIMD register. return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD) || (opt == INS_OPTS_SCALABLE_H_TO_SIMD) || (opt == INS_OPTS_SCALABLE_S_TO_SIMD) || (opt == INS_OPTS_SCALABLE_D_TO_SIMD)); } inline static bool insOptsScalableToSimdFloat(insOpts opt) { - // Opt is any of the SIMD scalable types that are valid for FP. + // `opt` is any of the scalable types that are valid for conversion to an FP scalar in a SIMD register. return ((opt == INS_OPTS_SCALABLE_H_TO_SIMD) || (opt == INS_OPTS_SCALABLE_S_TO_SIMD) || (opt == INS_OPTS_SCALABLE_D_TO_SIMD)); } inline static bool insOptsScalableToScalar(insOpts opt) { + // `opt` is any of the SIMD scalable types that are valid for conversion to scalar. return ((opt == INS_OPTS_SCALABLE_B_TO_SCALAR) || (opt == INS_OPTS_SCALABLE_H_TO_SCALAR) || (opt == INS_OPTS_SCALABLE_S_TO_SCALAR) || (opt == INS_OPTS_SCALABLE_D_TO_SCALAR)); } From 11d38ba32084fa7aea1cd706096a6ba69e34dc5d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 27 Nov 2023 16:36:01 +0000 Subject: [PATCH 30/32] Add latencies --- src/coreclr/jit/emit.h | 2 + src/coreclr/jit/emitarm64.cpp | 112 +++++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index e24ba91ba94f53..26c83cd513f3b0 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1711,7 +1711,9 @@ class emitter #define PERFSCORE_THROUGHPUT_8C 8.0f // slower - 8 cycles #define PERFSCORE_THROUGHPUT_9C 9.0f // slower - 9 cycles #define PERFSCORE_THROUGHPUT_10C 10.0f // slower - 10 cycles +#define PERFSCORE_THROUGHPUT_11C 11.0f // slower - 11 cycles #define PERFSCORE_THROUGHPUT_13C 13.0f // slower - 13 cycles +#define PERFSCORE_THROUGHPUT_14C 14.0f // slower
- 14 cycles #define PERFSCORE_THROUGHPUT_19C 19.0f // slower - 19 cycles #define PERFSCORE_THROUGHPUT_25C 25.0f // slower - 25 cycles #define PERFSCORE_THROUGHPUT_33C 33.0f // slower - 33 cycles diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 6f4c2c9f8bfac3..fb670a53122642 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -18393,27 +18393,125 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - // Scalable. + // SVE latencies from Arm Neoverse N2 Software Optimization Guide, Issue 5.0, Revision: r0p3 + + // Predicate logical case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + result.insLatency = PERFSCORE_LATENCY_1C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Arithmetic, basic case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + // Max/min, basic and pairwise case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Divides, 32 bit (Note: worse for 64 bit) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + result.insLatency = PERFSCORE_LATENCY_12C; // 7 to 12 + result.insThroughput = PERFSCORE_THROUGHPUT_11C; // 1/11 to 1/7 + break; + + // Multiply, B, H, S element size (Note: D element size is slightly slower) case IF_SVE_AE_3A: // ........xx......
...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Arithmetic, shift case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Conditional extract operations, SIMD&FP scalar and vector forms case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Conditional extract operations, scalar form case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register - case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + result.insLatency = PERFSCORE_LATENCY_8C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Arithmetic, pairwise add + // Max/min, basic and pairwise case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Arithmetic, complex case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract - case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left - // (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + // Arithmetic, shift complex + case IF_SVE_EU_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Floating point arithmetic + // Floating point min/max pairwise case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Floating point associative add, F64. (Note: Worse for F32 and F16) case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; + switch (ins) + { + // Floating point absolute value/difference + case INS_sve_fabd: + // Floating point min/max + case INS_sve_fmax: + case INS_sve_fmaxnm: + case INS_sve_fmin: + case INS_sve_fminnm: + // Floating point arithmetic + case INS_sve_fadd: + case INS_sve_fsub: + case INS_sve_fsubr: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Floating point divide, F64 (Note: Worse for F32, F16) + case INS_sve_fdiv: + case INS_sve_fdivr: + result.insLatency = PERFSCORE_LATENCY_15C; // 7 to 15 + result.insThroughput = PERFSCORE_THROUGHPUT_14C; // 1/14 to 1/7 + break; + + // Floating point multiply + case INS_sve_fmul: + case INS_sve_fmulx: + case INS_sve_fscale: + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } break; default: From cffafad9c041c4dd8d29aa0c5ec4af0d7319565a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 27 Nov 2023 
16:40:36 +0000 Subject: [PATCH 31/32] Fix formatting --- src/coreclr/jit/codegenarm64.cpp | 2 +- src/coreclr/jit/emitarm64.cpp | 13 +++++++------ src/coreclr/jit/emitarm64.h | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 6be622511ab72f..d1784e019703f5 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10213,7 +10213,7 @@ void CodeGen::genArm64EmitterUnitTests() // IF_SVE_EP_3A theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10, - INS_OPTS_SCALABLE_B); /* SHADD ., /M, ., . */ + INS_OPTS_SCALABLE_B); /* SHADD ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_shsub, EA_SCALABLE, REG_V16, REG_P1, REG_V11, INS_OPTS_SCALABLE_H); /* SHSUB ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_shsubr, EA_SCALABLE, REG_V17, REG_P2, REG_V12, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index fb670a53122642..dcb9557f0d278e 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -997,9 +997,9 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) elemsize = id->idOpSize(); assert(insOptsScalableToSimdFloat(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm assert(isValidVectorElemsizeSveFloat(elemsize)); break; @@ -18412,7 +18412,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins // Divides, 32 bit (Note: worse for 64 bit) case IF_SVE_AC_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer divide vectors (predicated) - result.insLatency = PERFSCORE_LATENCY_12C; // 7 to 12 + result.insLatency = PERFSCORE_LATENCY_12C; // 7 to 12 result.insThroughput = PERFSCORE_THROUGHPUT_11C; // 1/11 to 1/7 break; @@ -18456,7 +18456,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; // Arithmetic, shift complex - case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left (predicated) + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) result.insLatency = PERFSCORE_LATENCY_4C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; @@ -18495,7 +18496,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins // Floating point divide, F64 (Note: Worse for F32, F16) case INS_sve_fdiv: case INS_sve_fdivr: - result.insLatency = PERFSCORE_LATENCY_15C; // 7 to 15 + result.insLatency = PERFSCORE_LATENCY_15C; // 7 to 15 result.insThroughput = PERFSCORE_THROUGHPUT_14C; // 1/14 to 1/7 break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 7e084f2408c2c7..46b60a98d2f414 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -857,7 +857,7 @@ inline static bool insOptsScalable(insOpts opt) { // Opt is any of the scalable types. 
return ((insOptsScalableSimple(opt)) || (insOptsScalableWide(opt)) || (insOptsScalableToSimd(opt)) || - (insOptsScalableToScalar(opt))); + (insOptsScalableToScalar(opt))); } inline static bool insOptsScalableSimple(insOpts opt) From 15a5fe81ee75a7a65d4edb3e7a6539fd3b207419 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 27 Nov 2023 16:49:19 +0000 Subject: [PATCH 32/32] Add emitDispLowPredicateReg Change-Id: I263ae1c649a66de393bca0c8f5ac2f9c3311fcbe --- src/coreclr/jit/emitarm64.cpp | 42 ++++++++++++++++++++++------------- src/coreclr/jit/emitarm64.h | 1 + 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index dcb9557f0d278e..c4b8eaa50224c1 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -14544,6 +14544,16 @@ void emitter::emitDispPredicateReg(regNumber reg, PredicateType ptype, bool addC emitDispComma(); } +//------------------------------------------------------------------------ +// emitDispLowPredicateReg: Display a low predicate register name with an arrangement suffix +// +void emitter::emitDispLowPredicateReg(regNumber reg, PredicateType ptype, bool addComma) +{ + assert(isLowPredicateRegister(reg)); + reg = (regNumber)((((unsigned)reg - REG_PREDICATE_FIRST) & 0x7) + REG_PREDICATE_FIRST); + emitDispPredicateReg(reg, ptype, addComma); +} + //------------------------------------------------------------------------ // emitDispArrangement: Display a SIMD vector arrangement suffix // @@ -16176,36 +16186,36 @@ void emitter::emitDispInsHelp( // (predicated) case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations case IF_SVE_HL_3A: // ........xx......
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; // Scalable. Reg3 has elements of size 8 bytes. case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm break; // Scalable. No predicate type. case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; // Scalable to general register or SIMD. 
No predicate type. case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispLowPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; default: diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 46b60a98d2f414..94ac7336813a2e 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -53,6 +53,7 @@ void emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, boo void emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); void emitDispVectorElemList(regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma); void emitDispPredicateReg(regNumber reg, PredicateType ptype, bool addComma); +void emitDispLowPredicateReg(regNumber reg, PredicateType ptype, bool addComma); void emitDispArrangement(insOpts opt); void emitDispElemsize(emitAttr elemsize); void emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr);