From b1b87812a7eab7ce99833b01fa53545bc43a7ae8 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Sat, 26 Oct 2024 14:49:18 -0700 Subject: [PATCH 1/6] add GFNI intrinsics --- src/coreclr/inc/clrconfigvalues.h | 1 + src/coreclr/inc/corinfoinstructionset.h | 212 +++++++++------ src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/inc/readytoruninstructionset.h | 3 + src/coreclr/jit/compiler.cpp | 7 + src/coreclr/jit/emitxarch.cpp | 9 + src/coreclr/jit/hwintrinsic.cpp | 3 + src/coreclr/jit/hwintrinsiclistxarch.h | 33 +++ src/coreclr/jit/hwintrinsicxarch.cpp | 41 ++- src/coreclr/jit/instrsxarch.h | 5 +- src/coreclr/jit/jitconfigvalues.h | 1 + src/coreclr/jit/lowerxarch.cpp | 25 ++ .../Compiler/HardwareIntrinsicHelpers.cs | 12 + .../Runtime/ReadyToRunInstructionSet.cs | 3 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 6 + .../JitInterface/CorInfoInstructionSet.cs | 244 ++++++++++++------ .../ThunkGenerator/InstructionSetDesc.txt | 8 + .../Compiler/HardwareIntrinsicHelpers.Aot.cs | 3 +- src/coreclr/vm/codeman.cpp | 13 +- .../ILLink.Substitutions.NoX86Intrinsics.xml | 12 + .../System.Private.CoreLib.Shared.projitems | 5 +- .../Intrinsics/X86/Gfni.NotSupported.cs | 117 +++++++++ .../src/System/Runtime/Intrinsics/X86/Gfni.cs | 121 +++++++++ .../ref/System.Runtime.Intrinsics.cs | 31 +++ src/native/minipal/cpufeatures.c | 5 + src/native/minipal/cpufeatures.h | 1 + 26 files changed, 746 insertions(+), 185 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.NotSupported.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.cs diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 00d9745e177e7f..9b12a16d4d9730 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -778,6 +778,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1, "Allows BMI2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableGFNI, W("EnableGFNI"), 1, "Allows GFNI+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableLZCNT, W("EnableLZCNT"), 1, "Allows LZCNT+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1, "Allows PCLMULQDQ+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableMOVBE, W("EnableMOVBE"), 1, "Allows MOVBE+ hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index ce41b79ae7dc76..7e5169b32fce1e 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -66,48 +66,51 @@ enum CORINFO_InstructionSet InstructionSet_MOVBE=21, InstructionSet_X86Serialize=22, InstructionSet_EVEX=23, - InstructionSet_AVX512F=24, - InstructionSet_AVX512F_VL=25, - InstructionSet_AVX512BW=26, - InstructionSet_AVX512BW_VL=27, - InstructionSet_AVX512CD=28, - InstructionSet_AVX512CD_VL=29, - InstructionSet_AVX512DQ=30, - InstructionSet_AVX512DQ_VL=31, - InstructionSet_AVX512VBMI=32, - InstructionSet_AVX512VBMI_VL=33, - InstructionSet_AVX10v1=34, - InstructionSet_AVX10v1_V512=35, - InstructionSet_VectorT128=36, - InstructionSet_VectorT256=37, - InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512CD_X64=61, - InstructionSet_AVX512DQ_X64=62, - InstructionSet_AVX512VBMI_X64=63, - InstructionSet_AVX10v1_X64=64, - InstructionSet_AVX10v1_V512_X64=65, + InstructionSet_GFNI=24, + InstructionSet_GFNI_V256=25, + InstructionSet_GFNI_V512=26, + InstructionSet_AVX512F=27, + InstructionSet_AVX512F_VL=28, + InstructionSet_AVX512BW=29, + InstructionSet_AVX512BW_VL=30, + InstructionSet_AVX512CD=31, + InstructionSet_AVX512CD_VL=32, + InstructionSet_AVX512DQ=33, + InstructionSet_AVX512DQ_VL=34, + InstructionSet_AVX512VBMI=35, + InstructionSet_AVX512VBMI_VL=36, + InstructionSet_AVX10v1=37, + InstructionSet_AVX10v1_V512=38, + InstructionSet_VectorT128=39, + InstructionSet_VectorT256=40, + InstructionSet_VectorT512=41, + InstructionSet_X86Base_X64=42, + InstructionSet_SSE_X64=43, + InstructionSet_SSE2_X64=44, + InstructionSet_SSE3_X64=45, + InstructionSet_SSSE3_X64=46, + InstructionSet_SSE41_X64=47, + InstructionSet_SSE42_X64=48, + InstructionSet_AVX_X64=49, + InstructionSet_AVX2_X64=50, + InstructionSet_AES_X64=51, + InstructionSet_BMI1_X64=52, + InstructionSet_BMI2_X64=53, + InstructionSet_FMA_X64=54, + InstructionSet_LZCNT_X64=55, + InstructionSet_PCLMULQDQ_X64=56, + InstructionSet_POPCNT_X64=57, + InstructionSet_AVXVNNI_X64=58, + InstructionSet_MOVBE_X64=59, + InstructionSet_X86Serialize_X64=60, + InstructionSet_EVEX_X64=61, + InstructionSet_AVX512F_X64=62, + InstructionSet_AVX512BW_X64=63, + InstructionSet_AVX512CD_X64=64, + InstructionSet_AVX512DQ_X64=65, + InstructionSet_AVX512VBMI_X64=66, + InstructionSet_AVX10v1_X64=67, + InstructionSet_AVX10v1_V512_X64=68, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -133,48 +136,51 @@ enum CORINFO_InstructionSet InstructionSet_MOVBE=21, InstructionSet_X86Serialize=22, InstructionSet_EVEX=23, - InstructionSet_AVX512F=24, - InstructionSet_AVX512F_VL=25, - InstructionSet_AVX512BW=26, - InstructionSet_AVX512BW_VL=27, - InstructionSet_AVX512CD=28, - InstructionSet_AVX512CD_VL=29, - InstructionSet_AVX512DQ=30, - InstructionSet_AVX512DQ_VL=31, - InstructionSet_AVX512VBMI=32, - InstructionSet_AVX512VBMI_VL=33, - InstructionSet_AVX10v1=34, - InstructionSet_AVX10v1_V512=35, - InstructionSet_VectorT128=36, - InstructionSet_VectorT256=37, - InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512CD_X64=61, - InstructionSet_AVX512DQ_X64=62, - InstructionSet_AVX512VBMI_X64=63, - InstructionSet_AVX10v1_X64=64, - InstructionSet_AVX10v1_V512_X64=65, + InstructionSet_GFNI=24, + InstructionSet_GFNI_V256=25, + InstructionSet_GFNI_V512=26, + InstructionSet_AVX512F=27, + InstructionSet_AVX512F_VL=28, + InstructionSet_AVX512BW=29, + InstructionSet_AVX512BW_VL=30, + InstructionSet_AVX512CD=31, + InstructionSet_AVX512CD_VL=32, + InstructionSet_AVX512DQ=33, + InstructionSet_AVX512DQ_VL=34, + InstructionSet_AVX512VBMI=35, + InstructionSet_AVX512VBMI_VL=36, + InstructionSet_AVX10v1=37, + InstructionSet_AVX10v1_V512=38, + InstructionSet_VectorT128=39, + InstructionSet_VectorT256=40, + InstructionSet_VectorT512=41, + InstructionSet_X86Base_X64=42, + InstructionSet_SSE_X64=43, + InstructionSet_SSE2_X64=44, + InstructionSet_SSE3_X64=45, + InstructionSet_SSSE3_X64=46, + InstructionSet_SSE41_X64=47, + InstructionSet_SSE42_X64=48, + InstructionSet_AVX_X64=49, + InstructionSet_AVX2_X64=50, + InstructionSet_AES_X64=51, + InstructionSet_BMI1_X64=52, + InstructionSet_BMI2_X64=53, + InstructionSet_FMA_X64=54, + InstructionSet_LZCNT_X64=55, + InstructionSet_PCLMULQDQ_X64=56, + InstructionSet_POPCNT_X64=57, + InstructionSet_AVXVNNI_X64=58, + InstructionSet_MOVBE_X64=59, + InstructionSet_X86Serialize_X64=60, + InstructionSet_EVEX_X64=61, + InstructionSet_AVX512F_X64=62, + InstructionSet_AVX512BW_X64=63, + InstructionSet_AVX512CD_X64=64, + InstructionSet_AVX512DQ_X64=65, + InstructionSet_AVX512VBMI_X64=66, + InstructionSet_AVX10v1_X64=67, + InstructionSet_AVX10v1_V512_X64=68, #endif // TARGET_X86 }; @@ -600,6 +606,16 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) @@ -706,6 +722,16 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) @@ -896,6 +922,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "EVEX"; case InstructionSet_EVEX_X64 : return "EVEX_X64"; + case InstructionSet_GFNI : + return "GFNI"; + case InstructionSet_GFNI_V256 : + return "GFNI_V256"; + case InstructionSet_GFNI_V512 : + return "GFNI_V512"; case InstructionSet_AVX512F : return "AVX512F"; case InstructionSet_AVX512F_X64 : @@ -988,6 +1020,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "X86Serialize"; case InstructionSet_EVEX : return "EVEX"; + case InstructionSet_GFNI : + return "GFNI"; + case InstructionSet_GFNI_V256 : + return "GFNI_V256"; + case InstructionSet_GFNI_V512 : + return "GFNI_V512"; case InstructionSet_AVX512F : return "AVX512F"; case InstructionSet_AVX512F_VL : @@ -1073,6 +1111,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize; case READYTORUN_INSTRUCTION_EVEX: return InstructionSet_EVEX; + case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI; + case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256; + case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512; case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512F; case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512F_VL; case READYTORUN_INSTRUCTION_Avx512BW: return InstructionSet_AVX512BW; @@ -1110,6 +1151,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize; case READYTORUN_INSTRUCTION_EVEX: return InstructionSet_EVEX; + case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI; + case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256; + case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512; case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512F; case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512F_VL; case READYTORUN_INSTRUCTION_Avx512BW: return InstructionSet_AVX512BW; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 07e246cd3fa13e..82fbaf1c0d108b 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 04021b93-e969-41ed-96cd-4c583673b9ab */ - 0x04021b93, - 0xe969, - 0x41ed, - {0x96, 0xcd, 0x4c, 0x58, 0x36, 0x73, 0xb9, 0xab} +constexpr GUID JITEEVersionIdentifier = { /* 64146448-11b1-4f94-b1f2-edce91fbcb33 */ + 0x64146448, + 0x11b1, + 0x4f94, + {0xb1, 0xf2, 0xed, 0xce, 0x91, 0xfb, 0xcb, 0x33} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 4ad8c6b4e5912c..efb8d987415314 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -55,6 +55,9 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Avx10v1=44, READYTORUN_INSTRUCTION_Avx10v1_V512=46, READYTORUN_INSTRUCTION_EVEX=47, + READYTORUN_INSTRUCTION_Gfni=48, + READYTORUN_INSTRUCTION_Gfni_V256=49, + READYTORUN_INSTRUCTION_Gfni_V512=50, }; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 3d2f568301bfa6..90c63689c89ce4 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -6161,6 +6161,13 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, instructionSetFlags.AddInstructionSet(InstructionSet_FMA); } + if (JitConfig.EnableGFNI() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_GFNI); + instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V256); + instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V512); + } + if (JitConfig.EnableLZCNT() != 0) { instructionSetFlags.AddInstructionSet(InstructionSet_LZCNT); diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 16d35f828e996a..3e101eb526b0c6 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1587,6 +1587,12 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const return false; } + case INS_gf2p8affineinvqb: + case INS_gf2p8affineqb: + { + return TakesVexPrefix(ins); + } + default: { unreached(); @@ -19824,6 +19830,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vpdpwssd: case INS_vpdpbusds: case INS_vpdpwssds: + case INS_gf2p8affineinvqb: + case INS_gf2p8affineqb: + case INS_gf2p8mulb: result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency += PERFSCORE_LATENCY_5C; break; diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 48ad12bf76a1c3..388a9a12b865ee 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -789,6 +789,9 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // MOVBE { FIRST_NI_X86Serialize, LAST_NI_X86Serialize }, { NI_Illegal, NI_Illegal }, // EVEX + { FIRST_NI_GFNI, LAST_NI_GFNI }, + { FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 }, + { FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 }, { FIRST_NI_AVX512F, LAST_NI_AVX512F }, { FIRST_NI_AVX512F_VL, LAST_NI_AVX512F_VL }, { FIRST_NI_AVX512BW, LAST_NI_AVX512BW }, diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index cf1562838b9807..54cc11641b3d9e 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1554,6 +1554,39 @@ HARDWARE_INTRINSIC(POPCNT_X64, PopCount, HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) #define LAST_NI_X86Serialize NI_X86Serialize_Serialize +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// GFNI Intrinsics +#define FIRST_NI_GFNI NI_GFNI_GaloisFieldAffineTransform +HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransform, 16, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransformInverse, 16, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI, GaloisFieldMultiply, 16, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +#define LAST_NI_GFNI NI_GFNI_GaloisFieldMultiply + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// GFNI Intrinsics +#define FIRST_NI_GFNI_V256 NI_GFNI_V256_GaloisFieldAffineTransform +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransform, 32, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransformInverse, 32, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldMultiply, 32, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +#define LAST_NI_GFNI_V256 NI_GFNI_V256_GaloisFieldMultiply + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// GFNI Intrinsics +#define FIRST_NI_GFNI_V512 NI_GFNI_V512_GaloisFieldAffineTransform +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransform, 64, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransformInverse, 64, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldMultiply, 64, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +#define LAST_NI_GFNI_V512 NI_GFNI_V512_GaloisFieldMultiply + // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 8c5f27234053f4..a3be62e49a3bfe 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -103,13 +103,32 @@ static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa) } //------------------------------------------------------------------------ -// V512VersionOfIsa: Gets the corresponding AVX10V512 only InstructionSet for a given InstructionSet +// V256VersionOfIsa: Gets the corresponding V256 only InstructionSet for a given InstructionSet // // Arguments: // isa -- The InstructionSet ID // // Return Value: -// The AVX10V512 only InstructionSet associated with isa +// The V256 only InstructionSet associated with isa +static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_GFNI: + return InstructionSet_GFNI_V256; + default: + return InstructionSet_NONE; + } +} + +//------------------------------------------------------------------------ +// V512VersionOfIsa: Gets the corresponding V512 only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The V512 only InstructionSet associated with isa static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) { switch (isa) @@ -118,6 +137,8 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_AVX10v1_V512; case InstructionSet_AVX10v1_X64: return InstructionSet_AVX10v1_V512_X64; + case InstructionSet_GFNI: + return InstructionSet_GFNI_V512; default: return InstructionSet_NONE; } @@ -205,6 +226,13 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) return InstructionSet_FMA; } } + else if (className[0] == 'G') + { + if (strcmp(className, "Gfni") == 0) + { + return InstructionSet_GFNI; + } + } else if (className[0] == 'L') { if (strcmp(className, "Lzcnt") == 0) @@ -330,7 +358,11 @@ CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, if (className[0] == 'V') { - if (strcmp(className, "V512") == 0) + if (strcmp(className, "V256") == 0) + { + return V256VersionOfIsa(enclosingIsa); + } + else if (strcmp(className, "V512") == 0) { return V512VersionOfIsa(enclosingIsa); } @@ -873,6 +905,9 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_AVX10v1_V512: case InstructionSet_AVX10v1_V512_X64: case InstructionSet_EVEX: + case InstructionSet_GFNI: + case InstructionSet_GFNI_V256: + case InstructionSet_GFNI_V512: { return true; } diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 7e46fdd2e2d687..b347f162b628d0 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -393,7 +393,7 @@ INST3(psignb, "psignb", IUM_WR, BAD_CODE, BAD_CODE, INST3(psignd, "psignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN INST3(psignw, "psignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), INS_TT_NONE, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN -// AESNI & PCLMULQDQ +// AESNI, PCLMULQDQ, & GFNI INST3(aesdec, "aesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow INST3(aesdeclast, "aesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES encryption flow @@ -401,6 +401,9 @@ INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_TT_NONE, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords +INST3(gf2p8affineinvqb, "gf2p8affineinvqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCF), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Galois Field Affine Transformation Inverse +INST3(gf2p8affineqb, "gf2p8affineqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCE), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Galois Field Affine Transformation +INST3(gf2p8mulb, "gf2p8mulb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xCF), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Multiply Bytes // SSE4.1 INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 23158d49342dec..d06c5ead258e56 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -404,6 +404,7 @@ RELEASE_CONFIG_INTEGER(EnableAVXVNNI, W("EnableAVXVNNI"), RELEASE_CONFIG_INTEGER(EnableBMI1, W("EnableBMI1"), 1) // Allows BMI1+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableBMI2, W("EnableBMI2"), 1) // Allows BMI2+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableFMA, W("EnableFMA"), 1) // Allows FMA+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableGFNI, W("EnableGFNI"), 1) // Allows GFNI+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableLZCNT, W("EnableLZCNT"), 1) // Allows LZCNT+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1) // Allows PCLMULQDQ+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnablePOPCNT, W("EnablePOPCNT"), 1) // Allows POPCNT+ hardware intrinsics to be disabled diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 833746babb0829..4803c889426f21 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -2592,6 +2592,19 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return LowerHWIntrinsicTernaryLogic(node); } + case NI_GFNI_GaloisFieldAffineTransform: + case NI_GFNI_GaloisFieldAffineTransformInverse: + case NI_GFNI_V256_GaloisFieldAffineTransform: + case NI_GFNI_V256_GaloisFieldAffineTransformInverse: + case NI_GFNI_V512_GaloisFieldAffineTransform: + case NI_GFNI_V512_GaloisFieldAffineTransformInverse: + { + // Managed API surfaces these with only UBYTE operands. + // We retype in order to support EVEX embedded broadcast of op2 + node->SetSimdBaseJitType(CORINFO_TYPE_ULONG); + break; + } + default: break; } @@ -9412,6 +9425,12 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_AVX10v1_TernaryLogic: case NI_AVX10v1_V512_Range: case NI_AVX10v1_V512_Reduce: + case NI_GFNI_GaloisFieldAffineTransform: + case NI_GFNI_GaloisFieldAffineTransformInverse: + case NI_GFNI_V256_GaloisFieldAffineTransform: + case NI_GFNI_V256_GaloisFieldAffineTransformInverse: + case NI_GFNI_V512_GaloisFieldAffineTransform: + case NI_GFNI_V512_GaloisFieldAffineTransformInverse: { assert(!supportsSIMDScalarLoads); @@ -11328,6 +11347,12 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX10v1_V512_InsertVector128: case NI_AVX10v1_V512_InsertVector256: case NI_AVX10v1_V512_Range: + case NI_GFNI_GaloisFieldAffineTransform: + case NI_GFNI_GaloisFieldAffineTransformInverse: + case NI_GFNI_V256_GaloisFieldAffineTransform: + case NI_GFNI_V256_GaloisFieldAffineTransformInverse: + case NI_GFNI_V512_GaloisFieldAffineTransform: + case NI_GFNI_V512_GaloisFieldAffineTransformInverse: { if (!isContainedImm) { diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 03d6ab76d365be..0801deb8d5118e 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -78,6 +78,7 @@ private static class XArchIntrinsicConstants public const int Serialize = 0x20000; public const int Avx10v1 = 0x40000; public const int Evex = 0x80000; + public const int Gfni = 0x100000; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -135,6 +136,14 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("avx10v1_v512"); if ((flags & Evex) != 0) builder.AddSupportedInstructionSet("evex"); + if ((flags & Gfni) != 0) + { + builder.AddSupportedInstructionSet("gfni"); + if ((flags & Avx) != 0) + builder.AddSupportedInstructionSet("gfni_v256"); + if ((flags & Avx512) != 0) + builder.AddSupportedInstructionSet("gfni_v512"); + } } public static int FromInstructionSet(InstructionSet instructionSet) @@ -199,6 +208,9 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVX10v1_V512_X64 => (Avx10v1 | Avx512), InstructionSet.X64_EVEX => Evex, InstructionSet.X64_EVEX_X64 => Evex, + InstructionSet.X64_GFNI => Gfni, + InstructionSet.X64_GFNI_V256 => (Gfni | Avx), + InstructionSet.X64_GFNI_V512 => (Gfni | Avx512), // Baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index dd6a57731444e5..894ffd8cfa5429 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -58,6 +58,9 @@ public enum ReadyToRunInstructionSet Avx10v1=44, Avx10v1_V512=46, EVEX=47, + Gfni=48, + Gfni_V256=49, + Gfni_V512=50, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 79e1a34afd1650..49f0577f171a91 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -100,6 +100,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_X86Serialize_X64: return ReadyToRunInstructionSet.X86Serialize; case InstructionSet.X64_EVEX: return ReadyToRunInstructionSet.EVEX; case InstructionSet.X64_EVEX_X64: return ReadyToRunInstructionSet.EVEX; + case InstructionSet.X64_GFNI: return ReadyToRunInstructionSet.Gfni; + case InstructionSet.X64_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256; + case InstructionSet.X64_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512; case InstructionSet.X64_AVX512F: return ReadyToRunInstructionSet.Avx512F; case InstructionSet.X64_AVX512F_X64: return ReadyToRunInstructionSet.Avx512F; case InstructionSet.X64_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL; @@ -174,6 +177,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_X86Serialize_X64: return null; case InstructionSet.X86_EVEX: return ReadyToRunInstructionSet.EVEX; case InstructionSet.X86_EVEX_X64: return null; + case InstructionSet.X86_GFNI: return ReadyToRunInstructionSet.Gfni; + case InstructionSet.X86_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256; + case InstructionSet.X86_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512; case InstructionSet.X86_AVX512F: return ReadyToRunInstructionSet.Avx512F; case InstructionSet.X86_AVX512F_X64: return null; case InstructionSet.X86_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 0152638396fa7d..d49b74c051d487 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -64,6 +64,9 @@ public enum InstructionSet X64_MOVBE = InstructionSet_X64.MOVBE, X64_X86Serialize = InstructionSet_X64.X86Serialize, X64_EVEX = InstructionSet_X64.EVEX, + X64_GFNI = InstructionSet_X64.GFNI, + X64_GFNI_V256 = InstructionSet_X64.GFNI_V256, + X64_GFNI_V512 = InstructionSet_X64.GFNI_V512, X64_AVX512F = InstructionSet_X64.AVX512F, X64_AVX512F_VL = InstructionSet_X64.AVX512F_VL, X64_AVX512BW = InstructionSet_X64.AVX512BW, @@ -129,6 +132,9 @@ public enum InstructionSet X86_MOVBE = InstructionSet_X86.MOVBE, X86_X86Serialize = InstructionSet_X86.X86Serialize, X86_EVEX = InstructionSet_X86.EVEX, + X86_GFNI = InstructionSet_X86.GFNI, + X86_GFNI_V256 = InstructionSet_X86.GFNI_V256, + X86_GFNI_V512 = InstructionSet_X86.GFNI_V512, X86_AVX512F = InstructionSet_X86.AVX512F, X86_AVX512F_VL = InstructionSet_X86.AVX512F_VL, X86_AVX512BW = InstructionSet_X86.AVX512BW, @@ -230,48 +236,51 @@ public enum InstructionSet_X64 MOVBE = 21, X86Serialize = 22, EVEX = 23, - AVX512F = 24, - AVX512F_VL = 25, - AVX512BW = 26, - AVX512BW_VL = 27, - AVX512CD = 28, - AVX512CD_VL = 29, - AVX512DQ = 30, - AVX512DQ_VL = 31, - AVX512VBMI = 32, - AVX512VBMI_VL = 33, - AVX10v1 = 34, - AVX10v1_V512 = 35, - VectorT128 = 36, - VectorT256 = 37, - VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512BW_X64 = 60, - AVX512CD_X64 = 61, - AVX512DQ_X64 = 62, - AVX512VBMI_X64 = 63, - AVX10v1_X64 = 64, - AVX10v1_V512_X64 = 65, + GFNI = 24, + GFNI_V256 = 25, + GFNI_V512 = 26, + AVX512F = 27, + AVX512F_VL = 28, + AVX512BW = 29, + AVX512BW_VL = 30, + AVX512CD = 31, + AVX512CD_VL = 32, + AVX512DQ = 33, + AVX512DQ_VL = 34, + AVX512VBMI = 35, + AVX512VBMI_VL = 36, + AVX10v1 = 37, + AVX10v1_V512 = 38, + VectorT128 = 39, + VectorT256 = 40, + VectorT512 = 41, + X86Base_X64 = 42, + SSE_X64 = 43, + SSE2_X64 = 44, + SSE3_X64 = 45, + SSSE3_X64 = 46, + SSE41_X64 = 47, + SSE42_X64 = 48, + AVX_X64 = 49, + AVX2_X64 = 50, + AES_X64 = 51, + BMI1_X64 = 52, + BMI2_X64 = 53, + FMA_X64 = 54, + LZCNT_X64 = 55, + PCLMULQDQ_X64 = 56, + POPCNT_X64 = 57, + AVXVNNI_X64 = 58, + MOVBE_X64 = 59, + X86Serialize_X64 = 60, + EVEX_X64 = 61, + AVX512F_X64 = 62, + AVX512BW_X64 = 63, + AVX512CD_X64 = 64, + AVX512DQ_X64 = 65, + AVX512VBMI_X64 = 66, + AVX10v1_X64 = 67, + AVX10v1_V512_X64 = 68, } public enum InstructionSet_X86 @@ -301,48 +310,51 @@ public enum InstructionSet_X86 MOVBE = 21, X86Serialize = 22, EVEX = 23, - AVX512F = 24, - AVX512F_VL = 25, - AVX512BW = 26, - AVX512BW_VL = 27, - AVX512CD = 28, - AVX512CD_VL = 29, - AVX512DQ = 30, - AVX512DQ_VL = 31, - AVX512VBMI = 32, - AVX512VBMI_VL = 33, - AVX10v1 = 34, - AVX10v1_V512 = 35, - VectorT128 = 36, - VectorT256 = 37, - VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512BW_X64 = 60, - AVX512CD_X64 = 61, - AVX512DQ_X64 = 62, - AVX512VBMI_X64 = 63, - AVX10v1_X64 = 64, - AVX10v1_V512_X64 = 65, + GFNI = 24, + GFNI_V256 = 25, + GFNI_V512 = 26, + AVX512F = 27, + AVX512F_VL = 28, + AVX512BW = 29, + AVX512BW_VL = 30, + AVX512CD = 31, + AVX512CD_VL = 32, + AVX512DQ = 33, + AVX512DQ_VL = 34, + AVX512VBMI = 35, + AVX512VBMI_VL = 36, + AVX10v1 = 37, + AVX10v1_V512 = 38, + VectorT128 = 39, + VectorT256 = 40, + VectorT512 = 41, + X86Base_X64 = 42, + SSE_X64 = 43, + SSE2_X64 = 44, + SSE3_X64 = 45, + SSSE3_X64 = 46, + SSE41_X64 = 47, + SSE42_X64 = 48, + AVX_X64 = 49, + AVX2_X64 = 50, + AES_X64 = 51, + BMI1_X64 = 52, + BMI2_X64 = 53, + FMA_X64 = 54, + LZCNT_X64 = 55, + PCLMULQDQ_X64 = 56, + POPCNT_X64 = 57, + AVXVNNI_X64 = 58, + MOVBE_X64 = 59, + X86Serialize_X64 = 60, + EVEX_X64 = 61, + AVX512F_X64 = 62, + AVX512BW_X64 = 63, + AVX512CD_X64 = 64, + AVX512DQ_X64 = 65, + AVX512VBMI_X64 = 66, + AVX10v1_X64 = 67, + AVX10v1_V512_X64 = 68, } public unsafe struct InstructionSetFlags : IEnumerable @@ -738,6 +750,16 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X64_SSE41); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V256)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V256)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1)) resultflags.AddInstructionSet(InstructionSet.X64_EVEX); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) @@ -845,6 +867,16 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X86_X86Base); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X86_SSE41); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V256)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V256)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1)) resultflags.AddInstructionSet(InstructionSet.X86_EVEX); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) @@ -1069,6 +1101,16 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X64_SSE41)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1)) @@ -1176,6 +1218,16 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X86_SSE41)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX)) resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1)) @@ -1311,6 +1363,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X64_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X64_X86Serialize, true); yield return new InstructionSetInfo("evex", "EVEX", InstructionSet.X64_EVEX, true); + yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X64_GFNI, true); + yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X64_GFNI_V256, true); + yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X64_GFNI_V512, true); yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X64_AVX512F, true); yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X64_AVX512F_VL, true); yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X64_AVX512BW, true); @@ -1352,6 +1407,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X86_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X86_X86Serialize, true); yield return new InstructionSetInfo("evex", "EVEX", InstructionSet.X86_EVEX, true); + yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X86_GFNI, true); + yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X86_GFNI_V256, true); + yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X86_GFNI_V512, true); yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X86_AVX512F, true); yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X86_AVX512F_VL, true); yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X86_AVX512BW, true); @@ -1769,6 +1827,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_EVEX; } + case "Gfni": + if (nestedTypeName == "V256") + { return InstructionSet.X64_GFNI_V256; } + else + if (nestedTypeName == "V512") + { return InstructionSet.X64_GFNI_V512; } + else + { return InstructionSet.X64_GFNI; } + case "Avx512F": if (nestedTypeName == "X64") { return InstructionSet.X64_AVX512F_X64; } @@ -1902,6 +1969,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "EVEX": { return InstructionSet.X86_EVEX; } + case "Gfni": + if (nestedTypeName == "V256") + { return InstructionSet.X86_GFNI_V256; } + else + if (nestedTypeName == "V512") + { return InstructionSet.X86_GFNI_V512; } + else + { return InstructionSet.X86_GFNI; } + case "Avx512F": if (nestedTypeName == "VL") { return InstructionSet.X86_AVX512F_VL; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 1876255732ab8e..babd6bca007114 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -48,6 +48,9 @@ instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI instructionset ,X86 ,Movbe , ,27 ,MOVBE ,movbe instructionset ,X86 ,X86Serialize , ,28 ,X86Serialize ,serialize instructionset ,X86 ,EVEX , ,47 ,EVEX ,evex +instructionset ,X86 ,Gfni , ,48 ,GFNI ,gfni +instructionset ,X86 ,Gfni_V256 , ,49 ,GFNI_V256 ,gfni_v256 +instructionset ,X86 ,Gfni_V512 , ,50 ,GFNI_V512 ,gfni_v512 instructionset ,X86 ,Avx512F , ,29 ,AVX512F ,avx512f instructionset ,X86 ,Avx512F_VL , ,30 ,AVX512F_VL ,avx512f_vl instructionset ,X86 ,Avx512BW , ,31 ,AVX512BW ,avx512bw @@ -144,6 +147,11 @@ implication ,X86 ,AES ,SSE2 implication ,X86 ,PCLMULQDQ ,SSE2 implication ,X86 ,AVXVNNI ,AVX2 implication ,X86 ,X86Serialize ,X86Base +implication ,X86 ,GFNI ,SSE41 +implication ,X86 ,GFNI_V256 ,GFNI +implication ,X86 ,GFNI_V256 ,AVX +implication ,X86 ,GFNI_V512 ,GFNI +implication ,X86 ,GFNI_V512 ,AVX512F implication ,X86 ,AVX10v1 ,EVEX implication ,X86 ,AVX10v1_V512 ,AVX10v1 diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs index 85e7a943dba4af..5a1dea90fae2d2 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -52,8 +52,7 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte if(!uint.IsPow2((uint)flag)) { // These are the ISAs managed by multiple-bit flags. - // we need to emit different IL to handle the checks. - // For now just Avx10v1_V512 = (Avx10v1 | Avx512) + // We need to emit different IL to handle the checks. // (isSupportedField & flag) == flag codeStream.Emit(ILOpcode.ldsfld, emit.NewToken(isSupportedField)); codeStream.EmitLdc(flag); diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index fa89ace071698c..3a5be1d7b74682 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1402,6 +1402,13 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_X86Serialize); } + if (((cpuFeatures & XArchIntrinsicConstants_Gfni) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableGFNI)) + { + CPUCompileFlags.Set(InstructionSet_GFNI); + CPUCompileFlags.Set(InstructionSet_GFNI_V256); + CPUCompileFlags.Set(InstructionSet_GFNI_V512); + } + if (((cpuFeatures & XArchIntrinsicConstants_Evex) != 0) && ((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0)) { @@ -1409,11 +1416,7 @@ void EEJitManager::SetCpuInfo() { CPUCompileFlags.Set(InstructionSet_EVEX); CPUCompileFlags.Set(InstructionSet_AVX10v1); - - if((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) - { - CPUCompileFlags.Set(InstructionSet_AVX10v1_V512); - } + CPUCompileFlags.Set(InstructionSet_AVX10v1_V512); } } #elif defined(TARGET_ARM64) diff --git a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml index 60020ec1e24df4..f7ccf53e81ef3f 100644 --- a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml +++ b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml @@ -30,6 +30,9 @@ + + + @@ -99,6 +102,15 @@ + + + + + + + + + diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 1b6323b940e867..ee0b72726f9189 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2636,6 +2636,8 @@ + + @@ -2674,6 +2676,7 @@ + @@ -2803,4 +2806,4 @@ - \ No newline at end of file + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.NotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.NotSupported.cs new file mode 100644 index 00000000000000..d16e7275a8e9d8 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.NotSupported.cs @@ -0,0 +1,117 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + /// Provides access to X86 GFNI hardware instructions via intrinsics. + [CLSCompliant(false)] + public abstract class Gfni : Sse41 + { + internal Gfni() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { [Intrinsic] get => false; } + + /// Provides access to the X86 GFNI hardware instructions that are only available to 64-bit processes, via intrinsics. + public new abstract class X64 : Sse41.X64 + { + internal X64() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { [Intrinsic] get => false; } + } + + public abstract class V256 + { + internal V256() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { [Intrinsic] get => false; } + + /// + /// __m256i _mm256_gf2p8affineinv_epi64_epi8 (__m256i x, __m256i A, int b) + /// GF2P8AFFINEINVQB ymm1, ymm2/m256, imm8 + /// VGF2P8AFFINEINVQB ymm1, ymm2, ymm3/m256, imm8 + /// VGF2P8AFFINEINVQB ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst, imm8 + /// + public static Vector256 GaloisFieldAffineTransformInverse(Vector256 x, Vector256 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m256i _mm256_gf2p8affine_epi64_epi8 (__m256i x, __m256i A, int b) + /// GF2P8AFFINEQB ymm1, ymm2/m256, imm8 + /// VGF2P8AFFINEQB ymm1, ymm2, ymm3/m256, imm8 + /// VGF2P8AFFINEQB ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst, imm8 + /// + public static Vector256 GaloisFieldAffineTransform(Vector256 x, Vector256 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m256i _mm256_gf2p8mul_epi8 (__m256i a, __m256i b) + /// GF2P8MULB ymm1, ymm2/m256 + /// VGF2P8MULB ymm1, ymm2, ymm3/m256 + /// VGF2P8MULB ymm1{k1}{z}, ymm2, ymm3/m256 + /// + public static Vector256 GaloisFieldMultiply(Vector256 left, Vector256 right) => throw new PlatformNotSupportedException(); + } + + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { [Intrinsic] get => false; } + + /// + /// __m512i _mm512_gf2p8affineinv_epi64_epi8 (__m512i x, __m512i A, int b) + /// GF2P8AFFINEINVQB zmm1, zmm2/m512, imm8 + /// VGF2P8AFFINEINVQB zmm1, zmm2, zmm3/m512, imm8 + /// VGF2P8AFFINEINVQB zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst, imm8 + /// + public static Vector512 GaloisFieldAffineTransformInverse(Vector512 x, Vector512 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m512i _mm512_gf2p8affine_epi64_epi8 (__m512i x, __m512i A, int b) + /// GF2P8AFFINEQB zmm1, zmm2/m512, imm8 + /// VGF2P8AFFINEQB zmm1, zmm2, zmm3/m512, imm8 + /// VGF2P8AFFINEQB zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst, imm8 + /// + public static Vector512 GaloisFieldAffineTransform(Vector512 x, Vector512 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m512i _mm512_gf2p8mul_epi8 (__m512i a, __m512i b) + /// GF2P8MULB zmm1, zmm2/m512 + /// VGF2P8MULB zmm1, zmm2, zmm3/m512 + /// VGF2P8MULB zmm1{k1}{z}, zmm2, zmm3/m512 + /// + public static Vector512 GaloisFieldMultiply(Vector512 left, Vector512 right) => throw new PlatformNotSupportedException(); + } + + /// + /// __m128i _mm_gf2p8affineinv_epi64_epi8 (__m128i x, __m128i A, int b) + /// GF2P8AFFINEINVQB xmm1, xmm2/m128, imm8 + /// VGF2P8AFFINEINVQB xmm1, xmm2, xmm3/m128, imm8 + /// VGF2P8AFFINEINVQB xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8 + /// + public static Vector128 GaloisFieldAffineTransformInverse(Vector128 x, Vector128 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m128i _mm_gf2p8affine_epi64_epi8 (__m128i x, __m128i A, int b) + /// GF2P8AFFINEQB xmm1, xmm2/m128, imm8 + /// VGF2P8AFFINEQB xmm1, xmm2, xmm3/m128, imm8 + /// VGF2P8AFFINEQB xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8 + /// + public static Vector128 GaloisFieldAffineTransform(Vector128 x, Vector128 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m128i _mm_gf2p8mul_epi8 (__m128i a, __m128i b) + /// GF2P8MULB xmm1, xmm2/m128 + /// VGF2P8MULB xmm1, xmm2, xmm3/m128 + /// VGF2P8MULB xmm1{k1}{z}, xmm2, xmm3/m128 + /// + public static Vector128 GaloisFieldMultiply(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.cs new file mode 100644 index 00000000000000..4496d9e821bb62 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.cs @@ -0,0 +1,121 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + /// Provides access to X86 GFNI hardware instructions via intrinsics. + [Intrinsic] + [CLSCompliant(false)] + public abstract class Gfni : Sse41 + { + internal Gfni() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { get => IsSupported; } + + /// Provides access to the X86 GFNI hardware instructions that are only available to 64-bit processes, via intrinsics. + [Intrinsic] + public new abstract class X64 : Sse41.X64 + { + internal X64() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { get => IsSupported; } + } + + [Intrinsic] + public abstract class V256 + { + internal V256() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { get => IsSupported; } + + /// + /// __m256i _mm256_gf2p8affineinv_epi64_epi8 (__m256i x, __m256i A, int b) + /// GF2P8AFFINEINVQB ymm1, ymm2/m256, imm8 + /// VGF2P8AFFINEINVQB ymm1, ymm2, ymm3/m256, imm8 + /// VGF2P8AFFINEINVQB ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst, imm8 + /// + public static Vector256 GaloisFieldAffineTransformInverse(Vector256 x, Vector256 a, [ConstantExpected] byte b) => GaloisFieldAffineTransformInverse(x, a, b); + /// + /// __m256i _mm256_gf2p8affine_epi64_epi8 (__m256i x, __m256i A, int b) + /// GF2P8AFFINEQB ymm1, ymm2/m256, imm8 + /// VGF2P8AFFINEQB ymm1, ymm2, ymm3/m256, imm8 + /// VGF2P8AFFINEQB ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst, imm8 + /// + public static Vector256 GaloisFieldAffineTransform(Vector256 x, Vector256 a, [ConstantExpected] byte b) => GaloisFieldAffineTransform(x, a, b); + /// + /// __m256i _mm256_gf2p8mul_epi8 (__m256i a, __m256i b) + /// GF2P8MULB ymm1, ymm2/m256 + /// VGF2P8MULB ymm1, ymm2, ymm3/m256 + /// VGF2P8MULB ymm1{k1}{z}, ymm2, ymm3/m256 + /// + public static Vector256 GaloisFieldMultiply(Vector256 left, Vector256 right) => GaloisFieldMultiply(left, right); + } + + [Intrinsic] + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { get => IsSupported; } + + /// + /// __m512i _mm512_gf2p8affineinv_epi64_epi8 (__m512i x, __m512i A, int b) + /// GF2P8AFFINEINVQB zmm1, zmm2/m512, imm8 + /// VGF2P8AFFINEINVQB zmm1, zmm2, zmm3/m512, imm8 + /// VGF2P8AFFINEINVQB zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst, imm8 + /// + public static Vector512 GaloisFieldAffineTransformInverse(Vector512 x, Vector512 a, [ConstantExpected] byte b) => GaloisFieldAffineTransformInverse(x, a, b); + /// + /// __m512i _mm512_gf2p8affine_epi64_epi8 (__m512i x, __m512i A, int b) + /// GF2P8AFFINEQB zmm1, zmm2/m512, imm8 + /// VGF2P8AFFINEQB zmm1, zmm2, zmm3/m512, imm8 + /// VGF2P8AFFINEQB zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst, imm8 + /// + public static Vector512 GaloisFieldAffineTransform(Vector512 x, Vector512 a, [ConstantExpected] byte b) => GaloisFieldAffineTransform(x, a, b); + /// + /// __m512i _mm512_gf2p8mul_epi8 (__m512i a, __m512i b) + /// GF2P8MULB zmm1, zmm2/m512 + /// VGF2P8MULB zmm1, zmm2, zmm3/m512 + /// VGF2P8MULB zmm1{k1}{z}, zmm2, zmm3/m512 + /// + public static Vector512 GaloisFieldMultiply(Vector512 left, Vector512 right) => GaloisFieldMultiply(left, right); + } + + /// + /// __m128i _mm_gf2p8affineinv_epi64_epi8 (__m128i x, __m128i A, int b) + /// GF2P8AFFINEINVQB xmm1, xmm2/m128, imm8 + /// VGF2P8AFFINEINVQB xmm1, xmm2, xmm3/m128, imm8 + /// VGF2P8AFFINEINVQB xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8 + /// + public static Vector128 GaloisFieldAffineTransformInverse(Vector128 x, Vector128 a, [ConstantExpected] byte b) => GaloisFieldAffineTransformInverse(x, a, b); + /// + /// __m128i _mm_gf2p8affine_epi64_epi8 (__m128i x, __m128i A, int b) + /// GF2P8AFFINEQB xmm1, xmm2/m128, imm8 + /// VGF2P8AFFINEQB xmm1, xmm2, xmm3/m128, imm8 + /// VGF2P8AFFINEQB xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8 + /// + public static Vector128 GaloisFieldAffineTransform(Vector128 x, Vector128 a, [ConstantExpected] byte b) => GaloisFieldAffineTransform(x, a, b); + /// + /// __m128i _mm_gf2p8mul_epi8 (__m128i a, __m128i b) + /// GF2P8MULB xmm1, xmm2/m128 + /// VGF2P8MULB xmm1, xmm2, xmm3/m128 + /// VGF2P8MULB xmm1{k1}{z}, xmm2, xmm3/m128 + /// + public static Vector128 GaloisFieldMultiply(Vector128 left, Vector128 right) => GaloisFieldMultiply(left, right); + } +} diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 352820d85007c3..d594ac85624881 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -9162,6 +9162,37 @@ internal X64() { } public static new bool IsSupported { get { throw null; } } } } + + [System.CLSCompliantAttribute(false)] + public abstract partial class Gfni : System.Runtime.Intrinsics.X86.Sse41 + { + internal Gfni() { } + public static new bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector128 GaloisFieldAffineTransformInverse(System.Runtime.Intrinsics.Vector128 x, System.Runtime.Intrinsics.Vector128 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector128 GaloisFieldAffineTransform(System.Runtime.Intrinsics.Vector128 x, System.Runtime.Intrinsics.Vector128 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector128 GaloisFieldMultiply(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Sse41.X64 + { + internal X64() { } + public static new bool IsSupported { get { throw null; } } + } + public abstract partial class V256 + { + internal V256() { } + public static bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector256 GaloisFieldAffineTransformInverse(System.Runtime.Intrinsics.Vector256 x, System.Runtime.Intrinsics.Vector256 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector256 GaloisFieldAffineTransform(System.Runtime.Intrinsics.Vector256 x, System.Runtime.Intrinsics.Vector256 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector256 GaloisFieldMultiply(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + } + public abstract partial class V512 + { + internal V512() { } + public static bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector512 GaloisFieldAffineTransformInverse(System.Runtime.Intrinsics.Vector512 x, System.Runtime.Intrinsics.Vector512 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector512 GaloisFieldAffineTransform(System.Runtime.Intrinsics.Vector512 x, System.Runtime.Intrinsics.Vector512 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector512 GaloisFieldMultiply(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + } + } } namespace System.Runtime.Intrinsics.Wasm { diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 8d6a063ce4d2fa..bce0721e863d21 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -216,6 +216,11 @@ int minipal_getcpufeatures(void) { __cpuidex(cpuidInfo, 0x00000007, 0x00000000); + if ((cpuidInfo[CPUID_ECX] & (1 << 8)) != 0) // GFNI + { + result |= XArchIntrinsicConstants_Gfni; + } + if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 { result |= XArchIntrinsicConstants_Avx2; diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 6422fe33f9787b..04dfd09ce012d4 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -31,6 +31,7 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Serialize = 0x20000, XArchIntrinsicConstants_Avx10v1 = 0x40000, XArchIntrinsicConstants_Evex = 0x80000, + XArchIntrinsicConstants_Gfni = 0x100000, }; #endif // HOST_X86 || HOST_AMD64 From f5854547c5b8cae9df3c1798561c50a461cc011e Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Thu, 31 Oct 2024 18:29:07 -0700 Subject: [PATCH 2/6] add tests --- .../GenerateHWIntrinsicTests_X86.cs | 77 +++- .../X86/Gfni.V256/Gfni.V256_r.csproj | 14 + .../X86/Gfni.V256/Gfni.V256_ro.csproj | 14 + .../X86/Gfni.V256/Program.Gfni.V256.cs | 17 + .../X86/Gfni.V512/Gfni.V512_r.csproj | 14 + .../X86/Gfni.V512/Gfni.V512_ro.csproj | 14 + .../X86/Gfni.V512/Program.Gfni.V512.cs | 17 + .../HardwareIntrinsics/X86/Gfni/Gfni_r.csproj | 14 + .../X86/Gfni/Gfni_ro.csproj | 14 + .../X86/Gfni/Program.Gfni.cs | 17 + .../X86/Shared/GfniAffineTest.template | 340 ++++++++++++++++++ src/tests/issues.targets | 9 + 12 files changed, 560 insertions(+), 1 deletion(-) create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Program.Gfni.V256.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Program.Gfni.V512.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Gfni/Program.Gfni.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Shared/GfniAffineTest.template diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs index 42ee1aecd65765..9bb7150abf5242 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs @@ -83,6 +83,44 @@ succeeded = (expectedResult == result);"; +const string GaloisFieldMultiplyTest_ValidationLogic = @"static {RetBaseType} GaloisFieldMultiply({Op1BaseType} a, {Op1BaseType} b) + { + int ret = 0; + for (int i = 0; i < 8; i++) + { + if (((b >> i) & 1) != 0) + { + ret ^= a << i; + } + } + + for (int i = 14; i >= 8; i--) + { + if (((ret >> i) & 1) != 0) + { + ret ^= 0x11B << (i - 8); + } + } + + return ({RetBaseType})ret; + } + + if ({ValidateFirstResult} && (!mask.HasValue || ((left[0] != default) && result[0] != mask.Value))) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults} && (!mask.HasValue || ((left[i] != default) && result[i] != mask.Value))) + { + succeeded = false; + break; + } + } + }"; + const string HorizontalOpTest_ValidationLogic = @"for (var outer = 0; outer < (LargestVectorSize / 16); outer++) { for (var inner = 0; inner < (8 / sizeof({RetBaseType})); inner++) @@ -190,6 +228,7 @@ (string templateFileName, string outputTemplateName, Dictionary templateData)[] Templates = new[] { ("_BinaryOpTestTemplate.template", "AlternatingBinOpTest.template", new Dictionary { ["TemplateName"] = "Alternating", ["TemplateValidationLogic"] = AlternatingBinOpTest_ValidationLogic }), + ("_BinaryOpTestTemplate.template", "GfniMultiplyTest.template", new Dictionary { ["TemplateName"] = "GfniMultiply",["TemplateValidationLogic"] = GaloisFieldMultiplyTest_ValidationLogic }), ("_BinaryOpTestTemplate.template", "HorizontalBinOpTest.template", new Dictionary { ["TemplateName"] = "Horizontal", ["TemplateValidationLogic"] = HorizontalOpTest_ValidationLogic }), ("_BinaryOpTestTemplate.template", "SimpleBinOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleBinOpTest_ValidationLogic }), ("_BooleanBinaryOpTestTemplate.template", "BooleanBinOpTest.template", new Dictionary { ["TemplateName"] = "Boolean", ["TemplateValidationLogic"] = BooleanOpTest_ValidationLogic }), @@ -3483,6 +3522,39 @@ ("ScalarTernOpTupleBinRetTest.template", new Dictionary { ["Isa"] = "X86Base.X64", ["Method"] = "DivRem", ["RetBaseType"] = "UInt64", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["Op3BaseType"] = "UInt64", ["NextValueOp1"] = "1", ["NextValueOp2"] = "1", ["NextValueOp3"] = "0x8000000000000000", ["ValidateResult"] = "ulong expectedQuotient = 2; ulong expectedReminder = 1; isUnexpectedResult = (expectedQuotient != ret1) || (expectedReminder != ret2);" }), }; +(string templateFileName, Dictionary templateData)[] GfniInputs = new[] +{ + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != left[0] >>> 1", ["ValidateRemainingResults"] = "result[i] != left[i] >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (left[0] >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (left[i] >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(left[0], new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(left[i], new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != Invert(left[0]) >>> 1", ["ValidateRemainingResults"] = "result[i] != Invert(left[i]) >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (Invert(left[0]) >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (Invert(left[i]) >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(Invert(left[0]), new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(Invert(left[i]), new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniMultiplyTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != GaloisFieldMultiply(left[0], right[0])", ["ValidateRemainingResults"] = "result[i] != GaloisFieldMultiply(left[i], right[i])"}), +}; + +(string templateFileName, Dictionary templateData)[] GfniV256Inputs = new[] +{ + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != left[0] >>> 1", ["ValidateRemainingResults"] = "result[i] != left[i] >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (left[0] >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (left[i] >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(left[0], new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(left[i], new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != Invert(left[0]) >>> 1", ["ValidateRemainingResults"] = "result[i] != Invert(left[i]) >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (Invert(left[0]) >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (Invert(left[i]) >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(Invert(left[0]), new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(Invert(left[i]), new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniMultiplyTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != GaloisFieldMultiply(left[0], right[0])", ["ValidateRemainingResults"] = "result[i] != GaloisFieldMultiply(left[i], right[i])"}), +}; + +(string templateFileName, Dictionary templateData)[] GfniV512Inputs = new[] +{ + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != left[0] >>> 1", ["ValidateRemainingResults"] = "result[i] != left[i] >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (left[0] >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (left[i] >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(left[0], new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(left[i], new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != Invert(left[0]) >>> 1", ["ValidateRemainingResults"] = "result[i] != Invert(left[i]) >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (Invert(left[0]) >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (Invert(left[i]) >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(Invert(left[0]), new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(Invert(left[i]), new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniMultiplyTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != GaloisFieldMultiply(left[0], right[0])", ["ValidateRemainingResults"] = "result[i] != GaloisFieldMultiply(left[i], right[i])"}), +}; + Dictionary extraHelperFiles = new Dictionary { ["SseVerify"] = @"..\..\X86\Shared\SseVerify.cs", @@ -3508,7 +3580,7 @@ bool isImmTemplate(string name) name == "ExtractVector128Test.template" || name == "InsertLoadTest.template" || name == "ExtractStoreTest.template" || name == "ImmBinOpTest.template" || name == "AesImmOpTest.template" || name == "PclmulqdqOpTest.template" || - name == "ImmTernOpTest.template"; + name == "ImmTernOpTest.template" || name == "GfniAffineTest.template"; } string projectName = args[0]; @@ -3562,6 +3634,9 @@ bool isImmTemplate(string name) ProcessInputs("Bmi2.X64", Bmi2X64Inputs); ProcessInputs("X86Base", X86BaseInputs); ProcessInputs("X86Base.X64", X86BaseX64Inputs); +ProcessInputs("Gfni", GfniInputs); +ProcessInputs("Gfni.V256", GfniV256Inputs); +ProcessInputs("Gfni.V512", GfniV512Inputs); void ProcessInputs(string groupName, (string templateFileName, Dictionary templateData)[] inputs) { diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_r.csproj new file mode 100644 index 00000000000000..1181a4692a6abe --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_r.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni.V256_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_ro.csproj new file mode 100644 index 00000000000000..6a7176a8cc25c5 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_ro.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni.V256_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Program.Gfni.V256.cs b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Program.Gfni.V256.cs new file mode 100644 index 00000000000000..e96ee84db1957b --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Program.Gfni.V256.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/91392", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))] +namespace JIT.HardwareIntrinsics.X86._Gfni.V256 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_r.csproj new file mode 100644 index 00000000000000..e7f35d018936b7 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_r.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni.V512_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_ro.csproj new file mode 100644 index 00000000000000..bb2cbfdf77b319 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_ro.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni.V512_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Program.Gfni.V512.cs b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Program.Gfni.V512.cs new file mode 100644 index 00000000000000..8cf8a20f1c47ad --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Program.Gfni.V512.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/91392", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))] +namespace JIT.HardwareIntrinsics.X86._Gfni.V512 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_r.csproj new file mode 100644 index 00000000000000..a582882424907a --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_r.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_ro.csproj new file mode 100644 index 00000000000000..aaaf8b0bded092 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_ro.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Program.Gfni.cs b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Program.Gfni.cs new file mode 100644 index 00000000000000..16721c947bbf27 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Program.Gfni.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/91392", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))] +namespace JIT.HardwareIntrinsics.X86._Gfni +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/GfniAffineTest.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/GfniAffineTest.template new file mode 100644 index 00000000000000..c46aa44c4aa316 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/GfniAffineTest.template @@ -0,0 +1,340 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using Xunit; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + [Fact] + public static void {Method}{RetBaseType}{Imm}() + { + var test = new GfniAffineTest__{Method}{RetBaseType}{Imm}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class GfniAffineTest__{Method}{RetBaseType}{Imm} + { + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op2VectorType}<{Op2BaseType}> _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(GfniAffineTest__{Method}{RetBaseType}{Imm} testClass) + { + var result = {Isa}.{Method}(_fld1, _fld2, {Imm}); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op2VectorType}<{Op2BaseType}> _fld2; + + private SimpleBinaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}> _dataTable; + + private static byte NextMatrixByte(int i) => ((ReadOnlySpan)[0x00, 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02])[i % 8]; + + public GfniAffineTest__{Method}{RetBaseType}{Imm}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + _dataTable = new SimpleBinaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}>(_data1, _data2, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.Load{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr)), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = {Isa}.{Method}( + {LoadIsa}.LoadAligned{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.LoadAligned{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr)), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op2VectorType}<{Op2BaseType}>), typeof(byte) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + (byte){Imm} + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var left = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var right = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr); + var result = {Isa}.{Method}(left, right, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(left, right, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1, _fld2, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1, test._fld2, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> left, {Op2VectorType}<{Op2BaseType}> right, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] left, {Op2BaseType}[] right, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + +#pragma warning disable 8321 // emulation methods not used in all test forms + static {Op1BaseType} Invert({Op1BaseType} x) => ((ReadOnlySpan<{Op1BaseType}>)[ + 0x00, 0x01, 0x8D, 0xF6, 0xCB, 0x52, 0x7B, 0xD1, 0xE8, 0x4F, 0x29, 0xC0, 0xB0, 0xE1, 0xE5, 0xC7, + 0x74, 0xB4, 0xAA, 0x4B, 0x99, 0x2B, 0x60, 0x5F, 0x58, 0x3F, 0xFD, 0xCC, 0xFF, 0x40, 0xEE, 0xB2, + 0x3A, 0x6E, 0x5A, 0xF1, 0x55, 0x4D, 0xA8, 0xC9, 0xC1, 0x0A, 0x98, 0x15, 0x30, 0x44, 0xA2, 0xC2, + 0x2C, 0x45, 0x92, 0x6C, 0xF3, 0x39, 0x66, 0x42, 0xF2, 0x35, 0x20, 0x6F, 0x77, 0xBB, 0x59, 0x19, + 0x1D, 0xFE, 0x37, 0x67, 0x2D, 0x31, 0xF5, 0x69, 0xA7, 0x64, 0xAB, 0x13, 0x54, 0x25, 0xE9, 0x09, + 0xED, 0x5C, 0x05, 0xCA, 0x4C, 0x24, 0x87, 0xBF, 0x18, 0x3E, 0x22, 0xF0, 0x51, 0xEC, 0x61, 0x17, + 0x16, 0x5E, 0xAF, 0xD3, 0x49, 0xA6, 0x36, 0x43, 0xF4, 0x47, 0x91, 0xDF, 0x33, 0x93, 0x21, 0x3B, + 0x79, 0xB7, 0x97, 0x85, 0x10, 0xB5, 0xBA, 0x3C, 0xB6, 0x70, 0xD0, 0x06, 0xA1, 0xFA, 0x81, 0x82, + 0x83, 0x7E, 0x7F, 0x80, 0x96, 0x73, 0xBE, 0x56, 0x9B, 0x9E, 0x95, 0xD9, 0xF7, 0x02, 0xB9, 0xA4, + 0xDE, 0x6A, 0x32, 0x6D, 0xD8, 0x8A, 0x84, 0x72, 0x2A, 0x14, 0x9F, 0x88, 0xF9, 0xDC, 0x89, 0x9A, + 0xFB, 0x7C, 0x2E, 0xC3, 0x8F, 0xB8, 0x65, 0x48, 0x26, 0xC8, 0x12, 0x4A, 0xCE, 0xE7, 0xD2, 0x62, + 0x0C, 0xE0, 0x1F, 0xEF, 0x11, 0x75, 0x78, 0x71, 0xA5, 0x8E, 0x76, 0x3D, 0xBD, 0xBC, 0x86, 0x57, + 0x0B, 0x28, 0x2F, 0xA3, 0xDA, 0xD4, 0xE4, 0x0F, 0xA9, 0x27, 0x53, 0x04, 0x1B, 0xFC, 0xAC, 0xE6, + 0x7A, 0x07, 0xAE, 0x63, 0xC5, 0xDB, 0xE2, 0xEA, 0x94, 0x8B, 0xC4, 0xD5, 0x9D, 0xF8, 0x90, 0x6B, + 0xB1, 0x0D, 0xD6, 0xEB, 0xC6, 0x0E, 0xCF, 0xAD, 0x08, 0x4E, 0xD7, 0xE3, 0x5D, 0x50, 0x1E, 0xB3, + 0x5B, 0x23, 0x38, 0x34, 0x68, 0x46, 0x03, 0x8C, 0xDD, 0x9C, 0x7D, 0xA0, 0xCD, 0x1A, 0x41, 0x1C + ])[x]; + + static {RetBaseType} AffineTransform({Op1BaseType} x, ReadOnlySpan<{Op2BaseType}> A, byte b) + { + int ret = 0; + for (int i = 0; i < 8; i++) + { + ret |= (int.PopCount(A[7 - i] & x) & 1) << i; + } + + return ({RetBaseType})(ret ^ b); + } +#pragma warning restore 8321 + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>.{Imm}, {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/issues.targets b/src/tests/issues.targets index 09fb7bf06044c9..61378c3316e0dd 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -2257,6 +2257,15 @@ https://github.com/dotnet/runtime/issues/75767 + + https://github.com/dotnet/runtime/issues/91392 + + + https://github.com/dotnet/runtime/issues/91392 + + + https://github.com/dotnet/runtime/issues/91392 + https://github.com/dotnet/runtime/issues/75767 From e9f9bcf4c2c8d69782b66d1b39869edfbe98a23b Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Tue, 5 Nov 2024 00:36:41 -0800 Subject: [PATCH 3/6] rename file --- .../X86/{Gfni.NotSupported.cs => Gfni.PlatformNotSupported.cs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/{Gfni.NotSupported.cs => Gfni.PlatformNotSupported.cs} (100%) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.NotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.PlatformNotSupported.cs similarity index 100% rename from src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.NotSupported.cs rename to src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.PlatformNotSupported.cs From b3b5b0722f5c4e73514072d61a9b51f7156e3408 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Wed, 20 Nov 2024 15:40:56 -0800 Subject: [PATCH 4/6] add missing tests and AOT handling --- src/coreclr/inc/corinfoinstructionset.h | 10 ++++++++ src/coreclr/jit/hwintrinsic.cpp | 1 + src/coreclr/jit/hwintrinsicxarch.cpp | 3 +++ .../Common/Compiler/InstructionSetSupport.cs | 12 ++++++++++ .../tools/Common/InstructionSetHelpers.cs | 3 +++ .../Runtime/ReadyToRunInstructionSetHelper.cs | 2 ++ .../JitInterface/CorInfoInstructionSet.cs | 17 +++++++++++++ .../ThunkGenerator/InstructionSetDesc.txt | 1 + .../ILLink.Substitutions.NoX86Intrinsics.xml | 3 +++ .../ILLink.Substitutions.Intrinsics.x86.xml | 12 ++++++++++ .../X86/General/IsSupported.cs | 4 ++++ .../HardwareIntrinsics/X86/X86Base/CpuId.cs | 23 ++++++++++++++++++ .../SmokeTests/HardwareIntrinsics/Program.cs | 24 +++++++++++++++++++ 13 files changed, 115 insertions(+) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 63e724dbc28acd..400942b22b4528 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -112,6 +112,7 @@ enum CORINFO_InstructionSet InstructionSet_AVX512VBMI_X64=67, InstructionSet_AVX10v1_X64=68, InstructionSet_AVX10v1_V512_X64=69, + InstructionSet_GFNI_X64=70, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -183,6 +184,7 @@ enum CORINFO_InstructionSet InstructionSet_AVX512VBMI_X64=67, InstructionSet_AVX10v1_X64=68, InstructionSet_AVX10v1_V512_X64=69, + InstructionSet_GFNI_X64=70, #endif // TARGET_X86 }; @@ -348,6 +350,8 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_AVX10v1_X64); if (HasInstructionSet(InstructionSet_AVX10v1_V512)) AddInstructionSet(InstructionSet_AVX10v1_V512_X64); + if (HasInstructionSet(InstructionSet_GFNI)) + AddInstructionSet(InstructionSet_GFNI_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -528,6 +532,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512_X64); + if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_GFNI_X64)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_X64) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -976,6 +984,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "APX"; case InstructionSet_GFNI : return "GFNI"; + case InstructionSet_GFNI_X64 : + return "GFNI_X64"; case InstructionSet_GFNI_V256 : return "GFNI_V256"; case InstructionSet_GFNI_V512 : diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 4c5104005b9697..3126d37a1cbf3c 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -835,6 +835,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // AVX512VBMI_X64 { FIRST_NI_AVX10v1_X64, LAST_NI_AVX10v1_X64 }, { NI_Illegal, NI_Illegal }, // AVX10v1_V512_X64 + { NI_Illegal, NI_Illegal }, // GFNI_X64 #elif defined (TARGET_ARM64) { FIRST_NI_ArmBase, LAST_NI_ArmBase }, { FIRST_NI_AdvSimd, LAST_NI_AdvSimd }, diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 295dd149fb9dde..d4651444bf5e7b 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -60,6 +60,8 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_BMI2_X64; case InstructionSet_FMA: return InstructionSet_FMA_X64; + case InstructionSet_GFNI: + return InstructionSet_GFNI_X64; case InstructionSet_LZCNT: return InstructionSet_LZCNT_X64; case InstructionSet_PCLMULQDQ: @@ -910,6 +912,7 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_AVX10v1_V512_X64: case InstructionSet_EVEX: case InstructionSet_GFNI: + case InstructionSet_GFNI_X64: case InstructionSet_GFNI_V256: case InstructionSet_GFNI_V512: { diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 6afe17d2c1cdc5..03ee8203a2b3b9 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -358,10 +358,22 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, if (_supportedInstructionSets.Contains("avx10v1")) _supportedInstructionSets.Add("avx10v1_v512"); + if (_supportedInstructionSets.Contains("gfni")) + _supportedInstructionSets.Add("gfni_v512"); + if (_supportedInstructionSets.Contains("vpclmul")) _supportedInstructionSets.Add("vpclmul_v512"); } + if (_supportedInstructionSets.Any(iSet => iSet.Contains("avx"))) + { + // These ISAs should automatically extend to 256-bit if + // AVX is enabled. + + if (_supportedInstructionSets.Contains("gfni")) + _supportedInstructionSets.Add("gfni_v256"); + } + foreach (string supported in _supportedInstructionSets) { supportedInstructionSets.AddInstructionSet(instructionSetConversion[supported]); diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 95811afd7fbcdb..48e6ce36d807e7 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -193,6 +193,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("popcnt"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lzcnt"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("serialize"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni"); // If AVX was enabled, we can opportunistically enable instruction sets which use the VEX encodings Debug.Assert(InstructionSet.X64_AVX == InstructionSet.X86_AVX); @@ -210,6 +211,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v256"); } Debug.Assert(InstructionSet.X64_AVX512F == InstructionSet.X86_AVX512F); @@ -228,6 +230,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul_v512"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512"); } } else if (targetArchitecture == TargetArchitecture.ARM64) diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 26c63af6c38d7e..ee5727102a2082 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -124,6 +124,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; case InstructionSet.X64_APX: return ReadyToRunInstructionSet.Apx; case InstructionSet.X64_GFNI: return ReadyToRunInstructionSet.Gfni; + case InstructionSet.X64_GFNI_X64: return ReadyToRunInstructionSet.Gfni; case InstructionSet.X64_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256; case InstructionSet.X64_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512; @@ -202,6 +203,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; case InstructionSet.X86_APX: return ReadyToRunInstructionSet.Apx; case InstructionSet.X86_GFNI: return ReadyToRunInstructionSet.Gfni; + case InstructionSet.X86_GFNI_X64: return null; case InstructionSet.X86_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256; case InstructionSet.X86_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 8ac761b2c438e5..f5483ffff7f5b6 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -110,6 +110,7 @@ public enum InstructionSet X64_AVX512VBMI_X64 = InstructionSet_X64.AVX512VBMI_X64, X64_AVX10v1_X64 = InstructionSet_X64.AVX10v1_X64, X64_AVX10v1_V512_X64 = InstructionSet_X64.AVX10v1_V512_X64, + X64_GFNI_X64 = InstructionSet_X64.GFNI_X64, X86_X86Base = InstructionSet_X86.X86Base, X86_SSE = InstructionSet_X86.SSE, X86_SSE2 = InstructionSet_X86.SSE2, @@ -179,6 +180,7 @@ public enum InstructionSet X86_AVX512VBMI_X64 = InstructionSet_X86.AVX512VBMI_X64, X86_AVX10v1_X64 = InstructionSet_X86.AVX10v1_X64, X86_AVX10v1_V512_X64 = InstructionSet_X86.AVX10v1_V512_X64, + X86_GFNI_X64 = InstructionSet_X86.GFNI_X64, } public enum InstructionSet_ARM64 { @@ -284,6 +286,7 @@ public enum InstructionSet_X64 AVX512VBMI_X64 = 67, AVX10v1_X64 = 68, AVX10v1_V512_X64 = 69, + GFNI_X64 = 70, } public enum InstructionSet_X86 @@ -359,6 +362,7 @@ public enum InstructionSet_X86 AVX512VBMI_X64 = 67, AVX10v1_X64 = 68, AVX10v1_V512_X64 = 69, + GFNI_X64 = 70, } public unsafe struct InstructionSetFlags : IEnumerable @@ -678,6 +682,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -1041,6 +1049,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -1536,6 +1546,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_AVX10v1_X64); if (HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); + if (HasInstructionSet(InstructionSet.X64_GFNI)) + AddInstructionSet(InstructionSet.X64_GFNI_X64); break; case TargetArchitecture.X86: @@ -1586,6 +1598,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_AVX512VBMI_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); + AddInstructionSet(InstructionSet.X64_GFNI_X64); break; case TargetArchitecture.X86: @@ -1614,6 +1627,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_AVX512VBMI_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_V512_X64); + AddInstructionSet(InstructionSet.X86_GFNI_X64); break; } } @@ -1919,6 +1933,9 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite { return InstructionSet.X64_APX; } case "Gfni": + if (nestedTypeName == "X64") + { return InstructionSet.X64_GFNI_X64; } + else if (nestedTypeName == "V256") { return InstructionSet.X64_GFNI_V256; } else diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index d8ea5bfc9ad019..5fffe421a06be1 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -97,6 +97,7 @@ instructionset64bit,X86 ,AVX512DQ instructionset64bit,X86 ,AVX512VBMI instructionset64bit,X86 ,AVX10v1 instructionset64bit,X86 ,AVX10v1_V512 +instructionset64bit,X86 ,GFNI vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 diff --git a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml index 27496366afef49..9e3e8a1d6e7201 100644 --- a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml +++ b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml @@ -111,6 +111,9 @@ + + + diff --git a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml index 7050d50dc71c57..1e99c7fba5fefe 100644 --- a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml +++ b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml @@ -81,6 +81,18 @@ + + + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs b/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs index f03c8671ae9ae8..18241d9ecfc40d 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs @@ -77,6 +77,10 @@ public static void IsSupported() Convert.ToBoolean(typeof(AvxVnni.X64).GetMethod(issupported).Invoke(null, null)) != AvxVnni.X64.IsSupported || Convert.ToBoolean(typeof(Fma).GetMethod(issupported).Invoke(null, null)) != Fma.IsSupported || Convert.ToBoolean(typeof(Fma.X64).GetMethod(issupported).Invoke(null, null)) != Fma.X64.IsSupported || + Convert.ToBoolean(typeof(Gfni).GetMethod(issupported).Invoke(null, null)) != Gfni.IsSupported || + Convert.ToBoolean(typeof(Gfni.V256).GetMethod(issupported).Invoke(null, null)) != Gfni.V256.IsSupported || + Convert.ToBoolean(typeof(Gfni.V512).GetMethod(issupported).Invoke(null, null)) != Gfni.V512.IsSupported || + Convert.ToBoolean(typeof(Gfni.X64).GetMethod(issupported).Invoke(null, null)) != Gfni.X64.IsSupported || Convert.ToBoolean(typeof(Pclmulqdq).GetMethod(issupported).Invoke(null, null)) != Pclmulqdq.IsSupported || Convert.ToBoolean(typeof(Pclmulqdq.V256).GetMethod(issupported).Invoke(null, null)) != Pclmulqdq.V256.IsSupported || Convert.ToBoolean(typeof(Pclmulqdq.V512).GetMethod(issupported).Invoke(null, null)) != Pclmulqdq.V512.IsSupported || diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs index 48c3421ac83e6b..2b693ef0e678b5 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs @@ -108,6 +108,8 @@ public unsafe static void CpuId() testResult = Fail; } + bool isSse41HierarchyDisabled = isHierarchyDisabled; + if (IsBitIncorrect(ecx, 20, typeof(Sse42), Sse42.IsSupported, "SSE42", ref isHierarchyDisabled)) { testResult = Fail; @@ -284,6 +286,27 @@ public unsafe static void CpuId() testResult = Fail; } + isHierarchyDisabled = isSse41HierarchyDisabled; + + if (IsBitIncorrect(ecx, 8, typeof(Gfni), Gfni.IsSupported, "GFNI", ref isHierarchyDisabled)) + { + testResult = Fail; + } + + isHierarchyDisabled = isAvxHierarchyDisabled; + + if (IsBitIncorrect(ecx, 8, typeof(GFNI.V256), Gfni.V256.IsSupported, "GFNI", ref isHierarchyDisabled)) + { + testResult = Fail; + } + + isHierarchyDisabled = isAvx512HierarchyDisabled; + + if (IsBitIncorrect(ecx, 10, typeof(Gfni.V512), Gfni.V512.IsSupported, "GFNI", ref isHierarchyDisabled)) + { + testResult = Fail; + } + isHierarchyDisabled = isAvxHierarchyDisabled; if (IsBitIncorrect(ecx, 10, typeof(Pclmulqdq.V256), Pclmulqdq.V256.IsSupported, "VPCLMULQDQ", ref isHierarchyDisabled)) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 7d891dbac1a41d..4f456c0e20fea1 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -71,6 +71,9 @@ static int Main() bool? ExpectedAvx512DQ = false; bool? ExpectedAvx512Vbmi = false; bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = false; + bool? ExpectedGfniV256 = false; + bool? ExpectedGfniV512 = false; #elif SSE42_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -96,6 +99,9 @@ static int Main() bool? ExpectedAvx512DQ = false; bool? ExpectedAvx512Vbmi = false; bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = false; + bool? ExpectedGfniV512 = false; #elif AVX_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -121,6 +127,9 @@ static int Main() bool? ExpectedAvx512DQ = false; bool? ExpectedAvx512Vbmi = false; bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = null; + bool? ExpectedGfniV512 = false; #elif AVX2_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -146,6 +155,9 @@ static int Main() bool? ExpectedAvx512DQ = false; bool? ExpectedAvx512Vbmi = false; bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = null; + bool? ExpectedGfniV512 = false; #elif AVX512_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -171,6 +183,9 @@ static int Main() bool? ExpectedAvx512DQ = true; bool? ExpectedAvx512Vbmi = null; bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = null; + bool? ExpectedGfniV512 = null; #else #error Who dis? #endif @@ -272,6 +287,11 @@ static int Main() Check("X86Serialize", ExpectedX86Serialize, &X86SerializeIsSupported, X86Serialize.IsSupported, () => { X86Serialize.Serialize(); return true; } ); Check("X86Serialize.X64", ExpectedX86Serialize, &X86SerializeX64IsSupported, X86Serialize.X64.IsSupported, null); + Check("Gfni", ExpectedGfni, &GfniIsSupported, Gfni.IsSupported, () => Gfni.GaloisFieldMultiply(Vector128.Zero, Vector128.Zero, 0).Equals(Vector128.Zero)); + Check("Gfni.V256", ExpectedGfniV256, &GfniV256IsSupported, Gfni.V256.IsSupported, () => Gfni.V256.GaloisFieldMultiply(Vector256.Zero, Vector256.Zero, 0).Equals(Vector256.Zero)); + Check("Gfni.V512", ExpectedGfniV512, &GfniV512IsSupported, Gfni.V512.IsSupported, () => Gfni.V512.GaloisFieldMultiply(Vector512.Zero, Vector512.Zero, 0).Equals(Vector512.Zero)); + Check("Gfni.X64", ExpectedGfni, &GfniX64IsSupported, Gfni.X64.IsSupported, null); + return s_success ? 100 : 1; } @@ -333,6 +353,10 @@ static int Main() static bool Avx512VbmiX64IsSupported() => Avx512Vbmi.X64.IsSupported; static bool X86SerializeIsSupported() => X86Serialize.IsSupported; static bool X86SerializeX64IsSupported() => X86Serialize.X64.IsSupported; + static bool GfniIsSupported() => Gfni.IsSupported; + static bool GfniV256IsSupported() => Gfni.V256.IsSupported; + static bool GfniV512IsSupported() => Gfni.V512.IsSupported; + static bool GfniX64IsSupported() => Gfni.X64.IsSupported; static bool IsConstantTrue(delegate* code) { From 5d5d89701bcca9062c214798de6e4120142c278c Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Wed, 20 Nov 2024 16:59:18 -0800 Subject: [PATCH 5/6] fix build --- src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs | 4 ++-- .../nativeaot/SmokeTests/HardwareIntrinsics/Program.cs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs index 2b693ef0e678b5..d7c3e1aee3af89 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs @@ -295,14 +295,14 @@ public unsafe static void CpuId() isHierarchyDisabled = isAvxHierarchyDisabled; - if (IsBitIncorrect(ecx, 8, typeof(GFNI.V256), Gfni.V256.IsSupported, "GFNI", ref isHierarchyDisabled)) + if (IsBitIncorrect(ecx, 8, typeof(Gfni.V256), Gfni.V256.IsSupported, "GFNI", ref isHierarchyDisabled)) { testResult = Fail; } isHierarchyDisabled = isAvx512HierarchyDisabled; - if (IsBitIncorrect(ecx, 10, typeof(Gfni.V512), Gfni.V512.IsSupported, "GFNI", ref isHierarchyDisabled)) + if (IsBitIncorrect(ecx, 8, typeof(Gfni.V512), Gfni.V512.IsSupported, "GFNI", ref isHierarchyDisabled)) { testResult = Fail; } diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 4f456c0e20fea1..b92810e13cdab2 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -287,9 +287,9 @@ static int Main() Check("X86Serialize", ExpectedX86Serialize, &X86SerializeIsSupported, X86Serialize.IsSupported, () => { X86Serialize.Serialize(); return true; } ); Check("X86Serialize.X64", ExpectedX86Serialize, &X86SerializeX64IsSupported, X86Serialize.X64.IsSupported, null); - Check("Gfni", ExpectedGfni, &GfniIsSupported, Gfni.IsSupported, () => Gfni.GaloisFieldMultiply(Vector128.Zero, Vector128.Zero, 0).Equals(Vector128.Zero)); - Check("Gfni.V256", ExpectedGfniV256, &GfniV256IsSupported, Gfni.V256.IsSupported, () => Gfni.V256.GaloisFieldMultiply(Vector256.Zero, Vector256.Zero, 0).Equals(Vector256.Zero)); - Check("Gfni.V512", ExpectedGfniV512, &GfniV512IsSupported, Gfni.V512.IsSupported, () => Gfni.V512.GaloisFieldMultiply(Vector512.Zero, Vector512.Zero, 0).Equals(Vector512.Zero)); + Check("Gfni", ExpectedGfni, &GfniIsSupported, Gfni.IsSupported, () => Gfni.GaloisFieldMultiply(Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); + Check("Gfni.V256", ExpectedGfniV256, &GfniV256IsSupported, Gfni.V256.IsSupported, () => Gfni.V256.GaloisFieldMultiply(Vector256.Zero, Vector256.Zero).Equals(Vector256.Zero)); + Check("Gfni.V512", ExpectedGfniV512, &GfniV512IsSupported, Gfni.V512.IsSupported, () => Gfni.V512.GaloisFieldMultiply(Vector512.Zero, Vector512.Zero).Equals(Vector512.Zero)); Check("Gfni.X64", ExpectedGfni, &GfniX64IsSupported, Gfni.X64.IsSupported, null); return s_success ? 100 : 1; From 3e18b970f7ab5b2b87d8eba0de408d3a2d2f2ea2 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Wed, 20 Nov 2024 17:46:42 -0800 Subject: [PATCH 6/6] fix test result --- src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index b92810e13cdab2..8ec15aed2ff2fd 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -71,7 +71,7 @@ static int Main() bool? ExpectedAvx512DQ = false; bool? ExpectedAvx512Vbmi = false; bool? ExpectedX86Serialize = null; - bool? ExpectedGfni = false; + bool? ExpectedGfni = null; bool? ExpectedGfniV256 = false; bool? ExpectedGfniV512 = false; #elif SSE42_INTRINSICS