diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index e9eeb35a662d05..498b00a4f9471a 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -3192,7 +3192,7 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree) simd8_t value = vnStore->ConstantValue<simd8_t>(vnCns); GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet()); - vecCon->gtSimd8Val = value; + memcpy(&vecCon->gtSimdVal, &value, sizeof(simd8_t)); conValTree = vecCon; break; @@ -3203,7 +3203,7 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree) simd12_t value = vnStore->ConstantValue<simd12_t>(vnCns); GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet()); - vecCon->gtSimd12Val = value; + memcpy(&vecCon->gtSimdVal, &value, sizeof(simd12_t)); conValTree = vecCon; break; @@ -3214,7 +3214,7 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree) simd16_t value = vnStore->ConstantValue<simd16_t>(vnCns); GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet()); - vecCon->gtSimd16Val = value; + memcpy(&vecCon->gtSimdVal, &value, sizeof(simd16_t)); conValTree = vecCon; break; @@ -3222,12 +3222,22 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree) #if defined(TARGET_XARCH) case TYP_SIMD32: - case TYP_SIMD64: // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val.
{ simd32_t value = vnStore->ConstantValue<simd32_t>(vnCns); GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet()); - vecCon->gtSimd32Val = value; + memcpy(&vecCon->gtSimdVal, &value, sizeof(simd32_t)); + + conValTree = vecCon; + break; + } + + case TYP_SIMD64: + { + simd64_t value = vnStore->ConstantValue<simd64_t>(vnCns); + + GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet()); + memcpy(&vecCon->gtSimdVal, &value, sizeof(simd64_t)); conValTree = vecCon; break; diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 602f6e5b1b7b15..9bc7f917af42f5 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2388,16 +2388,16 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // Get a temp integer register to compute long address. regNumber addrReg = tree->GetSingleTempReg(); - simd8_t constValue = vecCon->gtSimd8Val; - CORINFO_FIELD_HANDLE hnd = emit->emitSimd8Const(constValue); + simd8_t constValue; + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd8_t)); + CORINFO_FIELD_HANDLE hnd = emit->emitSimd8Const(constValue); emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0); } break; } case TYP_SIMD12: - case TYP_SIMD16: { if (vecCon->IsAllBitsSet()) { @@ -2413,14 +2413,33 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre regNumber addrReg = tree->GetSingleTempReg(); simd16_t constValue = {}; - - if (vecCon->TypeIs(TYP_SIMD12)) - memcpy(&constValue, &vecCon->gtSimd12Val, sizeof(simd12_t)); - else - constValue = vecCon->gtSimd16Val; + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd12_t)); CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(constValue); + emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0); + } + break; + } + case TYP_SIMD16: + { + if (vecCon->IsAllBitsSet()) + { + emit->emitIns_R_I(INS_mvni, attr, targetReg, 0, INS_OPTS_4S); + } + else if (vecCon->IsZero()) + { + emit->emitIns_R_I(INS_movi, attr, targetReg, 0, INS_OPTS_4S); + 
} + else + { + // Get a temp integer register to compute long address. + regNumber addrReg = tree->GetSingleTempReg(); + + simd16_t constValue; + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd16_t)); + + CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(constValue); emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0); } break; diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index e837c87a5c0018..076d0c624370fa 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -550,34 +550,40 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre #if defined(FEATURE_SIMD) case TYP_SIMD8: { - simd8_t constValue = vecCon->gtSimd8Val; - CORINFO_FIELD_HANDLE hnd = emit->emitSimd8Const(constValue); + simd8_t constValue; + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd8_t)); + CORINFO_FIELD_HANDLE hnd = emit->emitSimd8Const(constValue); emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); break; } case TYP_SIMD12: - case TYP_SIMD16: { simd16_t constValue = {}; - - if (vecCon->TypeIs(TYP_SIMD12)) - memcpy(&constValue, &vecCon->gtSimd12Val, sizeof(simd12_t)); - else - constValue = vecCon->gtSimd16Val; + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd12_t)); CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(constValue); + emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); + break; + } + case TYP_SIMD16: + { + simd16_t constValue; + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd16_t)); + + CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(constValue); emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); break; } case TYP_SIMD32: { - simd32_t constValue = vecCon->gtSimd32Val; - CORINFO_FIELD_HANDLE hnd = emit->emitSimd32Const(constValue); + simd32_t constValue; + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd32_t)); + CORINFO_FIELD_HANDLE hnd = emit->emitSimd32Const(constValue); emit->emitIns_R_C(ins_Load(targetType), attr, 
targetReg, hnd, 0); break; } @@ -585,11 +591,9 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre case TYP_SIMD64: { simd64_t constValue; - // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. - constValue.v256[0] = vecCon->gtSimd32Val; - constValue.v256[1] = vecCon->gtSimd32Val; - CORINFO_FIELD_HANDLE hnd = emit->emitSimd64Const(constValue); + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd64_t)); + CORINFO_FIELD_HANDLE hnd = emit->emitSimd64Const(constValue); emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0); break; } diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5a84e860fbefc0..330ec8894a926a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -252,9 +252,9 @@ void GenTree::InitNodeSize() } GenTree::s_gtNodeSizes[GT_CALL] = TREE_NODE_SZ_LARGE; -#ifndef HOST_64BIT +#ifdef TARGET_XARCH GenTree::s_gtNodeSizes[GT_CNS_VEC] = TREE_NODE_SZ_LARGE; -#endif +#endif // TARGET_XARCH GenTree::s_gtNodeSizes[GT_CAST] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_FTN_ADDR] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_BOX] = TREE_NODE_SZ_LARGE; @@ -303,10 +303,10 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeLngCon) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeDblCon) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeStrCon) <= TREE_NODE_SZ_SMALL); -#ifdef HOST_64BIT - static_assert_no_msg(sizeof(GenTreeVecCon) <= TREE_NODE_SZ_SMALL); -#else +#ifdef TARGET_XARCH static_assert_no_msg(sizeof(GenTreeVecCon) <= TREE_NODE_SZ_LARGE); // *** large node +#else + static_assert_no_msg(sizeof(GenTreeVecCon) <= TREE_NODE_SZ_SMALL); #endif static_assert_no_msg(sizeof(GenTreeLclVarCommon) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL); @@ -3050,33 +3050,45 @@ unsigned Compiler::gtHashValue(GenTree* tree) { #if defined(FEATURE_SIMD) #if defined(TARGET_XARCH) - case TYP_SIMD64: // 
TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. + case TYP_SIMD64: + { + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[15]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[14]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[13]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[12]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[11]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[10]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[9]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[8]); + FALLTHROUGH; + } + case TYP_SIMD32: { - add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[7]); - add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[6]); - add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[5]); - add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[4]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[7]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[6]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[5]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[4]); FALLTHROUGH; } #endif // TARGET_XARCH case TYP_SIMD16: { - add = genTreeHashAdd(ulo32(add), vecCon->gtSimd16Val.u32[3]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[3]); FALLTHROUGH; } case TYP_SIMD12: { - add = genTreeHashAdd(ulo32(add), vecCon->gtSimd12Val.u32[2]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[2]); FALLTHROUGH; } case TYP_SIMD8: { - add = genTreeHashAdd(ulo32(add), vecCon->gtSimd8Val.u32[1]); - add = genTreeHashAdd(ulo32(add), vecCon->gtSimd8Val.u32[0]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[1]); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[0]); break; } #endif // FEATURE_SIMD @@ -7265,197 +7277,177 @@ GenTreeVecCon* Compiler::gtNewVconNode(var_types type) GenTree* Compiler::gtNewAllBitsSetConNode(var_types type) { - GenTree* allBitsSet; +#ifdef FEATURE_SIMD + if 
(varTypeIsSIMD(type)) + { + GenTreeVecCon* allBitsSet = gtNewVconNode(type); + allBitsSet->gtSimdVal = simd_t::AllBitsSet(); + return allBitsSet; + } +#endif // FEATURE_SIMD switch (type) { case TYP_INT: - allBitsSet = gtNewIconNode(-1); - break; + { + return gtNewIconNode(-1); + } case TYP_LONG: - allBitsSet = gtNewLconNode(-1); - break; - -#ifdef FEATURE_SIMD - case TYP_SIMD8: - case TYP_SIMD12: - case TYP_SIMD16: -#if defined(TARGET_XARCH) - case TYP_SIMD32: - case TYP_SIMD64: // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. -#endif // TARGET_XARCH - { - allBitsSet = gtNewVconNode(type); - allBitsSet->AsVecCon()->gtSimd32Val.i64[0] = -1; - allBitsSet->AsVecCon()->gtSimd32Val.i64[1] = -1; - allBitsSet->AsVecCon()->gtSimd32Val.i64[2] = -1; - allBitsSet->AsVecCon()->gtSimd32Val.i64[3] = -1; - break; + { + return gtNewLconNode(-1); } -#endif // FEATURE_SIMD default: + { unreached(); + } } - - return allBitsSet; } GenTree* Compiler::gtNewZeroConNode(var_types type) { - GenTree* zero; +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(type)) + { + GenTreeVecCon* zero = gtNewVconNode(type); + zero->gtSimdVal = simd_t::Zero(); + return zero; + } +#endif // FEATURE_SIMD switch (genActualType(type)) { case TYP_INT: case TYP_REF: case TYP_BYREF: - zero = gtNewIconNode(0, type); - break; + { + return gtNewIconNode(0, type); + } case TYP_LONG: - zero = gtNewLconNode(0); - break; + { + return gtNewLconNode(0); + } case TYP_FLOAT: case TYP_DOUBLE: - zero = gtNewDconNode(0.0, type); - break; - -#ifdef FEATURE_SIMD - case TYP_SIMD8: - case TYP_SIMD12: - case TYP_SIMD16: -#if defined(TARGET_XARCH) - case TYP_SIMD32: - case TYP_SIMD64: // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val.
-#endif // TARGET_XARCH { - zero = gtNewVconNode(type); - zero->AsVecCon()->gtSimd32Val = {}; - break; + return gtNewDconNode(0.0, type); } -#endif // FEATURE_SIMD default: + { unreached(); + } } - - return zero; } GenTree* Compiler::gtNewOneConNode(var_types type, var_types simdBaseType /* = TYP_UNDEF */) { - GenTree* one; - - switch (type) +#if defined(FEATURE_SIMD) + if (varTypeIsSIMD(type)) { - case TYP_INT: - case TYP_UINT: - one = gtNewIconNode(1); - break; + GenTreeVecCon* one = gtNewVconNode(type); - case TYP_LONG: - case TYP_ULONG: - one = gtNewLconNode(1); - break; + unsigned simdSize = genTypeSize(type); + uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); - case TYP_FLOAT: - case TYP_DOUBLE: - one = gtNewDconNode(1.0, type); - break; - -#ifdef FEATURE_SIMD - case TYP_SIMD8: - case TYP_SIMD12: - case TYP_SIMD16: -#if defined(TARGET_XARCH) - case TYP_SIMD32: - case TYP_SIMD64: // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. -#endif // TARGET_XARCH + switch (simdBaseType) { - GenTreeVecCon* vecCon = gtNewVconNode(type); - - unsigned simdSize = genTypeSize(type); - uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); - - switch (simdBaseType) + case TYP_BYTE: + case TYP_UBYTE: { - case TYP_BYTE: - case TYP_UBYTE: - { - for (uint32_t index = 0; index < simdLength; index++) - { - vecCon->gtSimd32Val.u8[index] = 1; - } - break; - } - - case TYP_SHORT: - case TYP_USHORT: + for (uint32_t index = 0; index < simdLength; index++) { - for (uint32_t index = 0; index < simdLength; index++) - { - vecCon->gtSimd32Val.u16[index] = 1; - } - break; + one->gtSimdVal.u8[index] = 1; } + break; + } - case TYP_INT: - case TYP_UINT: + case TYP_SHORT: + case TYP_USHORT: + { + for (uint32_t index = 0; index < simdLength; index++) { - for (uint32_t index = 0; index < simdLength; index++) - { - vecCon->gtSimd32Val.u32[index] = 1; - } - break; + one->gtSimdVal.u16[index] = 1; } + break; + } - case TYP_LONG: - case TYP_ULONG: + case 
TYP_INT: + case TYP_UINT: + { + for (uint32_t index = 0; index < simdLength; index++) { - for (uint32_t index = 0; index < simdLength; index++) - { - vecCon->gtSimd32Val.u64[index] = 1; - } - break; + one->gtSimdVal.u32[index] = 1; } + break; + } - case TYP_FLOAT: + case TYP_LONG: + case TYP_ULONG: + { + for (uint32_t index = 0; index < simdLength; index++) { - for (uint32_t index = 0; index < simdLength; index++) - { - vecCon->gtSimd32Val.f32[index] = 1.0f; - } - break; + one->gtSimdVal.u64[index] = 1; } + break; + } - case TYP_DOUBLE: + case TYP_FLOAT: + { + for (uint32_t index = 0; index < simdLength; index++) { - for (uint32_t index = 0; index < simdLength; index++) - { - vecCon->gtSimd32Val.f64[index] = 1.0; - } - break; + one->gtSimdVal.f32[index] = 1.0f; } + break; + } - default: + case TYP_DOUBLE: + { + for (uint32_t index = 0; index < simdLength; index++) { - unreached(); + one->gtSimdVal.f64[index] = 1.0; } + break; } - one = vecCon; - break; + default: + { + unreached(); + } } + + return one; + } #endif // FEATURE_SIMD + switch (type) + { + case TYP_INT: + case TYP_UINT: + { + return gtNewIconNode(1); + } + + case TYP_LONG: + case TYP_ULONG: + { + return gtNewLconNode(1); + } + + case TYP_FLOAT: + case TYP_DOUBLE: + { + return gtNewDconNode(1.0, type); + } + default: + { unreached(); + } } - - return one; } GenTreeLclVar* Compiler::gtNewStoreLclVar(unsigned dstLclNum, GenTree* src) @@ -8420,7 +8412,7 @@ GenTree* Compiler::gtClone(GenTree* tree, bool complexOK) case GT_CNS_VEC: { GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet()); - vecCon->gtSimd32Val = tree->AsVecCon()->gtSimd32Val; + vecCon->gtSimdVal = tree->AsVecCon()->gtSimdVal; copy = vecCon; break; } @@ -8603,7 +8595,7 @@ GenTree* Compiler::gtCloneExpr( case GT_CNS_VEC: { GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet()); - vecCon->gtSimd32Val = tree->AsVecCon()->gtSimd32Val; + vecCon->gtSimdVal = tree->AsVecCon()->gtSimdVal; copy = vecCon; goto DONE; } @@ -11559,54 +11551,60 @@ void 
Compiler::gtDispConst(GenTree* tree) printf(""); break; +#if defined(FEATURE_SIMD) case GT_CNS_VEC: { GenTreeVecCon* vecCon = tree->AsVecCon(); switch (vecCon->TypeGet()) { -#if defined(FEATURE_SIMD) case TYP_SIMD8: { - simd8_t simdVal = vecCon->gtSimd8Val; - printf("<0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1]); + printf("<0x%08x, 0x%08x>", vecCon->gtSimdVal.u32[0], vecCon->gtSimdVal.u32[1]); break; } case TYP_SIMD12: { - simd12_t simdVal = vecCon->gtSimd12Val; - printf("<0x%08x, 0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1], simdVal.u32[2]); + printf("<0x%08x, 0x%08x, 0x%08x>", vecCon->gtSimdVal.u32[0], vecCon->gtSimdVal.u32[1], + vecCon->gtSimdVal.u32[2]); break; } case TYP_SIMD16: { - simd16_t simdVal = vecCon->gtSimd16Val; - printf("<0x%08x, 0x%08x, 0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1], simdVal.u32[2], - simdVal.u32[3]); + printf("<0x%08x, 0x%08x, 0x%08x, 0x%08x>", vecCon->gtSimdVal.u32[0], vecCon->gtSimdVal.u32[1], + vecCon->gtSimdVal.u32[2], vecCon->gtSimdVal.u32[3]); break; } #if defined(TARGET_XARCH) case TYP_SIMD32: - case TYP_SIMD64: // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. 
{ - simd32_t simdVal = vecCon->gtSimd32Val; - printf("<0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx>", simdVal.u64[0], simdVal.u64[1], - simdVal.u64[2], simdVal.u64[3]); + printf("<0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx>", vecCon->gtSimdVal.u64[0], + vecCon->gtSimdVal.u64[1], vecCon->gtSimdVal.u64[2], vecCon->gtSimdVal.u64[3]); + break; + } + + case TYP_SIMD64: + { + printf("<0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx>", + vecCon->gtSimdVal.u64[0], vecCon->gtSimdVal.u64[1], vecCon->gtSimdVal.u64[2], + vecCon->gtSimdVal.u64[3], vecCon->gtSimdVal.u64[4], vecCon->gtSimdVal.u64[5], + vecCon->gtSimdVal.u64[6], vecCon->gtSimdVal.u64[7]); break; } #endif // TARGET_XARCH -#endif // FEATURE_SIMD default: { unreached(); } } + break; } +#endif // FEATURE_SIMD default: assert(!"unexpected constant node"); @@ -19186,7 +19184,7 @@ GenTree* Compiler::gtNewSimdAbsNode( // This is -0.0f. We use the bit pattern to avoid // compiler issues on some platforms. - bitMask->gtSimd32Val.u32[i] = 0x80000000; + bitMask->gtSimdVal.u32[i] = 0x80000000; } } else @@ -19198,7 +19196,7 @@ GenTree* Compiler::gtNewSimdAbsNode( // This is -0.0. We use the bit pattern to avoid // compiler issues on some platforms. 
- bitMask->gtSimd32Val.u64[i] = 0x8000000000000000; + bitMask->gtSimdVal.u64[i] = 0x8000000000000000; } } @@ -20239,11 +20237,10 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon1->gtSimd32Val.u64[i] = constVal; + vecCon1->gtSimdVal.u64[i] = constVal; } - GenTreeVecCon* vecCon2 = gtNewVconNode(type); - vecCon2->gtSimd32Val = vecCon1->gtSimd32Val; + GenTree* vecCon2 = gtCloneExpr(vecCon1); // op1 = op1 - constVector op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, vecCon1, opJitType, simdSize, isSimdAsHWIntrinsic); @@ -20471,11 +20468,10 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon1->gtSimd32Val.u64[i] = constVal; + vecCon1->gtSimdVal.u64[i] = constVal; } - GenTreeVecCon* vecCon2 = gtNewVconNode(type); - vecCon2->gtSimd32Val = vecCon1->gtSimd32Val; + GenTree* vecCon2 = gtCloneExpr(vecCon1); // op1 = op1 - constVector op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, vecCon1, opJitType, simdSize, isSimdAsHWIntrinsic); @@ -20989,7 +20985,7 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode( for (unsigned i = 0; i < simdSize; i++) { - vecCon->gtSimd32Val.u8[i] = cnsVal; + vecCon->gtSimdVal.u8[i] = cnsVal; } break; } @@ -21001,7 +20997,7 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode( for (unsigned i = 0; i < (simdSize / 2); i++) { - vecCon->gtSimd32Val.u16[i] = cnsVal; + vecCon->gtSimdVal.u16[i] = cnsVal; } break; } @@ -21013,7 +21009,7 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode( for (unsigned i = 0; i < (simdSize / 4); i++) { - vecCon->gtSimd32Val.u32[i] = cnsVal; + vecCon->gtSimdVal.u32[i] = cnsVal; } break; } @@ -21025,7 +21021,7 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode( for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon->gtSimd32Val.u64[i] = cnsVal; + vecCon->gtSimdVal.u64[i] = cnsVal; } break; } @@ -21036,7 +21032,7 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode( for (unsigned i = 0; i < (simdSize / 4); i++) { - 
vecCon->gtSimd32Val.f32[i] = cnsVal; + vecCon->gtSimdVal.f32[i] = cnsVal; } break; } @@ -21047,7 +21043,7 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode( for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon->gtSimd32Val.f64[i] = cnsVal; + vecCon->gtSimdVal.f64[i] = cnsVal; } break; } @@ -21114,53 +21110,53 @@ GenTree* Compiler::gtNewSimdCreateScalarNode( if (op1->IsIntegralConst() || op1->IsCnsFltOrDbl()) { GenTreeVecCon* vecCon = gtNewVconNode(type); - vecCon->gtSimd32Val = {}; + vecCon->gtSimdVal = {}; switch (simdBaseType) { case TYP_BYTE: case TYP_UBYTE: { - uint8_t cnsVal = static_cast<uint8_t>(op1->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd32Val.u8[0] = cnsVal; + uint8_t cnsVal = static_cast<uint8_t>(op1->AsIntConCommon()->IntegralValue()); + vecCon->gtSimdVal.u8[0] = cnsVal; break; } case TYP_SHORT: case TYP_USHORT: { - uint16_t cnsVal = static_cast<uint16_t>(op1->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd32Val.u16[0] = cnsVal; + uint16_t cnsVal = static_cast<uint16_t>(op1->AsIntConCommon()->IntegralValue()); + vecCon->gtSimdVal.u16[0] = cnsVal; break; } case TYP_INT: case TYP_UINT: { - uint32_t cnsVal = static_cast<uint32_t>(op1->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd32Val.u32[0] = cnsVal; + uint32_t cnsVal = static_cast<uint32_t>(op1->AsIntConCommon()->IntegralValue()); + vecCon->gtSimdVal.u32[0] = cnsVal; break; } case TYP_LONG: case TYP_ULONG: { - uint64_t cnsVal = static_cast<uint64_t>(op1->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd32Val.u64[0] = cnsVal; + uint64_t cnsVal = static_cast<uint64_t>(op1->AsIntConCommon()->IntegralValue()); + vecCon->gtSimdVal.u64[0] = cnsVal; break; } case TYP_FLOAT: { - float cnsVal = static_cast<float>(op1->AsDblCon()->DconValue()); - vecCon->gtSimd32Val.f32[0] = cnsVal; + float cnsVal = static_cast<float>(op1->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[0] = cnsVal; break; } case TYP_DOUBLE: { - double cnsVal = static_cast<double>(op1->AsDblCon()->DconValue()); - vecCon->gtSimd32Val.f64[0] = cnsVal; + double cnsVal = static_cast<double>(op1->AsDblCon()->DconValue()); + 
vecCon->gtSimdVal.f64[0] = cnsVal; break; } @@ -21241,7 +21237,7 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode( for (unsigned i = 0; i < simdSize; i++) { - vecCon->gtSimd32Val.u8[i] = cnsVal; + vecCon->gtSimdVal.u8[i] = cnsVal; } break; } @@ -21253,7 +21249,7 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode( for (unsigned i = 0; i < (simdSize / 2); i++) { - vecCon->gtSimd32Val.u16[i] = cnsVal; + vecCon->gtSimdVal.u16[i] = cnsVal; } break; } @@ -21265,7 +21261,7 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode( for (unsigned i = 0; i < (simdSize / 4); i++) { - vecCon->gtSimd32Val.u32[i] = cnsVal; + vecCon->gtSimdVal.u32[i] = cnsVal; } break; } @@ -21277,7 +21273,7 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode( for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon->gtSimd32Val.u64[i] = cnsVal; + vecCon->gtSimdVal.u64[i] = cnsVal; } break; } @@ -21288,7 +21284,7 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode( for (unsigned i = 0; i < (simdSize / 4); i++) { - vecCon->gtSimd32Val.f32[i] = cnsVal; + vecCon->gtSimdVal.f32[i] = cnsVal; } break; } @@ -21299,7 +21295,7 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode( for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon->gtSimd32Val.f64[i] = cnsVal; + vecCon->gtSimdVal.f64[i] = cnsVal; } break; } @@ -21771,14 +21767,11 @@ GenTree* Compiler::gtNewSimdMaxNode(var_types type, for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon1->gtSimd32Val.u64[i] = constVal; + vecCon1->gtSimdVal.u64[i] = constVal; } - GenTreeVecCon* vecCon2 = gtNewVconNode(type); - vecCon2->gtSimd32Val = vecCon1->gtSimd32Val; - - GenTreeVecCon* vecCon3 = gtNewVconNode(type); - vecCon3->gtSimd32Val = vecCon2->gtSimd32Val; + GenTree* vecCon2 = gtCloneExpr(vecCon1); + GenTree* vecCon3 = gtCloneExpr(vecCon2); // op1 = op1 - constVector // -or- @@ -22107,11 +22100,10 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon1->gtSimd32Val.u64[i] = 
0x00FF00FF00FF00FF; + vecCon1->gtSimdVal.u64[i] = 0x00FF00FF00FF00FF; } - GenTreeVecCon* vecCon2 = gtNewVconNode(type); - vecCon2->gtSimd32Val = vecCon1->gtSimd32Val; + GenTree* vecCon2 = gtCloneExpr(vecCon1); tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); @@ -22151,11 +22143,10 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon1->gtSimd32Val.u64[i] = 0x0000FFFF0000FFFF; + vecCon1->gtSimdVal.u64[i] = 0x0000FFFF0000FFFF; } - GenTreeVecCon* vecCon2 = gtNewVconNode(type); - vecCon2->gtSimd32Val = vecCon1->gtSimd32Val; + GenTree* vecCon2 = gtCloneExpr(vecCon1); tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); @@ -22260,11 +22251,10 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon1->gtSimd16Val.u64[i] = 0x00FF00FF00FF00FF; + vecCon1->gtSimdVal.u64[i] = 0x00FF00FF00FF00FF; } - GenTreeVecCon* vecCon2 = gtNewVconNode(type); - vecCon2->gtSimd16Val = vecCon1->gtSimd16Val; + GenTree* vecCon2 = gtCloneExpr(vecCon1); tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); @@ -22303,11 +22293,10 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, for (unsigned i = 0; i < (simdSize / 8); i++) { - vecCon1->gtSimd16Val.u64[i] = 0x0000FFFF0000FFFF; + vecCon1->gtSimdVal.u64[i] = 0x0000FFFF0000FFFF; } - GenTreeVecCon* vecCon2 = gtNewVconNode(type); - vecCon2->gtSimd16Val = vecCon1->gtSimd16Val; + GenTree* vecCon2 = gtCloneExpr(vecCon1); tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); @@ -22523,8 +22512,8 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, bool crossLane = false; bool needsZero = varTypeIsSmallInt(simdBaseType); uint64_t value = 0; - simd32_t vecCns = {}; - simd32_t mskCns = 
{}; + simd_t vecCns = {}; + simd_t mskCns = {}; for (size_t index = 0; index < elementCount; index++) { @@ -22648,8 +22637,8 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, vecCns.u32[i] = (uint8_t)(vecCns.u8[i * elementSize] / elementSize); } - op2 = gtNewVconNode(type); - op2->AsVecCon()->gtSimd32Val = vecCns; + op2 = gtNewVconNode(type); + op2->AsVecCon()->gtSimdVal = vecCns; // swap the operands to match the encoding requirements retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize, @@ -22727,7 +22716,7 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, return retNode; #elif defined(TARGET_ARM64) uint64_t value = 0; - simd16_t vecCns = {}; + simd_t vecCns = {}; for (size_t index = 0; index < elementCount; index++) { @@ -22762,8 +22751,8 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, // VectorTableLookup is only valid on byte/sbyte simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; - op2 = gtNewVconNode(type); - op2->AsVecCon()->gtSimd16Val = vecCns; + op2 = gtNewVconNode(type); + op2->AsVecCon()->gtSimdVal = vecCns; return gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); #else diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 9972b71d8b8c54..edfaa7b9516ec4 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6316,7 +6316,13 @@ struct GenTreeVecCon : public GenTree simd8_t gtSimd8Val; simd12_t gtSimd12Val; simd16_t gtSimd16Val; + +#if defined(TARGET_XARCH) simd32_t gtSimd32Val; + simd64_t gtSimd64Val; +#endif // TARGET_XARCH + + simd_t gtSimdVal; }; #if defined(FEATURE_HW_INTRINSICS) @@ -6539,26 +6545,28 @@ struct GenTreeVecCon : public GenTree #if defined(FEATURE_SIMD) case TYP_SIMD8: { - return (gtSimd8Val.u64[0] == 0xFFFFFFFFFFFFFFFF); + return gtSimd8Val.IsAllBitsSet(); } case TYP_SIMD12: { - return (gtSimd12Val.u32[0] == 0xFFFFFFFF) && 
(gtSimd12Val.u32[1] == 0xFFFFFFFF) && - (gtSimd12Val.u32[2] == 0xFFFFFFFF); + return gtSimd12Val.IsAllBitsSet(); } case TYP_SIMD16: { - return (gtSimd16Val.u64[0] == 0xFFFFFFFFFFFFFFFF) && (gtSimd16Val.u64[1] == 0xFFFFFFFFFFFFFFFF); + return gtSimd16Val.IsAllBitsSet(); } #if defined(TARGET_XARCH) case TYP_SIMD32: - case TYP_SIMD64: // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. { - return (gtSimd32Val.u64[0] == 0xFFFFFFFFFFFFFFFF) && (gtSimd32Val.u64[1] == 0xFFFFFFFFFFFFFFFF) && - (gtSimd32Val.u64[2] == 0xFFFFFFFFFFFFFFFF) && (gtSimd32Val.u64[3] == 0xFFFFFFFFFFFFFFFF); + return gtSimd32Val.IsAllBitsSet(); + } + + case TYP_SIMD64: + { + return gtSimd64Val.IsAllBitsSet(); } #endif // TARGET_XARCH #endif // FEATURE_SIMD @@ -6584,30 +6592,28 @@ struct GenTreeVecCon : public GenTree #if defined(FEATURE_SIMD) case TYP_SIMD8: { - return (left->gtSimd8Val.u64[0] == right->gtSimd8Val.u64[0]); + return left->gtSimd8Val == right->gtSimd8Val; } case TYP_SIMD12: { - return (left->gtSimd12Val.u32[0] == right->gtSimd12Val.u32[0]) && - (left->gtSimd12Val.u32[1] == right->gtSimd12Val.u32[1]) && - (left->gtSimd12Val.u32[2] == right->gtSimd12Val.u32[2]); + return left->gtSimd12Val == right->gtSimd12Val; } case TYP_SIMD16: { - return (left->gtSimd16Val.u64[0] == right->gtSimd16Val.u64[0]) && - (left->gtSimd16Val.u64[1] == right->gtSimd16Val.u64[1]); + return left->gtSimd16Val == right->gtSimd16Val; } #if defined(TARGET_XARCH) case TYP_SIMD32: - case TYP_SIMD64: // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. 
{ - return (left->gtSimd32Val.u64[0] == right->gtSimd32Val.u64[0]) && - (left->gtSimd32Val.u64[1] == right->gtSimd32Val.u64[1]) && - (left->gtSimd32Val.u64[2] == right->gtSimd32Val.u64[2]) && - (left->gtSimd32Val.u64[3] == right->gtSimd32Val.u64[3]); + return left->gtSimd32Val == right->gtSimd32Val; + } + + case TYP_SIMD64: + { + return left->gtSimd64Val == right->gtSimd64Val; } #endif // TARGET_XARCH #endif // FEATURE_SIMD @@ -6626,26 +6632,28 @@ struct GenTreeVecCon : public GenTree #if defined(FEATURE_SIMD) case TYP_SIMD8: { - return (gtSimd8Val.u64[0] == 0x0000000000000000); + return gtSimd8Val.IsZero(); } case TYP_SIMD12: { - return (gtSimd12Val.u32[0] == 0x00000000) && (gtSimd12Val.u32[1] == 0x00000000) && - (gtSimd12Val.u32[2] == 0x00000000); + return gtSimd12Val.IsZero(); } case TYP_SIMD16: { - return (gtSimd16Val.u64[0] == 0x0000000000000000) && (gtSimd16Val.u64[1] == 0x0000000000000000); + return gtSimd16Val.IsZero(); } #if defined(TARGET_XARCH) case TYP_SIMD32: - case TYP_SIMD64: // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. 
{ - return (gtSimd32Val.u64[0] == 0x0000000000000000) && (gtSimd32Val.u64[1] == 0x0000000000000000) && - (gtSimd32Val.u64[2] == 0x0000000000000000) && (gtSimd32Val.u64[3] == 0x0000000000000000); + return gtSimd32Val.IsZero(); + } + + case TYP_SIMD64: + { + return gtSimd64Val.IsZero(); } #endif // TARGET_XARCH #endif // FEATURE_SIMD @@ -6660,6 +6668,12 @@ struct GenTreeVecCon : public GenTree GenTreeVecCon(var_types type) : GenTree(GT_CNS_VEC, type) { assert(varTypeIsSIMD(type)); + +#if defined(TARGET_XARCH) + static_assert(sizeof(simd_t) == sizeof(simd64_t), "simd_t must be the same size as simd64_t on xarch"); +#else + static_assert(sizeof(simd_t) == sizeof(simd16_t), "simd_t must be the same size as simd16_t on non-xarch targets"); +#endif } #if DEBUGGABLE_GENTREE @@ -9050,44 +9064,44 @@ inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index, var_types s { case TYP_BYTE: { - return node->gtSimd32Val.i8[index]; + return node->gtSimdVal.i8[index]; } case TYP_UBYTE: { - return node->gtSimd32Val.u8[index]; + return node->gtSimdVal.u8[index]; } case TYP_SHORT: { - return node->gtSimd32Val.i16[index]; + return node->gtSimdVal.i16[index]; } case TYP_USHORT: { - return node->gtSimd32Val.u16[index]; + return node->gtSimdVal.u16[index]; } case TYP_INT: case TYP_FLOAT: { - return node->gtSimd32Val.i32[index]; + return node->gtSimdVal.i32[index]; } case TYP_UINT: { - return node->gtSimd32Val.u32[index]; + return node->gtSimdVal.u32[index]; } case TYP_LONG: case TYP_DOUBLE: { - return node->gtSimd32Val.i64[index]; + return node->gtSimdVal.i64[index]; } case TYP_ULONG: { - return node->gtSimd32Val.u64[index]; + return node->gtSimdVal.u64[index]; } default: @@ -9865,7 +9879,13 @@ inline GenTree* GenTree::GetIndirOrArrMetaDataAddr() /*****************************************************************************/ const size_t TREE_NODE_SZ_SMALL = sizeof(GenTreeLclFld); -const size_t TREE_NODE_SZ_LARGE = sizeof(GenTreeCall); + +// For some configurations, such as x86 release, GenTreeVecCon is +// the largest by a small margin due to needing to carry a simd64_t +// constant value.
Otherwise, GenTreeCall is the largest. + +const size_t TREE_NODE_SZ_LARGE = + (sizeof(GenTreeVecCon) < sizeof(GenTreeCall)) ? sizeof(GenTreeCall) : sizeof(GenTreeVecCon); enum varRefKinds { diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 93e36b710e065b..179f19bac40c54 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -458,8 +458,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, GenTreeVecCon* vecCon = op1->AsVecCon(); vecCon->gtType = TYP_SIMD16; - vecCon->gtSimd16Val.f32[2] = 0.0f; - vecCon->gtSimd16Val.f32[3] = 0.0f; + vecCon->gtSimdVal.f32[2] = 0.0f; + vecCon->gtSimdVal.f32[3] = 0.0f; return vecCon; } @@ -488,7 +488,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, GenTreeVecCon* vecCon = op1->AsVecCon(); vecCon->gtType = TYP_SIMD16; - vecCon->gtSimd16Val.f32[3] = 0.0f; + vecCon->gtSimdVal.f32[3] = 0.0f; return vecCon; } @@ -712,7 +712,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd16Val.u8[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.u8[simdLength - 1 - index] = cnsVal; } break; } @@ -725,7 +725,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd16Val.u16[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.u16[simdLength - 1 - index] = cnsVal; } break; } @@ -738,7 +738,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd16Val.u32[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.u32[simdLength - 1 - index] = cnsVal; 
} break; } @@ -751,7 +751,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd16Val.u64[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.u64[simdLength - 1 - index] = cnsVal; } break; } @@ -763,7 +763,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsDblCon()->DconValue()); - vecCon->gtSimd16Val.f32[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.f32[simdLength - 1 - index] = cnsVal; } break; } @@ -775,7 +775,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsDblCon()->DconValue()); - vecCon->gtSimd16Val.f64[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.f64[simdLength - 1 - index] = cnsVal; } break; } @@ -969,13 +969,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, simdBaseType = TYP_UBYTE; simdBaseJitType = CORINFO_TYPE_UBYTE; - vecCon2->gtSimd16Val.u64[0] = 0x8080808080808080; - vecCon3->gtSimd16Val.u64[0] = 0x00FFFEFDFCFBFAF9; + vecCon2->gtSimdVal.u64[0] = 0x8080808080808080; + vecCon3->gtSimdVal.u64[0] = 0x00FFFEFDFCFBFAF9; if (simdSize == 16) { - vecCon2->gtSimd16Val.u64[1] = 0x8080808080808080; - vecCon3->gtSimd16Val.u64[1] = 0x00FFFEFDFCFBFAF9; + vecCon2->gtSimdVal.u64[1] = 0x8080808080808080; + vecCon3->gtSimdVal.u64[1] = 0x00FFFEFDFCFBFAF9; } break; } @@ -986,13 +986,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, simdBaseType = TYP_USHORT; simdBaseJitType = CORINFO_TYPE_USHORT; - vecCon2->gtSimd16Val.u64[0] = 0x8000800080008000; - vecCon3->gtSimd16Val.u64[0] = 0xFFF4FFF3FFF2FFF1; + vecCon2->gtSimdVal.u64[0] = 0x8000800080008000; + vecCon3->gtSimdVal.u64[0] = 0xFFF4FFF3FFF2FFF1; if (simdSize == 
16) { - vecCon2->gtSimd16Val.u64[1] = 0x8000800080008000; - vecCon3->gtSimd16Val.u64[1] = 0xFFF8FFF7FFF6FFF5; + vecCon2->gtSimdVal.u64[1] = 0x8000800080008000; + vecCon3->gtSimdVal.u64[1] = 0xFFF8FFF7FFF6FFF5; } break; } @@ -1004,13 +1004,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, simdBaseType = TYP_INT; simdBaseJitType = CORINFO_TYPE_INT; - vecCon2->gtSimd16Val.u64[0] = 0x8000000080000000; - vecCon3->gtSimd16Val.u64[0] = 0xFFFFFFE2FFFFFFE1; + vecCon2->gtSimdVal.u64[0] = 0x8000000080000000; + vecCon3->gtSimdVal.u64[0] = 0xFFFFFFE2FFFFFFE1; if (simdSize == 16) { - vecCon2->gtSimd16Val.u64[1] = 0x8000000080000000; - vecCon3->gtSimd16Val.u64[1] = 0xFFFFFFE4FFFFFFE3; + vecCon2->gtSimdVal.u64[1] = 0x8000000080000000; + vecCon3->gtSimdVal.u64[1] = 0xFFFFFFE4FFFFFFE3; } break; } @@ -1022,13 +1022,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, simdBaseType = TYP_LONG; simdBaseJitType = CORINFO_TYPE_LONG; - vecCon2->gtSimd16Val.u64[0] = 0x8000000000000000; - vecCon3->gtSimd16Val.u64[0] = 0xFFFFFFFFFFFFFFC1; + vecCon2->gtSimdVal.u64[0] = 0x8000000000000000; + vecCon3->gtSimdVal.u64[0] = 0xFFFFFFFFFFFFFFC1; if (simdSize == 16) { - vecCon2->gtSimd16Val.u64[1] = 0x8000000000000000; - vecCon3->gtSimd16Val.u64[1] = 0xFFFFFFFFFFFFFFC2; + vecCon2->gtSimdVal.u64[1] = 0x8000000000000000; + vecCon3->gtSimdVal.u64[1] = 0xFFFFFFFFFFFFFFC2; } break; } diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 3b46d9ba9c7a07..1d8afd109b7fdd 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -667,8 +667,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, GenTreeVecCon* vecCon = op1->AsVecCon(); vecCon->gtType = TYP_SIMD16; - vecCon->gtSimd16Val.f32[2] = 0.0f; - vecCon->gtSimd16Val.f32[3] = 0.0f; + vecCon->gtSimdVal.f32[2] = 0.0f; + vecCon->gtSimdVal.f32[3] = 0.0f; return vecCon; } @@ -697,7 +697,7 @@ GenTree* 
Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, GenTreeVecCon* vecCon = op1->AsVecCon(); vecCon->gtType = TYP_SIMD16; - vecCon->gtSimd16Val.f32[3] = 0.0f; + vecCon->gtSimdVal.f32[3] = 0.0f; return vecCon; } @@ -942,11 +942,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } } - // TODO-XArch-AVX512 : Add this path for simd64 once vecCon supports simd64. - if (isConstant && (simdSize != 64)) + if (isConstant) { - // Some of the below code assumes 16 or 32 byte SIMD types - assert((simdSize == 16) || (simdSize == 32)); + // Some of the below code assumes 16/32/64 byte SIMD types + assert((simdSize == 16) || (simdSize == 32) || (simdSize == 64)); GenTreeVecCon* vecCon = gtNewVconNode(retType); @@ -960,7 +959,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd32Val.u8[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.u8[simdLength - 1 - index] = cnsVal; } break; } @@ -973,7 +972,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd32Val.u16[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.u16[simdLength - 1 - index] = cnsVal; } break; } @@ -986,7 +985,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsIntConCommon()->IntegralValue()); - vecCon->gtSimd32Val.u32[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.u32[simdLength - 1 - index] = cnsVal; } break; } @@ -999,7 +998,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsIntConCommon()->IntegralValue()); - 
vecCon->gtSimd32Val.u64[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.u64[simdLength - 1 - index] = cnsVal; } break; } @@ -1011,7 +1010,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { cnsVal = static_cast(impPopStack().val->AsDblCon()->DconValue()); - vecCon->gtSimd32Val.f32[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.f32[simdLength - 1 - index] = cnsVal; } break; } @@ -1023,7 +1022,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, for (uint32_t index = 0; index < sig->numArgs; index++) { double cnsVal = static_cast(impPopStack().val->AsDblCon()->DconValue()); - vecCon->gtSimd32Val.f64[simdLength - 1 - index] = cnsVal; + vecCon->gtSimdVal.f64[simdLength - 1 - index] = cnsVal; } break; } @@ -1274,7 +1273,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case TYP_SHORT: case TYP_USHORT: { - simd32_t simd32Val = {}; + simd_t simdVal = {}; assert((simdSize == 16) || (simdSize == 32) || (simdSize == 64)); simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; @@ -1284,15 +1283,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // // The most significant bit being set means zero the value - simd32Val.u64[0] = 0x0F0D0B0907050301; - simd32Val.u64[1] = 0x8080808080808080; + simdVal.u64[0] = 0x0F0D0B0907050301; + simdVal.u64[1] = 0x8080808080808080; if (simdSize == 32) { // Vector256 works on 2x128-bit lanes, so repeat the same indices for the upper lane - simd32Val.u64[2] = 0x0F0D0B0907050301; - simd32Val.u64[3] = 0x8080808080808080; + simdVal.u64[2] = 0x0F0D0B0907050301; + simdVal.u64[3] = 0x8080808080808080; shuffleIntrinsic = NI_AVX2_Shuffle; moveMaskIntrinsic = NI_AVX2_MoveMask; @@ -1307,8 +1306,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return nullptr; } - op2 = gtNewVconNode(simdType); - op2->AsVecCon()->gtSimd32Val = simd32Val; + op2 = gtNewVconNode(simdType); + memcpy(&op2->AsVecCon()->gtSimdVal, &simdVal, simdSize); op1 = impSIMDPopStack(simdType); op1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2, shuffleIntrinsic, simdBaseJitType, simdSize); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index f09909d0597829..735b9c2bdff8b7 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3981,7 +3981,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI if (hwAccelerated) { GenTreeVecCon* vec = gtNewVconNode(simdType); - memcpy(&vec->gtSimd32Val, buffer, totalSize); + memcpy(&vec->gtSimdVal, buffer, totalSize); return vec; } } diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 58e61928e0e042..d24de92aaa6f87 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3522,11 +3522,11 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, if (callJitType == CORINFO_TYPE_FLOAT) { - vecCon->gtSimd16Val.f32[0] = (float)op1->AsDblCon()->DconValue(); + vecCon->gtSimdVal.f32[0] = 
(float)op1->AsDblCon()->DconValue(); } else { - vecCon->gtSimd16Val.f64[0] = op1->AsDblCon()->DconValue(); + vecCon->gtSimdVal.f64[0] = op1->AsDblCon()->DconValue(); } op1 = vecCon; diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp index c7759b57ca7d33..bf1e67952cc2e4 100644 --- a/src/coreclr/jit/importervectorization.cpp +++ b/src/coreclr/jit/importervectorization.cpp @@ -70,7 +70,7 @@ static bool ConvertToLowerCase(WCHAR* input, WCHAR* mask, int length) #if defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ -// CreateConstVector: a helper to create Vector128/256.Create() node +// CreateConstVector: a helper to create Vector128/256/512.Create() node // // Arguments: // comp - Compiler object @@ -78,19 +78,26 @@ static bool ConvertToLowerCase(WCHAR* input, WCHAR* mask, int length) // cns - Constant data // // Return Value: -// GenTreeVecCon node representing Vector128/256.Create() +// GenTreeVecCon node representing Vector128/256/512.Create() // static GenTreeVecCon* CreateConstVector(Compiler* comp, var_types simdType, WCHAR* cns) { #ifdef TARGET_XARCH - if (simdType >= TYP_SIMD32) + if (simdType == TYP_SIMD64) + { + simd64_t simd64Val = {}; + GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType); + + memcpy(&vecCon->gtSimdVal, cns, sizeof(simd64_t)); + return vecCon; + } + + if (simdType == TYP_SIMD32) { - assert((simdType == TYP_SIMD32) || (simdType == TYP_SIMD64)); - // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. 
simd32_t simd32Val = {}; GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType); - memcpy(&vecCon->gtSimd32Val, cns, sizeof(simd32_t)); + memcpy(&vecCon->gtSimdVal, cns, sizeof(simd32_t)); return vecCon; } #endif // TARGET_XARCH @@ -100,7 +107,7 @@ static GenTreeVecCon* CreateConstVector(Compiler* comp, var_types simdType, WCHA simd16_t simd16Val = {}; GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType); - memcpy(&vecCon->gtSimd16Val, cns, sizeof(simd16_t)); + memcpy(&vecCon->gtSimdVal, cns, sizeof(simd16_t)); return vecCon; } diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 5b21f54e3c8e6e..8c5006a8b9d5ee 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -772,34 +772,37 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) #if defined(FEATURE_SIMD) case TYP_SIMD8: { - simd8_t constValue = op->AsVecCon()->gtSimd8Val; + simd8_t constValue; + memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd8_t)); return OperandDesc(emit->emitSimd8Const(constValue)); } case TYP_SIMD12: - case TYP_SIMD16: { simd16_t constValue = {}; - - if (op->TypeIs(TYP_SIMD12)) - memcpy(&constValue, &op->AsVecCon()->gtSimd12Val, sizeof(simd12_t)); - else - constValue = op->AsVecCon()->gtSimd16Val; - + memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd12_t)); + return OperandDesc(emit->emitSimd16Const(constValue)); + } + case TYP_SIMD16: + { + simd16_t constValue; + memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd16_t)); return OperandDesc(emit->emitSimd16Const(constValue)); } #if defined(TARGET_XARCH) case TYP_SIMD32: { - simd32_t constValue = op->AsVecCon()->gtSimd32Val; + simd32_t constValue; + memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd32_t)); return OperandDesc(emit->emitSimd32Const(constValue)); } - case TYP_SIMD64: // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. 
+ case TYP_SIMD64: { - simd32_t constValue = op->AsVecCon()->gtSimd32Val; - return OperandDesc(emit->emitSimd32Const(constValue)); + simd64_t constValue; + memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd64_t)); + return OperandDesc(emit->emitSimd64Const(constValue)); } #endif // TARGET_XARCH #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 72f223f758be01..eff64c6b27c240 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1430,7 +1430,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); - simd32_t simd32Val = {}; + simd_t simdVal = {}; if ((simdSize == 8) && (simdType == TYP_DOUBLE)) { @@ -1443,7 +1443,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); - bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val); + bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simdVal); bool isCreateScalar = (intrinsicId == NI_Vector64_CreateScalar) || (intrinsicId == NI_Vector128_CreateScalar); size_t argCnt = node->GetOperandCount(); @@ -1468,7 +1468,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType); - vecCon->gtSimd32Val = simd32Val; + vecCon->gtSimdVal = simdVal; BlockRange().InsertBefore(node, vecCon); LIR::Use use; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 761e9f09b0b264..02892cc146e85c 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1186,7 +1186,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) op1->ChangeType(node->TypeGet()); // Ensure the upper values are zero by zero-initialization. 
- op1->AsVecCon()->gtSimd32Val = {}; + op1->AsVecCon()->gtSimdVal = {}; // While this case is unlikely, we'll handle it here to simplify some // of the logic that exists below. Effectively `Insert(zero, zero, idx)` @@ -1912,7 +1912,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); - simd64_t simd64Val = {}; + simd_t simdVal = {}; if ((simdSize == 8) && (simdType == TYP_DOUBLE)) { @@ -1934,16 +1934,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) GenTree* tmp2 = nullptr; GenTree* tmp3 = nullptr; - bool isConstant = false; - // TODO-XArch-AVX512: Keep only one path once GenTreeVecCon supports gtSimd64Val. - if (simdSize != 64) - { - isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd64Val.v256[0]); - } - else - { - isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd64Val); - } + bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simdVal); bool isCreateScalar = (intrinsicId == NI_Vector128_CreateScalar) || (intrinsicId == NI_Vector256_CreateScalar); size_t argCnt = node->GetOperandCount(); @@ -1963,69 +1954,23 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().Remove(arg); } - if (simdSize != 64) - { - GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType); - - vecCon->gtSimd32Val = simd64Val.v256[0]; - BlockRange().InsertBefore(node, vecCon); - - LIR::Use use; - if (BlockRange().TryGetUse(node, &use)) - { - use.ReplaceWith(vecCon); - } - else - { - vecCon->SetUnusedValue(); - } - - BlockRange().Remove(node); + GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType); + memcpy(&vecCon->gtSimdVal, &simdVal, simdSize); + BlockRange().InsertBefore(node, vecCon); - return LowerNode(vecCon); + LIR::Use use; + if (BlockRange().TryGetUse(node, &use)) + { + use.ReplaceWith(vecCon); } else { - 
assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); - - // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. - // We will be constructing the following parts: - // /--* op1 T - // +--* ... T - // lo = * HWINTRINSIC simd32 T Create - // /--* ... T - // +--* opN T - // hi = * HWINTRINSIC simd32 T Create - // +--* lo simd32 - // tmp1 = * HWINTRINSIC simd64 T ToVector512Unsafe - // idx = CNS_INT int 1 - // /--* tmp1 simd64 - // +--* hi simd32 - // +--* idx int - // node = * HWINTRINSIC simd64 T InsertVector256 - - GenTreeVecCon* vecCon0 = comp->gtNewVconNode(TYP_SIMD32); - vecCon0->gtSimd32Val = simd64Val.v256[0]; - BlockRange().InsertBefore(node, vecCon0); - LowerNode(vecCon0); - GenTreeVecCon* vecCon1 = comp->gtNewVconNode(TYP_SIMD32); - vecCon1->gtSimd32Val = simd64Val.v256[1]; - BlockRange().InsertAfter(vecCon0, vecCon1); - - tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD64, vecCon0, NI_Vector256_ToVector512Unsafe, simdBaseJitType, - 32); - BlockRange().InsertAfter(vecCon1, tmp1); - - idx = comp->gtNewIconNode(0x01, TYP_INT); - BlockRange().InsertAfter(tmp1, idx); + vecCon->SetUnusedValue(); + } - node->ResetHWIntrinsicId(NI_AVX512F_InsertVector256, comp, tmp1, vecCon1, idx); + BlockRange().Remove(node); - LowerNode(vecCon1); - LowerNode(idx); - LowerNode(tmp1); - return LowerNode(node); - } + return LowerNode(vecCon); } else if (argCnt == 1) { @@ -4078,7 +4023,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // ... GenTreeVecCon* vecCon1 = comp->gtNewVconNode(simdType); - vecCon1->gtSimd16Val = simd16Val; + memcpy(&vecCon1->gtSimdVal, &simd16Val, sizeof(simd16_t)); BlockRange().InsertAfter(op1, vecCon1); @@ -4107,7 +4052,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // ... 
GenTreeVecCon* vecCon2 = comp->gtNewVconNode(simdType); - vecCon2->gtSimd16Val = simd16Val; + memcpy(&vecCon2->gtSimdVal, &simd16Val, sizeof(simd16_t)); BlockRange().InsertAfter(op2, vecCon2); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 3de6ce1d411916..1ed74aefe169ca 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10779,17 +10779,9 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) return node; } -#if defined(TARGET_XARCH) - // TODO-XArch-AVX512: Enable for simd64 once GenTreeVecCon supports gtSimd64Val. - if (node->TypeGet() == TYP_SIMD64) - { - return node; - } -#endif // TARGET_XARCH - - simd32_t simd32Val = {}; + simd_t simdVal = {}; - if (GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val)) + if (GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simdVal)) { GenTreeVecCon* vecCon = gtNewVconNode(node->TypeGet()); @@ -10798,7 +10790,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) DEBUG_DESTROY_NODE(arg); } - vecCon->gtSimd32Val = simd32Val; + vecCon->gtSimdVal = simdVal; INDEBUG(vecCon->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); return vecCon; } diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index 6a810d0b9d3573..71d6958287d67f 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -2656,7 +2656,7 @@ class CSE_Heuristic int spillSimdRegInProlog = 1; #if defined(TARGET_XARCH) - // If we have a SIMD32 that is live across a call we have even higher spill costs + // If we have a SIMD32/64 that is live across a call we have even higher spill costs // if (candidate->Expr()->TypeIs(TYP_SIMD32, TYP_SIMD64)) { diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 127f223e378a47..42989271c51596 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -62,7 +62,31 @@ struct simd8_t bool operator!=(const simd8_t& other) const { - return (u64[0] != other.u64[0]); + return !(*this == other); + } + + static 
simd8_t AllBitsSet() + { + simd8_t result; + + result.u64[0] = 0xFFFFFFFFFFFFFFFF; + + return result; + } + + bool IsAllBitsSet() const + { + return *this == AllBitsSet(); + } + + bool IsZero() const + { + return *this == Zero(); + } + + static simd8_t Zero() + { + return {}; } }; @@ -92,7 +116,33 @@ struct simd12_t bool operator!=(const simd12_t& other) const { - return (u32[0] != other.u32[0]) || (u32[1] != other.u32[1]) || (u32[2] != other.u32[2]); + return !(*this == other); + } + + static simd12_t AllBitsSet() + { + simd12_t result; + + result.u32[0] = 0xFFFFFFFF; + result.u32[1] = 0xFFFFFFFF; + result.u32[2] = 0xFFFFFFFF; + + return result; + } + + bool IsAllBitsSet() const + { + return *this == AllBitsSet(); + } + + bool IsZero() const + { + return *this == Zero(); + } + + static simd12_t Zero() + { + return {}; } }; @@ -114,15 +164,41 @@ struct simd16_t bool operator==(const simd16_t& other) const { - return (u64[0] == other.u64[0]) && (u64[1] == other.u64[1]); + return (v64[0] == other.v64[0]) && (v64[1] == other.v64[1]); } bool operator!=(const simd16_t& other) const { - return (u64[0] != other.u64[0]) || (u64[1] != other.u64[1]); + return !(*this == other); + } + + static simd16_t AllBitsSet() + { + simd16_t result; + + result.v64[0] = simd8_t::AllBitsSet(); + result.v64[1] = simd8_t::AllBitsSet(); + + return result; + } + + bool IsAllBitsSet() const + { + return *this == AllBitsSet(); + } + + bool IsZero() const + { + return *this == Zero(); + } + + static simd16_t Zero() + { + return {}; } }; +#if defined(TARGET_XARCH) struct simd32_t { union { @@ -142,14 +218,37 @@ struct simd32_t bool operator==(const simd32_t& other) const { - return (u64[0] == other.u64[0]) && (u64[1] == other.u64[1]) && (u64[2] == other.u64[2]) && - (u64[3] == other.u64[3]); + return (v128[0] == other.v128[0]) && (v128[1] == other.v128[1]); } bool operator!=(const simd32_t& other) const { - return (u64[0] != other.u64[0]) || (u64[1] != other.u64[1]) || (u64[2] != other.u64[2]) || 
- (u64[3] != other.u64[3]); + return !(*this == other); + } + + static simd32_t AllBitsSet() + { + simd32_t result; + + result.v128[0] = simd16_t::AllBitsSet(); + result.v128[1] = simd16_t::AllBitsSet(); + + return result; + } + + bool IsAllBitsSet() const + { + return *this == AllBitsSet(); + } + + bool IsZero() const + { + return *this == Zero(); + } + + static simd32_t Zero() + { + return {}; } }; @@ -178,10 +277,40 @@ struct simd64_t bool operator!=(const simd64_t& other) const { - return (v256[0] != other.v256[0]) || (v256[1] != other.v256[1]); + return !(*this == other); + } + + static simd64_t AllBitsSet() + { + simd64_t result; + + result.v256[0] = simd32_t::AllBitsSet(); + result.v256[1] = simd32_t::AllBitsSet(); + + return result; + } + + bool IsAllBitsSet() const + { + return *this == AllBitsSet(); + } + + bool IsZero() const + { + return *this == Zero(); + } + + static simd64_t Zero() + { + return {}; } }; +typedef simd64_t simd_t; +#else +typedef simd16_t simd_t; +#endif + template TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0) { diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index e846312c40eb76..2ce3f5476791be 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -815,10 +815,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { GenTreeVecCon* vecCon = gtNewVconNode(retType); - vecCon->gtSimd16Val.f32[0] = 1.0f; - vecCon->gtSimd16Val.f32[1] = 0.0f; - vecCon->gtSimd16Val.f32[2] = 0.0f; - vecCon->gtSimd16Val.f32[3] = 0.0f; + vecCon->gtSimdVal.f32[0] = 1.0f; + vecCon->gtSimdVal.f32[1] = 0.0f; + vecCon->gtSimdVal.f32[2] = 0.0f; + vecCon->gtSimdVal.f32[3] = 0.0f; return vecCon; } @@ -829,10 +829,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { GenTreeVecCon* vecCon = gtNewVconNode(retType); - vecCon->gtSimd16Val.f32[0] = 0.0f; - vecCon->gtSimd16Val.f32[1] = 1.0f; - vecCon->gtSimd16Val.f32[2] 
= 0.0f; - vecCon->gtSimd16Val.f32[3] = 0.0f; + vecCon->gtSimdVal.f32[0] = 0.0f; + vecCon->gtSimdVal.f32[1] = 1.0f; + vecCon->gtSimdVal.f32[2] = 0.0f; + vecCon->gtSimdVal.f32[3] = 0.0f; return vecCon; } @@ -842,10 +842,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { GenTreeVecCon* vecCon = gtNewVconNode(retType); - vecCon->gtSimd16Val.f32[0] = 0.0f; - vecCon->gtSimd16Val.f32[1] = 0.0f; - vecCon->gtSimd16Val.f32[2] = 1.0f; - vecCon->gtSimd16Val.f32[3] = 0.0f; + vecCon->gtSimdVal.f32[0] = 0.0f; + vecCon->gtSimdVal.f32[1] = 0.0f; + vecCon->gtSimdVal.f32[2] = 1.0f; + vecCon->gtSimdVal.f32[3] = 0.0f; return vecCon; } @@ -855,10 +855,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { GenTreeVecCon* vecCon = gtNewVconNode(retType); - vecCon->gtSimd16Val.f32[0] = 0.0f; - vecCon->gtSimd16Val.f32[1] = 0.0f; - vecCon->gtSimd16Val.f32[2] = 0.0f; - vecCon->gtSimd16Val.f32[3] = 1.0f; + vecCon->gtSimdVal.f32[0] = 0.0f; + vecCon->gtSimdVal.f32[1] = 0.0f; + vecCon->gtSimdVal.f32[2] = 0.0f; + vecCon->gtSimdVal.f32[3] = 1.0f; return vecCon; } @@ -937,10 +937,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { GenTreeVecCon* vecCon = gtNewVconNode(retType); - vecCon->gtSimd16Val.f32[0] = -1.0f; - vecCon->gtSimd16Val.f32[1] = -1.0f; - vecCon->gtSimd16Val.f32[2] = -1.0f; - vecCon->gtSimd16Val.f32[3] = +1.0f; + vecCon->gtSimdVal.f32[0] = -1.0f; + vecCon->gtSimdVal.f32[1] = -1.0f; + vecCon->gtSimdVal.f32[2] = -1.0f; + vecCon->gtSimdVal.f32[3] = +1.0f; return gtNewSimdBinOpNode(GT_MUL, retType, op1, vecCon, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ true); @@ -966,10 +966,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTreeVecCon* vecCon = gtNewVconNode(retType); - vecCon->gtSimd16Val.f32[0] = -1.0f; - vecCon->gtSimd16Val.f32[1] = -1.0f; - vecCon->gtSimd16Val.f32[2] = -1.0f; - vecCon->gtSimd16Val.f32[3] = +1.0f; + vecCon->gtSimdVal.f32[0] = -1.0f; + 
vecCon->gtSimdVal.f32[1] = -1.0f; + vecCon->gtSimdVal.f32[2] = -1.0f; + vecCon->gtSimdVal.f32[3] = +1.0f; GenTree* conjugate = gtNewSimdBinOpNode(GT_MUL, retType, op1, vecCon, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ true); @@ -1923,8 +1923,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, float cnsVal = 0; - vecCon->gtSimd8Val.f32[0] = static_cast(op2->AsDblCon()->DconValue()); - vecCon->gtSimd8Val.f32[1] = static_cast(op3->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[0] = static_cast(op2->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[1] = static_cast(op3->AsDblCon()->DconValue()); copyBlkSrc = vecCon; } @@ -1976,11 +1976,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, if (simdSize == 12) { - vecCon->gtSimd12Val.f32[2] = static_cast(op3->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[2] = static_cast(op3->AsDblCon()->DconValue()); } else { - vecCon->gtSimd16Val.f32[3] = static_cast(op3->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[3] = static_cast(op3->AsDblCon()->DconValue()); } copyBlkSrc = vecCon; @@ -2064,9 +2064,9 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, float cnsVal = 0; - vecCon->gtSimd12Val.f32[0] = static_cast(op2->AsDblCon()->DconValue()); - vecCon->gtSimd12Val.f32[1] = static_cast(op3->AsDblCon()->DconValue()); - vecCon->gtSimd12Val.f32[2] = static_cast(op4->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[0] = static_cast(op2->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[1] = static_cast(op3->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[2] = static_cast(op4->AsDblCon()->DconValue()); copyBlkSrc = vecCon; } @@ -2106,8 +2106,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTreeVecCon* vecCon = op2->AsVecCon(); vecCon->gtType = simdType; - vecCon->gtSimd16Val.f32[2] = static_cast(op3->AsDblCon()->DconValue()); - vecCon->gtSimd16Val.f32[3] = static_cast(op4->AsDblCon()->DconValue()); + 
vecCon->gtSimdVal.f32[2] = static_cast(op3->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[3] = static_cast(op4->AsDblCon()->DconValue()); copyBlkSrc = vecCon; } @@ -2191,10 +2191,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, float cnsVal = 0; - vecCon->gtSimd16Val.f32[0] = static_cast(op2->AsDblCon()->DconValue()); - vecCon->gtSimd16Val.f32[1] = static_cast(op3->AsDblCon()->DconValue()); - vecCon->gtSimd16Val.f32[2] = static_cast(op4->AsDblCon()->DconValue()); - vecCon->gtSimd16Val.f32[3] = static_cast(op5->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[0] = static_cast(op2->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[1] = static_cast(op3->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[2] = static_cast(op4->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[3] = static_cast(op5->AsDblCon()->DconValue()); copyBlkSrc = vecCon; } diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 39bd8898548e53..a68aa32bbd76bc 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -444,8 +444,10 @@ ValueNumStore::ValueNumStore(Compiler* comp, CompAllocator alloc) , m_simd8CnsMap(nullptr) , m_simd12CnsMap(nullptr) , m_simd16CnsMap(nullptr) +#if defined(TARGET_XARCH) , m_simd32CnsMap(nullptr) , m_simd64CnsMap(nullptr) +#endif // TARGET_XARCH #endif // FEATURE_SIMD , m_VNFunc0Map(nullptr) , m_VNFunc1Map(nullptr) @@ -1957,28 +1959,28 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ) #ifdef FEATURE_SIMD case TYP_SIMD8: { - return VNForSimd8Con({}); + return VNForSimd8Con(simd8_t::Zero()); } case TYP_SIMD12: { - return VNForSimd12Con({}); + return VNForSimd12Con(simd12_t::Zero()); } case TYP_SIMD16: { - return VNForSimd16Con({}); + return VNForSimd16Con(simd16_t::Zero()); } #if defined(TARGET_XARCH) case TYP_SIMD32: { - return VNForSimd32Con({}); + return VNForSimd32Con(simd32_t::Zero()); } case TYP_SIMD64: { - return VNForSimd64Con({}); + return VNForSimd64Con(simd64_t::Zero()); } 
#endif // TARGET_XARCH #endif // FEATURE_SIMD @@ -2048,53 +2050,28 @@ ValueNum ValueNumStore::VNAllBitsForType(var_types typ) #ifdef FEATURE_SIMD case TYP_SIMD8: { - simd8_t cnsVal; - - cnsVal.u32[0] = 0xFFFFFFFF; - cnsVal.u32[1] = 0xFFFFFFFF; - - return VNForSimd8Con(cnsVal); + return VNForSimd8Con(simd8_t::AllBitsSet()); } case TYP_SIMD12: { - simd12_t cnsVal; - - cnsVal.u32[0] = 0xFFFFFFFF; - cnsVal.u32[1] = 0xFFFFFFFF; - cnsVal.u32[2] = 0xFFFFFFFF; - - return VNForSimd12Con(cnsVal); + return VNForSimd12Con(simd12_t::AllBitsSet()); } case TYP_SIMD16: { - simd16_t cnsVal; - - cnsVal.u32[0] = 0xFFFFFFFF; - cnsVal.u32[1] = 0xFFFFFFFF; - cnsVal.u32[2] = 0xFFFFFFFF; - cnsVal.u32[3] = 0xFFFFFFFF; - - return VNForSimd16Con(cnsVal); + return VNForSimd16Con(simd16_t::AllBitsSet()); } #if defined(TARGET_XARCH) case TYP_SIMD32: { - simd32_t cnsVal; - - cnsVal.u32[0] = 0xFFFFFFFF; - cnsVal.u32[1] = 0xFFFFFFFF; - cnsVal.u32[2] = 0xFFFFFFFF; - cnsVal.u32[3] = 0xFFFFFFFF; - - cnsVal.u32[4] = 0xFFFFFFFF; - cnsVal.u32[5] = 0xFFFFFFFF; - cnsVal.u32[6] = 0xFFFFFFFF; - cnsVal.u32[7] = 0xFFFFFFFF; + return VNForSimd32Con(simd32_t::AllBitsSet()); + } - return VNForSimd32Con(cnsVal); + case TYP_SIMD64: + { + return VNForSimd64Con(simd64_t::AllBitsSet()); } #endif // TARGET_XARCH #endif // FEATURE_SIMD @@ -2111,8 +2088,8 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT { assert(varTypeIsSIMD(simdType)); - simd32_t simd32Val = {}; - int simdSize = genTypeSize(simdType); + simd_t simdVal = {}; + int simdSize = genTypeSize(simdType); switch (simdBaseType) { @@ -2121,7 +2098,7 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT { for (int i = 0; i < simdSize; i++) { - simd32Val.u8[i] = 1; + simdVal.u8[i] = 1; } break; } @@ -2131,7 +2108,7 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT { for (int i = 0; i < (simdSize / 2); i++) { - simd32Val.u16[i] = 1; + simdVal.u16[i] = 1; } break; } @@ 
-2141,7 +2118,7 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT { for (int i = 0; i < (simdSize / 4); i++) { - simd32Val.u32[i] = 1; + simdVal.u32[i] = 1; } break; } @@ -2151,7 +2128,7 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT { for (int i = 0; i < (simdSize / 8); i++) { - simd32Val.u64[i] = 1; + simdVal.u64[i] = 1; } break; } @@ -2160,7 +2137,7 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT { for (int i = 0; i < (simdSize / 4); i++) { - simd32Val.f32[i] = 1.0f; + simdVal.f32[i] = 1.0f; } break; } @@ -2169,7 +2146,7 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT { for (int i = 0; i < (simdSize / 8); i++) { - simd32Val.f64[i] = 1.0; + simdVal.f64[i] = 1.0; } break; } @@ -2184,28 +2161,39 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT { case TYP_SIMD8: { - return VNForSimd8Con(simd32Val.v64[0]); + simd8_t simd8Val; + memcpy(&simd8Val, &simdVal, sizeof(simd8_t)); + return VNForSimd8Con(simd8Val); } case TYP_SIMD12: { - assert(simdBaseType == TYP_FLOAT); - simd12_t simd12Val; - memcpy(&simd12Val, &simd32Val.f32, sizeof(simd12_t)); + memcpy(&simd12Val, &simdVal, sizeof(simd12_t)); return VNForSimd12Con(simd12Val); } case TYP_SIMD16: { - return VNForSimd16Con(simd32Val.v128[0]); + simd16_t simd16Val; + memcpy(&simd16Val, &simdVal, sizeof(simd16_t)); + return VNForSimd16Con(simd16Val); } #if defined(TARGET_XARCH) case TYP_SIMD32: { + simd32_t simd32Val; + memcpy(&simd32Val, &simdVal, sizeof(simd32_t)); return VNForSimd32Con(simd32Val); } + + case TYP_SIMD64: + { + simd64_t simd64Val; + memcpy(&simd64Val, &simdVal, sizeof(simd64_t)); + return VNForSimd64Con(simd64Val); + } #endif // TARGET_XARCH default: @@ -6424,6 +6412,18 @@ simd32_t GetConstantSimd32(ValueNumStore* vns, var_types baseType, ValueNum argV return BroadcastConstantToSimd(vns, baseType, argVN); } + +simd64_t 
GetConstantSimd64(ValueNumStore* vns, var_types baseType, ValueNum argVN) +{ + assert(vns->IsVNConstant(argVN)); + + if (vns->TypeOfVN(argVN) == TYP_SIMD64) + { + return vns->GetConstantSimd64(argVN); + } + + return BroadcastConstantToSimd(vns, baseType, argVN); +} #endif // TARGET_XARCH ValueNum EvaluateUnarySimd( @@ -6467,6 +6467,15 @@ ValueNum EvaluateUnarySimd( EvaluateUnarySimd(oper, scalar, baseType, &result, arg0); return vns->VNForSimd32Con(result); } + + case TYP_SIMD64: + { + simd64_t arg0 = GetConstantSimd64(vns, baseType, arg0VN); + + simd64_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, arg0); + return vns->VNForSimd64Con(result); + } #endif // TARGET_XARCH default: @@ -6526,6 +6535,16 @@ ValueNum EvaluateBinarySimd(ValueNumStore* vns, EvaluateBinarySimd(oper, scalar, baseType, &result, arg0, arg1); return vns->VNForSimd32Con(result); } + + case TYP_SIMD64: + { + simd64_t arg0 = GetConstantSimd64(vns, baseType, arg0VN); + simd64_t arg1 = GetConstantSimd64(vns, baseType, arg1VN); + + simd64_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, arg0, arg1); + return vns->VNForSimd64Con(result); + } #endif // TARGET_XARCH default: @@ -6631,6 +6650,11 @@ ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types type, var_types ba { return EvaluateSimdGetElement(vns, baseType, vns->GetConstantSimd32(arg0VN), arg1); } + + case TYP_SIMD64: + { + return EvaluateSimdGetElement(vns, baseType, vns->GetConstantSimd64(arg0VN), arg1); + } #endif // TARGET_XARCH default: @@ -9689,22 +9713,50 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) #ifdef FEATURE_SIMD case TYP_SIMD8: - tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(tree->AsVecCon()->gtSimd8Val)); + { + simd8_t simd8Val; + memcpy(&simd8Val, &tree->AsVecCon()->gtSimdVal, sizeof(simd8_t)); + + tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(simd8Val)); break; + } case TYP_SIMD12: - tree->gtVNPair.SetBoth(vnStore->VNForSimd12Con(tree->AsVecCon()->gtSimd12Val)); + { + 
simd12_t simd12Val; + memcpy(&simd12Val, &tree->AsVecCon()->gtSimdVal, sizeof(simd12_t)); + + tree->gtVNPair.SetBoth(vnStore->VNForSimd12Con(simd12Val)); break; + } case TYP_SIMD16: - tree->gtVNPair.SetBoth(vnStore->VNForSimd16Con(tree->AsVecCon()->gtSimd16Val)); + { + simd16_t simd16Val; + memcpy(&simd16Val, &tree->AsVecCon()->gtSimdVal, sizeof(simd16_t)); + + tree->gtVNPair.SetBoth(vnStore->VNForSimd16Con(simd16Val)); break; + } #if defined(TARGET_XARCH) case TYP_SIMD32: - case TYP_SIMD64: // TODO-XArch-AVX512: Fix once GenTreeVecCon supports gtSimd64Val. - tree->gtVNPair.SetBoth(vnStore->VNForSimd32Con(tree->AsVecCon()->gtSimd32Val)); + { + simd32_t simd32Val; + memcpy(&simd32Val, &tree->AsVecCon()->gtSimdVal, sizeof(simd32_t)); + + tree->gtVNPair.SetBoth(vnStore->VNForSimd32Con(simd32Val)); break; + } + + case TYP_SIMD64: + { + simd64_t simd64Val; + memcpy(&simd64Val, &tree->AsVecCon()->gtSimdVal, sizeof(simd64_t)); + + tree->gtVNPair.SetBoth(vnStore->VNForSimd64Con(simd64Val)); + break; + } #endif // TARGET_XARCH #endif // FEATURE_SIMD @@ -10053,7 +10105,7 @@ bool Compiler::fgValueNumberConstLoad(GenTreeIndir* tree) { CORINFO_FIELD_HANDLE fieldHandle = fieldSeq->GetFieldHandle(); int size = (int)genTypeSize(tree->TypeGet()); - const int maxElementSize = 32; // SIMD32 + const int maxElementSize = sizeof(simd_t); if ((fieldHandle != nullptr) && (size > 0) && (size <= maxElementSize) && ((size_t)byteOffset < INT_MAX)) { uint8_t buffer[maxElementSize] = {0}; @@ -10153,6 +10205,12 @@ bool Compiler::fgValueNumberConstLoad(GenTreeIndir* tree) tree->gtVNPair.SetBoth(vnStore->VNForSimd32Con(val)); return true; } + case TYP_SIMD64: + { + READ_VALUE(simd64_t); + tree->gtVNPair.SetBoth(vnStore->VNForSimd64Con(val)); + return true; + } #endif // TARGET_XARCH #endif // FEATURE_SIMD default: diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 44fc0d503c9928..50f780c1be96e1 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ 
-1611,6 +1611,7 @@ class ValueNumStore return m_simd16CnsMap; } +#if defined(TARGET_XARCH) struct Simd32PrimitiveKeyFuncs : public JitKeyFuncsDefEquals { static bool Equals(simd32_t x, simd32_t y) @@ -1688,7 +1689,7 @@ class ValueNumStore } return m_simd64CnsMap; } - +#endif // TARGET_XARCH #endif // FEATURE_SIMD template