-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Get index of first non ascii char #39507
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -489,8 +489,8 @@ public static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint buff | |
| // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while | ||
| // this method is running. | ||
|
|
||
| return (Sse2.IsSupported) | ||
| ? GetIndexOfFirstNonAsciiChar_Sse2(pBuffer, bufferLength) | ||
| return (BitConverter.IsLittleEndian && (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)) | ||
| ? GetIndexOfFirstNonAsciiChar_Intrinsified(pBuffer, bufferLength) | ||
| : GetIndexOfFirstNonAsciiChar_Default(pBuffer, bufferLength); | ||
| } | ||
|
|
||
|
|
@@ -630,9 +630,9 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Default(char* pBuffer, n | |
| goto Finish; | ||
| } | ||
|
|
||
| private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuint bufferLength /* in chars */) | ||
| private static unsafe nuint GetIndexOfFirstNonAsciiChar_Intrinsified(char* pBuffer, nuint bufferLength /* in chars */) | ||
| { | ||
| // This method contains logic optimized for both SSE2 and SSE41. Much of the logic in this method | ||
| // This method contains logic optimized for SSE2, SSE41 and ARM64. Much of the logic in this method | ||
| // will be elided by JIT once we determine which specific ISAs we support. | ||
|
|
||
| // Quick check for empty inputs. | ||
|
|
@@ -647,8 +647,12 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| uint SizeOfVector128InBytes = (uint)Unsafe.SizeOf<Vector128<byte>>(); | ||
| uint SizeOfVector128InChars = SizeOfVector128InBytes / sizeof(char); | ||
|
|
||
| Debug.Assert(Sse2.IsSupported, "Should've been checked by caller."); | ||
| Debug.Assert(BitConverter.IsLittleEndian, "SSE2 assumes little-endian."); | ||
| Debug.Assert(Sse2.IsSupported || AdvSimd.Arm64.IsSupported, "Sse2 or AdvSimd64 required."); | ||
| Debug.Assert(BitConverter.IsLittleEndian, "This SSE2/Arm64 implementation assumes little-endian."); | ||
|
|
||
| Vector128<byte> bitmask = BitConverter.IsLittleEndian ? | ||
| Vector128.Create(0x80402010_08040201).AsByte() : | ||
| Vector128.Create(0x01020408_10204080).AsByte(); | ||
|
|
||
| Vector128<ushort> firstVector, secondVector; | ||
| uint currentMask; | ||
|
|
@@ -673,13 +677,35 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
|
|
||
| // Read the first vector unaligned. | ||
|
|
||
| firstVector = Sse2.LoadVector128((ushort*)pBuffer); // unaligned load | ||
| if (Sse2.IsSupported) | ||
| { | ||
| firstVector = Sse2.LoadVector128((ushort*)pBuffer); // unaligned load | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| firstVector = AdvSimd.LoadVector128((ushort*)pBuffer); // unaligned load | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| // The operation below forces the 0x8000 bit of each WORD to be set iff the WORD element | ||
| // has value >= 0x0800 (non-ASCII). Then we'll treat the vector as a BYTE vector in order | ||
| // has value >= 0x0080 (non-ASCII). Then we'll treat the vector as a BYTE vector in order | ||
| // to extract the mask. Reminder: the 0x0080 bit of each WORD should be ignored. | ||
|
|
||
| currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); | ||
| if (Sse2.IsSupported) | ||
| { | ||
| currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| currentMask = Unicode.Utf16Utility.GetNonAsciiBytes(AdvSimd.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte(), bitmask); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should check out excellent feedback from @TamarChristinaArm in #39050 (comment) and #39050 (comment) about optimizing this code. I think you should follow the same because in the end, all you are doing this calling
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed with splitting it off. In this case it looks like this code only cares about the index of the first non-zero element. So we can do even better here. If you use a mask essentially as the sequence to get the first element that has the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is for little endian btw, for big-endian you need a slight variantion. though this can be avoided loading with |
||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| if ((currentMask & NonAsciiDataSeenMask) != 0) | ||
| { | ||
|
|
@@ -725,9 +751,23 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
|
|
||
| do | ||
| { | ||
| firstVector = Sse2.LoadAlignedVector128((ushort*)pBuffer); | ||
| secondVector = Sse2.LoadAlignedVector128((ushort*)pBuffer + SizeOfVector128InChars); | ||
| Vector128<ushort> combinedVector = Sse2.Or(firstVector, secondVector); | ||
| Vector128<ushort> combinedVector; | ||
| if (Sse2.IsSupported) | ||
| { | ||
| firstVector = Sse2.LoadAlignedVector128((ushort*)pBuffer); | ||
| secondVector = Sse2.LoadAlignedVector128((ushort*)pBuffer + SizeOfVector128InChars); | ||
| combinedVector = Sse2.Or(firstVector, secondVector); | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| firstVector = AdvSimd.LoadVector128((ushort*)pBuffer); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a note for the future, this would be a great place for LoadPair #39243 |
||
| secondVector = AdvSimd.LoadVector128((ushort*)pBuffer + SizeOfVector128InChars); | ||
| combinedVector = AdvSimd.Or(firstVector, secondVector); | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| if (Sse41.IsSupported) | ||
| { | ||
|
|
@@ -738,7 +778,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| goto FoundNonAsciiDataInFirstOrSecondVector; | ||
| } | ||
| } | ||
| else | ||
| else if (Sse2.IsSupported) | ||
| { | ||
| // See comment earlier in the method for an explanation of how the below logic works. | ||
| currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(combinedVector, asciiMaskForAddSaturate).AsByte()); | ||
|
|
@@ -747,6 +787,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| goto FoundNonAsciiDataInFirstOrSecondVector; | ||
| } | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| currentMask = Unicode.Utf16Utility.GetNonAsciiBytes(AdvSimd.AddSaturate(combinedVector, asciiMaskForAddSaturate).AsByte(), bitmask); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor nit: The if (ContainNonAsciiDataInFirstOrSecondVector(..args..))
{
goto FoundNonAsciiDataInFirstOrSecondVector;
} |
||
| if ((currentMask & NonAsciiDataSeenMask) != 0) | ||
| { | ||
| goto FoundNonAsciiDataInFirstOrSecondVector; | ||
| } | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| pBuffer += 2 * SizeOfVector128InChars; | ||
| } while (pBuffer <= pFinalVectorReadPos); | ||
|
|
@@ -770,7 +822,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| // At least one full vector's worth of data remains, so we can safely read it. | ||
| // Remember, at this point pBuffer is still aligned. | ||
|
|
||
| firstVector = Sse2.LoadAlignedVector128((ushort*)pBuffer); | ||
| if (Sse2.IsSupported) | ||
| { | ||
| firstVector = Sse2.LoadAlignedVector128((ushort*)pBuffer); | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| firstVector = AdvSimd.LoadVector128((ushort*)pBuffer); | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| if (Sse41.IsSupported) | ||
| { | ||
|
|
@@ -781,7 +844,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| goto FoundNonAsciiDataInFirstVector; | ||
| } | ||
| } | ||
| else | ||
| else if (Sse2.IsSupported) | ||
| { | ||
| // See comment earlier in the method for an explanation of how the below logic works. | ||
| currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); | ||
|
|
@@ -790,6 +853,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| currentMask = Unicode.Utf16Utility.GetNonAsciiBytes(AdvSimd.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte(), bitmask); | ||
| if ((currentMask & NonAsciiDataSeenMask) != 0) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| IncrementCurrentOffsetBeforeFinalUnalignedVectorRead: | ||
|
|
||
|
|
@@ -803,7 +878,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| // We need to adjust the pointer because we're re-reading data. | ||
|
|
||
| pBuffer = (char*)((byte*)pBuffer + (bufferLength & (SizeOfVector128InBytes - 1)) - SizeOfVector128InBytes); | ||
| firstVector = Sse2.LoadVector128((ushort*)pBuffer); // unaligned load | ||
| if (Sse2.IsSupported) | ||
| { | ||
| firstVector = Sse2.LoadVector128((ushort*)pBuffer); // unaligned load | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| firstVector = AdvSimd.LoadVector128((ushort*)pBuffer); // unaligned load | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| if (Sse41.IsSupported) | ||
| { | ||
|
|
@@ -814,7 +900,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| goto FoundNonAsciiDataInFirstVector; | ||
| } | ||
| } | ||
| else | ||
| else if (Sse2.IsSupported) | ||
| { | ||
| // See comment earlier in the method for an explanation of how the below logic works. | ||
| currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); | ||
|
|
@@ -823,6 +909,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| currentMask = Unicode.Utf16Utility.GetNonAsciiBytes(AdvSimd.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte(), bitmask); | ||
| if ((currentMask & NonAsciiDataSeenMask) != 0) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| pBuffer += SizeOfVector128InChars; | ||
| } | ||
|
|
@@ -846,14 +944,26 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| goto FoundNonAsciiDataInFirstVector; | ||
| } | ||
| } | ||
| else | ||
| else if (Sse2.IsSupported) | ||
| { | ||
| currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); | ||
| if ((currentMask & NonAsciiDataSeenMask) != 0) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| currentMask = Unicode.Utf16Utility.GetNonAsciiBytes(AdvSimd.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte(), bitmask); | ||
| if ((currentMask & NonAsciiDataSeenMask) != 0) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| // Wasn't the first vector; must be the second. | ||
|
|
||
|
|
@@ -863,7 +973,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin | |
| FoundNonAsciiDataInFirstVector: | ||
|
|
||
| // See comment earlier in the method for an explanation of how the below logic works. | ||
| currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); | ||
| if (Sse2.IsSupported) | ||
| { | ||
| currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| currentMask = Unicode.Utf16Utility.GetNonAsciiBytes(AdvSimd.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte(), bitmask); | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| FoundNonAsciiDataInCurrentMask: | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is also true for other PRs that Carlos sent, but can you remind me why we use
bitMaskfor!IsLittleEndianif it is not supported? Is the idea that when we start supporting, the code will just work?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yup, that's the idea.