Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -702,5 +702,13 @@ private bool Fail()
return false;
}
}

/// <summary>
/// Validates that the value is well-formed UTF-8.
/// </summary>
/// <param name="value">The <see cref="ReadOnlySpan{T}"/> of bytes to validate.</param>
/// <returns><c>true</c> if <paramref name="value"/> is well-formed UTF-8; otherwise, <c>false</c>.</returns>
public static bool IsValid(ReadOnlySpan<byte> value) =>
    // No pointer operations happen here, so the method does not need an 'unsafe' context.
    // The input is reported as valid when the helper returns a negative index.
    Utf8Utility.GetIndexOfFirstInvalidUtf8Sequence(value, out _) < 0;
}
}
1 change: 1 addition & 0 deletions src/libraries/System.Runtime/ref/System.Runtime.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14787,6 +14787,7 @@ public static partial class Utf8
// Reference-assembly declarations: only the signatures matter here; every body is a `throw null` placeholder.
public static System.Buffers.OperationStatus ToUtf16(System.ReadOnlySpan<byte> source, System.Span<char> destination, out int bytesRead, out int charsWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true) { throw null; }
public static bool TryWrite(System.Span<byte> destination, [System.Runtime.CompilerServices.InterpolatedStringHandlerArgumentAttribute("destination")] ref System.Text.Unicode.Utf8.TryWriteInterpolatedStringHandler handler, out int bytesWritten) { throw null; }
public static bool TryWrite(System.Span<byte> destination, IFormatProvider? provider, [System.Runtime.CompilerServices.InterpolatedStringHandlerArgumentAttribute("destination", "provider")] ref System.Text.Unicode.Utf8.TryWriteInterpolatedStringHandler handler, out int bytesWritten) { throw null; }
public static bool IsValid(System.ReadOnlySpan<byte> value) { throw null; }

@stephentoub stephentoub Jun 24, 2023

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also search around dotnet/runtime for places this can be used? For example, System.Text.Json currently does this with a `public static unsafe bool IsValidUtf8String(ReadOnlySpan<byte> bytes)` method that can be entirely replaced when targeting .NET 8+. (Actually, it looks like it even has two different helpers for the same thing, the other being `public static void ValidateUtf8(ReadOnlySpan<byte> utf8Buffer)`.)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One useful pattern to look for is call sites that invoke `UTF8Encoding.GetCharCount` and throw away the return value. Those call sites are probably calling `GetCharCount` just for its side effect of throwing an exception when it sees invalid data.

Another might be to look for places that catch `EncoderFallbackException` and see whether they're actually trying to convert bytes <-> chars or are just performing validation.

[System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
[System.Runtime.CompilerServices.InterpolatedStringHandlerAttribute]
public ref struct TryWriteInterpolatedStringHandler
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,8 @@ private static unsafe void GetIndexOfFirstInvalidUtf8Sequence_Test_Core(byte[] i
Assert.Equal(expectedRetVal, actualRetVal);
Assert.Equal(expectedRuneCount, actualRuneCount);
Assert.Equal(expectedSurrogatePairCount, actualSurrogatePairCount);

Assert.True(Utf8.IsValid(boundedMemory.Span) == (expectedRetVal < 0));
}

private static Lazy<GetPointerToFirstInvalidByteDel> CreateGetPointerToFirstInvalidByteFn()
Expand Down