-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Adding Regex.EnumerateMatches #67794
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
148 changes: 148 additions & 0 deletions
148
...stem.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,148 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
|
|
||
| using System.Diagnostics; | ||
| using System.Diagnostics.CodeAnalysis; | ||
|
|
||
| namespace System.Text.RegularExpressions | ||
| { | ||
| public partial class Regex | ||
| { | ||
| /// <summary> | ||
| /// Searches an input span for all occurrences of the regular expression given in <paramref name="pattern"/> and returns a <see cref="ValueMatchEnumerator"/> to iterate over the matches. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// Each match is evaluated only when <see cref="ValueMatchEnumerator.MoveNext"/> is invoked on the enumerator, with one match being performed per <see cref="ValueMatchEnumerator.MoveNext"/> call. | ||
| /// Because the matches are evaluated lazily, any change made to the passed-in input between calls to <see cref="ValueMatchEnumerator.MoveNext"/> will affect the match results. | ||
| /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input, are ref structs, which | ||
| /// makes this method amortized allocation-free. | ||
| /// </remarks> | ||
| /// <param name="input">The span to search for a match.</param> | ||
| /// <param name="pattern">The regular expression pattern to match.</param> | ||
| /// <returns>A <see cref="ValueMatchEnumerator"/> to iterate over the matches.</returns> | ||
| /// <exception cref="ArgumentNullException"><paramref name="pattern"/> is <see langword="null"/>.</exception> | ||
| /// <exception cref="RegexParseException">A regular expression parsing error occurred.</exception> | ||
| public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex)] string pattern) => | ||
| RegexCache.GetOrAdd(pattern).EnumerateMatches(input); | ||
|
|
||
| /// <summary> | ||
| /// Searches an input span for all occurrences of the regular expression given in <paramref name="pattern"/>, using the specified <paramref name="options"/>, and returns a <see cref="ValueMatchEnumerator"/> to iterate over the matches. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// Each match is evaluated only when <see cref="ValueMatchEnumerator.MoveNext"/> is invoked on the enumerator, with one match being performed per <see cref="ValueMatchEnumerator.MoveNext"/> call. | ||
| /// Because the matches are evaluated lazily, any change made to the passed-in input between calls to <see cref="ValueMatchEnumerator.MoveNext"/> will affect the match results. | ||
| /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input, are ref structs, which | ||
| /// makes this method amortized allocation-free. This overload uses the default match timeout. | ||
| /// </remarks> | ||
| /// <param name="input">The span to search for a match.</param> | ||
| /// <param name="pattern">The regular expression pattern to match.</param> | ||
| /// <param name="options">A bitwise combination of the enumeration values that specify options for matching.</param> | ||
| /// <returns>A <see cref="ValueMatchEnumerator"/> to iterate over the matches.</returns> | ||
| /// <exception cref="ArgumentNullException"><paramref name="pattern"/> is <see langword="null"/>.</exception> | ||
| /// <exception cref="ArgumentOutOfRangeException"><paramref name="options"/> is not a valid bitwise combination of RegexOptions values.</exception> | ||
| /// <exception cref="RegexParseException">A regular expression parsing error occurred.</exception> | ||
| public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options) => | ||
| RegexCache.GetOrAdd(pattern, options, s_defaultMatchTimeout).EnumerateMatches(input); | ||
|
|
||
| /// <summary> | ||
| /// Searches an input span for all occurrences of the regular expression given in <paramref name="pattern"/>, using the specified <paramref name="options"/> and <paramref name="matchTimeout"/>, and returns a <see cref="ValueMatchEnumerator"/> to iterate over the matches. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// Each match is evaluated only when <see cref="ValueMatchEnumerator.MoveNext"/> is invoked on the enumerator, with one match being performed per <see cref="ValueMatchEnumerator.MoveNext"/> call. | ||
| /// Because the matches are evaluated lazily, any change made to the passed-in input between calls to <see cref="ValueMatchEnumerator.MoveNext"/> will affect the match results. | ||
| /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input, are ref structs, which | ||
| /// makes this method amortized allocation-free. | ||
| /// </remarks> | ||
| /// <param name="input">The span to search for a match.</param> | ||
| /// <param name="pattern">The regular expression pattern to match.</param> | ||
| /// <param name="options">A bitwise combination of the enumeration values that specify options for matching.</param> | ||
| /// <param name="matchTimeout">A time-out interval, or <see cref="InfiniteMatchTimeout"/> to indicate that the method should not time out.</param> | ||
| /// <returns>A <see cref="ValueMatchEnumerator"/> to iterate over the matches.</returns> | ||
| /// <exception cref="ArgumentNullException"><paramref name="pattern"/> is <see langword="null"/>.</exception> | ||
| /// <exception cref="ArgumentOutOfRangeException"><paramref name="options"/> is not a valid bitwise combination of RegexOptions values, or <paramref name="matchTimeout"/> is negative, zero, or greater than approximately 24 days.</exception> | ||
| /// <exception cref="RegexParseException">A regular expression parsing error occurred.</exception> | ||
| public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options, TimeSpan matchTimeout) => | ||
| RegexCache.GetOrAdd(pattern, options, matchTimeout).EnumerateMatches(input); | ||
|
|
||
| /// <summary> | ||
| /// Searches an input span for all occurrences of this regular expression and returns a <see cref="ValueMatchEnumerator"/> to iterate over the matches. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// Each match is evaluated only when <see cref="ValueMatchEnumerator.MoveNext"/> is invoked on the enumerator, with one match being performed per <see cref="ValueMatchEnumerator.MoveNext"/> call. | ||
| /// Because the matches are evaluated lazily, any change made to the passed-in input between calls to <see cref="ValueMatchEnumerator.MoveNext"/> will affect the match results. | ||
| /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input, are ref structs, which | ||
| /// makes this method amortized allocation-free. The search starts at the end of the input when <see cref="RightToLeft"/> is set, and at position 0 otherwise. | ||
| /// </remarks> | ||
| /// <param name="input">The span to search for a match.</param> | ||
| /// <returns>A <see cref="ValueMatchEnumerator"/> to iterate over the matches.</returns> | ||
| public ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input) => | ||
| new ValueMatchEnumerator(this, input, RightToLeft ? input.Length : 0); | ||
|
|
||
| /// <summary> | ||
| /// Represents an enumerator containing the set of successful matches found by iteratively applying a regular expression pattern to the input span. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// The enumerator has no public constructor. The <see cref="Regex.EnumerateMatches(ReadOnlySpan{char})"/> method returns a <see cref="Regex.ValueMatchEnumerator"/> | ||
| /// object. The enumerator will lazily iterate over zero or more <see cref="ValueMatch"/> objects. If there is at least one successful match in the span, then | ||
| /// <see cref="MoveNext"/> returns <see langword="true"/> and <see cref="Current"/> will contain the first <see cref="ValueMatch"/>. If there are no successful matches, | ||
| /// then <see cref="MoveNext"/> returns <see langword="false"/> and <see cref="Current"/> remains a default <see cref="ValueMatch"/> (index 0, length 0) rather than throwing. | ||
| /// | ||
| /// This type is a ref struct since it stores the input span as a field in order to be able to lazily iterate over it. | ||
|
joperezr marked this conversation as resolved.
|
||
| /// </remarks> | ||
| public ref struct ValueMatchEnumerator | ||
| { | ||
| private readonly Regex _regex; | ||
| private readonly ReadOnlySpan<char> _input; | ||
| private ValueMatch _current; | ||
| private int _startAt; | ||
| private int _prevLen; | ||
|
|
||
| /// <summary> | ||
| /// Creates a <see cref="ValueMatchEnumerator"/> for <paramref name="regex"/> that iterates over <paramref name="input"/>; the current match starts out as the default value and the previous match length as -1. | ||
| /// </summary> | ||
| /// <param name="regex">The <see cref="Regex"/> to use for finding matches.</param> | ||
| /// <param name="input">The input span to iterate over.</param> | ||
| /// <param name="startAt">The position in <paramref name="input"/> at which the engine should start looking for matches.</param> | ||
| internal ValueMatchEnumerator(Regex regex, ReadOnlySpan<char> input, int startAt) | ||
| { | ||
| _regex = regex; | ||
| _input = input; | ||
| _current = default; | ||
| _startAt = startAt; | ||
| _prevLen = -1; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Returns this enumerator itself, so that the matches in the input span can be iterated through it. | ||
| /// </summary> | ||
| /// <returns>A copy of this enumerator (the struct is returned by value).</returns> | ||
| public readonly ValueMatchEnumerator GetEnumerator() => this; | ||
|
|
||
| /// <summary> | ||
| /// Advances the enumerator to the next match in the span by running a single match from the stored start position; on success it records the new current match, the next start position, and the length of the match. | ||
| /// </summary> | ||
| /// <returns> | ||
| /// <see langword="true"/> if the enumerator was successfully advanced to the next element; <see langword="false"/> if the enumerator cannot find additional matches, in which case the enumerator's state (including <see cref="Current"/>) is left unchanged. | ||
| /// </returns> | ||
| public bool MoveNext() | ||
| { | ||
| Match? match = _regex.RunSingleMatch(quick: false, _prevLen, _input, _startAt); | ||
| Debug.Assert(match != null, "Match shouldn't be null because we passed quick = false."); | ||
| if (match != RegularExpressions.Match.Empty) | ||
| { | ||
| _current = new ValueMatch(match.Index, match.Length); | ||
| _startAt = match._textpos; | ||
| _prevLen = match.Length; | ||
| return true; | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Gets the <see cref="ValueMatch"/> element at the current position of the enumerator. | ||
| /// </summary> | ||
| /// <remarks>This getter does not throw: before the first successful <see cref="MoveNext"/> call it returns a default <see cref="ValueMatch"/>, and once enumeration has finished it keeps returning the last match found.</remarks> | ||
| public readonly ValueMatch Current => _current; | ||
| } | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
...libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ValueMatch.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
|
|
||
| namespace System.Text.RegularExpressions | ||
| { | ||
| /// <summary> | ||
| /// Represents the results from a single regular expression match. | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// The <see cref="ValueMatch"/> type is immutable and has no public constructor. An instance of the <see cref="ValueMatch"/> struct is returned by the | ||
| /// <see cref="Regex.ValueMatchEnumerator.Current"/> property when iterating over the results from calling <see cref="Regex.EnumerateMatches(ReadOnlySpan{char})"/>. | ||
| /// </remarks> | ||
| public readonly ref struct ValueMatch | ||
| { | ||
| private readonly int _index; | ||
| private readonly int _length; | ||
|
|
||
| /// <summary> | ||
| /// Creates an instance of the <see cref="ValueMatch"/> type based on the passed-in <paramref name="index"/> and <paramref name="length"/>. | ||
| /// </summary> | ||
| /// <param name="index">The position in the original span where the first character of the captured sliced span is found.</param> | ||
| /// <param name="length">The length of the captured sliced span.</param> | ||
| internal ValueMatch(int index, int length) | ||
| { | ||
| _index = index; | ||
| _length = length; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Gets the position in the original span where the first character of the captured sliced span is found. | ||
| /// </summary> | ||
| public int Index => _index; | ||
|
|
||
| /// <summary> | ||
| /// Gets the length of the captured sliced span. | ||
| /// </summary> | ||
| public int Length => _length; | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.