From b838c33744ffd69094766f202b14bef0c9b1aa5d Mon Sep 17 00:00:00 2001 From: Jose Perez Rodriguez Date: Thu, 7 Apr 2022 16:23:26 -0700 Subject: [PATCH 1/4] Adding Regex.EnumerateMatches() --- .../ref/System.Text.RegularExpressions.cs | 18 ++ .../src/System.Text.RegularExpressions.csproj | 2 + .../Regex.EnumerateMatches.cs | 144 +++++++++++ .../Text/RegularExpressions/Regex.Match.cs | 2 +- .../System/Text/RegularExpressions/Regex.cs | 29 ++- .../Text/RegularExpressions/ValueMatch.cs | 38 +++ .../FunctionalTests/Regex.Count.Tests.cs | 2 +- .../Regex.EnumerateMatches.Tests.cs | 237 ++++++++++++++++++ .../FunctionalTests/Regex.Match.Tests.cs | 2 +- .../Regex.MultipleMatches.Tests.cs | 2 +- ...ystem.Text.RegularExpressions.Tests.csproj | 1 + 11 files changed, 470 insertions(+), 7 deletions(-) create mode 100644 src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs create mode 100644 src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ValueMatch.cs create mode 100644 src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs diff --git a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs index e2870a59329975..73c1a7a7339101 100644 --- a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs +++ b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs @@ -171,6 +171,10 @@ public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompila public static int Count(System.ReadOnlySpan input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; } public static int Count(System.ReadOnlySpan input, 
[System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; } public static string Escape(string str) { throw null; } + public System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan input) { throw null; } + public static System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex")] string pattern) { throw null; } + public static System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex", new object[]{ "options"})] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; } + public static System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex", new object[]{ "options"})] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; } public string[] GetGroupNames() { throw null; } public int[] GetGroupNumbers() { throw null; } public string GroupNameFromNumber(int i) { throw null; } @@ -220,6 +224,14 @@ void System.Runtime.Serialization.ISerializable.GetObjectData(System.Runtime.Ser protected bool UseOptionC() { throw null; } protected internal bool UseOptionR() { throw null; } protected internal static void ValidateMatchTimeout(System.TimeSpan matchTimeout) { } + public ref partial struct ValueMatchEnumerator + { + private object _dummy; + private int _dummyPrimitive; + public readonly System.Text.RegularExpressions.ValueMatch Current { get { throw null; } } + public readonly System.Text.RegularExpressions.Regex.ValueMatchEnumerator GetEnumerator() { throw null; } + 
public bool MoveNext() { throw null; } + } } [System.ObsoleteAttribute("Regex.CompileToAssembly is obsolete and not supported. Use the RegexGeneratorAttribute with the regular expression source generator instead.", DiagnosticId = "SYSLIB0036", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")] public partial class RegexCompilationInfo @@ -359,4 +371,10 @@ public abstract partial class RegexRunnerFactory protected RegexRunnerFactory() { } protected internal abstract System.Text.RegularExpressions.RegexRunner CreateInstance(); } + public readonly ref partial struct ValueMatch + { + private readonly int _dummyPrimitive; + public int Index { get { throw null; } } + public int Length { get { throw null; } } + } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj index 358d5df17070ab..ea6ce993f7d360 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj +++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj @@ -7,6 +7,7 @@ + @@ -23,6 +24,7 @@ + diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs new file mode 100644 index 00000000000000..4516a7a343ca2b --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs @@ -0,0 +1,144 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; + +namespace System.Text.RegularExpressions +{ + public partial class Regex + { + /// + /// Searches an input span for all occurrences of a regular expression and returns a to iterate over the matches. 
+ /// + /// + /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which + /// make this method be amortized allocation free. + /// + /// The span to search for a match. + /// The regular expression pattern to match. + /// A to iterate over the matches. + /// is null. + /// A regular expression parsing error occurred. + public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan input, [StringSyntax(StringSyntaxAttribute.Regex)] string pattern) => + RegexCache.GetOrAdd(pattern).EnumerateMatches(input); + + /// + /// Searches an input span for all occurrences of a regular expression and returns a to iterate over the matches. + /// + /// + /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which + /// make this method be amortized allocation free. + /// + /// The span to search for a match. + /// The regular expression pattern to match. + /// A bitwise combination of the enumeration values that specify options for matching. + /// A to iterate over the matches. + /// is null. + /// is not a valid bitwise combination of RegexOptions values. + /// A regular expression parsing error occurred. + public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options) => + RegexCache.GetOrAdd(pattern, options, s_defaultMatchTimeout).EnumerateMatches(input); + + /// + /// Searches an input span for all occurrences of a regular expression and returns a to iterate over the matches. + /// + /// + /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which + /// make this method be amortized allocation free. + /// + /// The span to search for a match. 
+ /// The regular expression pattern to match. + /// A bitwise combination of the enumeration values that specify options for matching. + /// A time-out interval, or to indicate that the method should not time out. + /// A to iterate over the matches. + /// is null. + /// is not a valid bitwise combination of RegexOptions values, or is negative, zero, or greater than approximately 24 days. + /// A regular expression parsing error occurred. + public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options, TimeSpan matchTimeout) => + RegexCache.GetOrAdd(pattern, options, matchTimeout).EnumerateMatches(input); + + /// + /// Searches an input span for all occurrences of a regular expression and returns a to iterate over the matches. + /// + /// + /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which + /// make this method be amortized allocation free. + /// + /// The span to search for a match. + /// A to iterate over the matches. + public ValueMatchEnumerator EnumerateMatches(ReadOnlySpan input) => + new ValueMatchEnumerator(this, input, RightToLeft ? input.Length : 0); + + /// + /// Represents an enumerator containing the set of successful matches found by iteratively applying a regular expression pattern to the input span. The + /// enumerator has no public constructor. The method returns a + /// object. + /// + /// + /// The enumerator will lazily iterate over zero or more objects. If there is at least one successful match in the span, then + /// returns and will contain the first . If there are no successful matches, + /// then returns and throws an . + /// + /// This type is a ref struct since it stores the input span as a field in order to be able to lazily iterate over it. 
+ /// + public ref struct ValueMatchEnumerator + { + private readonly Regex _regex; + private readonly ReadOnlySpan _input; + private ValueMatch _matchReference; + private int _startAt; + private int _prevLen; + private bool _hasValidValue; + + /// + /// Creates an instance of the for the passed in which iterates over . + /// + /// The to use for finding matches. + /// The input span to iterate over. + /// The position where the engine should start looking for matches from. + internal ValueMatchEnumerator(Regex regex, ReadOnlySpan input, int startAt) + { + _regex = regex; + _input = input; + _matchReference = default; + _startAt = startAt; + _prevLen = -1; + _hasValidValue = false; + } + + /// + /// Provides an enumerator that iterates through the matches in the input span. + /// + /// A copy of this enumerator. + public readonly ValueMatchEnumerator GetEnumerator() => this; + + /// + /// Advances the enumerator to the next match in the span. + /// + /// + /// if the enumerator was successfully advanced to the next element; if the enumerator cannot find additional matches. + /// + public bool MoveNext() + { + Match? match = _regex.RunSingleMatch(quick: false, _prevLen, _input, _startAt); + if (match is not null && match != RegularExpressions.Match.Empty) + { + _matchReference = new ValueMatch(match); + _hasValidValue = true; + _startAt = match._textpos; + _prevLen = match.Length; + return true; + } + _hasValidValue = false; + _matchReference = default; + return false; + } + + /// + /// Gets the element at the current position of the enumerator. + /// + /// Enumeration has either not started or has already finished. + public readonly ValueMatch Current => _hasValidValue ? 
_matchReference : throw new InvalidOperationException(SR.EnumNotStarted); + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs index fe0a2cdc1ca4f2..a821c0590a6222 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs @@ -87,7 +87,7 @@ public bool IsMatch(string input) /// if the regular expression finds a match; otherwise, . /// A time-out ocurred. public bool IsMatch(ReadOnlySpan input) => - RunSingleMatch(input, RightToLeft ? input.Length : 0) is null; + RunSingleMatch(quick: true, -1, input, RightToLeft ? input.Length : 0) is null; /// /// Searches the input string for one or more matches using the previous pattern and options, diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index b9bcb9866e1d7d..a7e1bc1b75833d 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -403,7 +403,7 @@ protected void InitializeReferences() } /// Internal worker which will scan the passed in span for a match. Used by public APIs. - internal Match? RunSingleMatch(ReadOnlySpan input, int startat) + internal Match? RunSingleMatch(bool quick, int prevlen, ReadOnlySpan input, int startat) { // startat parameter is always either 0 or input.Length since public API for IsMatch doesn't have an overload // that takes in startat. 
@@ -413,13 +413,36 @@ protected void InitializeReferences() try { runner.InitializeTimeout(internalMatchTimeout); - runner.InitializeForScan(this, input, startat, quick: true); + runner.InitializeForScan(this, input, startat, quick); + + int stoppos = RightToLeft ? 0 : input.Length; + + // If previous match was empty or failed, advance by one before matching. + if (prevlen == 0) + { + if (runner.runtextstart == stoppos) + { + return RegularExpressions.Match.Empty; + } + + runner.runtextpos += RightToLeft ? -1 : 1; + } runner.Scan(input); // If runmatch is null it means that an override of Scan didn't implement it correctly, so we will // let this null ref since there are lots of ways where you can end up in a erroneous state. - return runner.runmatch!.FoundMatch ? null : RegularExpressions.Match.Empty; + if (runner.runmatch!.FoundMatch) + { + if (quick) + { + return null; + } + runner.runmatch.Tidy(runner.runtextpos, 0); + return runner.runmatch; + } + + return RegularExpressions.Match.Empty; } finally { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ValueMatch.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ValueMatch.cs new file mode 100644 index 00000000000000..be1e746d26bfa4 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ValueMatch.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +namespace System.Text.RegularExpressions +{ + /// + /// Represents the results from a single regular expression match. + /// + /// + /// The type is immutable and has no public constructor. An instance of the struct is returned by the + /// method when iterating over the results from calling . 
+ /// + public readonly ref struct ValueMatch + { + private readonly int _index; + private readonly int _length; + + /// + /// Crates an instance of the type based on the passed in . + /// + /// The object represented by this ValueMatch. + internal ValueMatch(Match match) + { + _index = match.Index; + _length = match.Length; + } + + /// + /// Gets the position in the original span where the first character of the captured sliced span is found. + /// + public int Index => _index; + + /// + /// Gets the length of the captured sliced span. + /// + public int Length => _length; + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs index 966b54c219d220..c7aab09e9b151d 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs @@ -8,7 +8,7 @@ namespace System.Text.RegularExpressions.Tests { - public class RegexCountTests + public partial class RegexCountTests { [Theory] [MemberData(nameof(Count_ReturnsExpectedCount_TestData))] diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs new file mode 100644 index 00000000000000..3e50ab91fb491d --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs @@ -0,0 +1,237 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Xunit; + +namespace System.Text.RegularExpressions.Tests +{ + public class RegexEnumerateMatchesTests + { + public static IEnumerable NoneCompiledBacktracking() + { + yield return new object[] { RegexOptions.None }; + yield return new object[] { RegexOptions.Compiled }; + if (PlatformDetection.IsNetCore) + { + yield return new object[] { RegexHelpers.RegexOptionNonBacktracking }; + } + } + + [Fact] + public void EnumerateMatches_Ctor_Invalid() + { + // Pattern is null + AssertExtensions.Throws("pattern", () => Regex.EnumerateMatches("input", null)); + AssertExtensions.Throws("pattern", () => Regex.EnumerateMatches("input", null, RegexOptions.None)); + AssertExtensions.Throws("pattern", () => Regex.EnumerateMatches("input", null, RegexOptions.None, TimeSpan.FromSeconds(1))); + + // Options are invalid + AssertExtensions.Throws("options", () => Regex.EnumerateMatches("input", "pattern", (RegexOptions)(-1))); + AssertExtensions.Throws("options", () => Regex.EnumerateMatches("input", "pattern", (RegexOptions)(-1), TimeSpan.FromSeconds(1))); + + // 0x400 is new NonBacktracking mode that is now valid, 0x800 is still invalid + AssertExtensions.Throws("options", () => Regex.EnumerateMatches("input", "pattern", (RegexOptions)0x800)); + AssertExtensions.Throws("options", () => Regex.EnumerateMatches("input", "pattern", (RegexOptions)0x800, TimeSpan.FromSeconds(1))); + + // MatchTimeout is invalid + AssertExtensions.Throws("matchTimeout", () => Regex.EnumerateMatches("input", "pattern", RegexOptions.None, TimeSpan.Zero)); + AssertExtensions.Throws("matchTimeout", () => Regex.EnumerateMatches("input", "pattern", RegexOptions.None, TimeSpan.Zero)); + } + + [Theory] + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public void Enumerate_No_Match(RegexEngine engine) + { + Regex r = 
RegexHelpers.GetRegexAsync(engine, @"\da").GetAwaiter().GetResult(); + int count = 0; + foreach(var match in r.EnumerateMatches("1A2b3c4d5e")) + { + count++; + } + Assert.Equal(0, count); + + } + + [Theory] + [MemberData(nameof(NoneCompiledBacktracking))] + public static void EnumerateMatches_Invalid(RegexOptions options) + { + Regex regex = new Regex("e", options); + Regex.ValueMatchEnumerator enumerator = regex.EnumerateMatches("dotnet"); + + Assert.True(ThrowsInvalidOperationException(ref enumerator)); + + while (enumerator.MoveNext()) ; + Assert.True(ThrowsInvalidOperationException(ref enumerator)); + + bool ThrowsInvalidOperationException(ref Regex.ValueMatchEnumerator enumerator) + { + try + { + _ = enumerator.Current; + } + catch (InvalidOperationException) + { + return true; + } + return false; + } + } + + [Theory] + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public void EnumerateMatches_Lookahead(RegexEngine engine) + { + if (RegexHelpers.IsNonBacktracking(engine)) + { + // lookaheads not supported + return; + } + + const string Pattern = @"\b(?!un)\w+\b"; + const string Input = "unite one unethical ethics use untie ultimate"; + + Regex r = RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase).GetAwaiter().GetResult(); + int count = 0; + string[] expectedMatches = new[] { "one", "ethics", "use", "ultimate" }; + ReadOnlySpan span = Input.AsSpan(); + foreach (ValueMatch match in r.EnumerateMatches(span)) + { + Assert.Equal(expectedMatches[count++], span.Slice(match.Index, match.Length).ToString()); + } + Assert.Equal(4, count); + } + + [Theory] + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public void EnumerateMatches_Lookbehind(RegexEngine engine) + { + if (RegexHelpers.IsNonBacktracking(engine)) + { + // lookbehinds not supported + return; + } + + const string Pattern = @"(?<=\b20)\d{2}\b"; + const string Input = "2010 1999 1861 
2140 2009"; + + Regex r = RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase).GetAwaiter().GetResult(); + int count = 0; + string[] expectedMatches = new[] { "10", "09" }; + ReadOnlySpan span = Input.AsSpan(); + foreach (ValueMatch match in r.EnumerateMatches(span)) + { + Assert.Equal(expectedMatches[count++], span.Slice(match.Index, match.Length).ToString()); + } + Assert.Equal(2, count); + } + + [Theory] + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public void EnumerateMatches_CheckIndex(RegexEngine engine) + { + const string Pattern = @"e{2}\w\b"; + const string Input = "needing a reed"; + + Regex r = RegexHelpers.GetRegexAsync(engine, Pattern).GetAwaiter().GetResult(); + int count = 0; + string[] expectedMatches = new[] { "eed" }; + int[] expectedIndex = new[] { 11 }; + ReadOnlySpan span = Input.AsSpan(); + foreach (ValueMatch match in r.EnumerateMatches(span)) + { + Assert.Equal(expectedMatches[count], span.Slice(match.Index, match.Length).ToString()); + Assert.Equal(expectedIndex[count++], match.Index); + } + } + } + + public partial class RegexMultipleMatchTests + { + [Theory] + [MemberData(nameof(Matches_TestData))] + public void EnumerateMatches(RegexEngine engine, string pattern, string input, RegexOptions options, CaptureData[] expected) + { + Regex regexAdvanced = RegexHelpers.GetRegexAsync(engine, pattern, options).GetAwaiter().GetResult(); + int count = 0; + var span = input.AsSpan(); + foreach (var match in regexAdvanced.EnumerateMatches(span)) + { + Assert.Equal(expected[count].Index, match.Index); + Assert.Equal(expected[count].Length, match.Length); + Assert.Equal(expected[count].Value, span.Slice(match.Index, match.Length).ToString()); + count++; + } + Assert.Equal(expected.Length, count); + } + } + + public partial class RegexMatchTests + { + [Theory] + [MemberData(nameof(Match_Count_TestData))] + public void EnumerateMatches_Count(RegexEngine engine, string pattern, 
string input, int expectedCount) + { + Regex r = RegexHelpers.GetRegexAsync(engine, pattern).GetAwaiter().GetResult(); + int count = 0; + foreach (ValueMatch _ in r.EnumerateMatches(input)) + { + count++; + } + Assert.Equal(expectedCount, count); + } + } + + public partial class RegexCountTests + { + [Theory] + [MemberData(nameof(Count_ReturnsExpectedCount_TestData))] + public void EnumerateMatches_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, RegexOptions options, int expectedCount) + { + Regex r = RegexHelpers.GetRegexAsync(engine, pattern, options).GetAwaiter().GetResult(); + int count = 0; + foreach (ValueMatch _ in r.EnumerateMatches(input)) + { + count++; + } + Assert.Equal(expectedCount, count); + + if (options == RegexOptions.None && engine == RegexEngine.Interpreter) + { + count = 0; + foreach (ValueMatch _ in Regex.EnumerateMatches(input, pattern)) + { + count++; + } + Assert.Equal(expectedCount, count); + } + + switch (engine) + { + case RegexEngine.Interpreter: + case RegexEngine.Compiled: + case RegexEngine.NonBacktracking: + RegexOptions engineOptions = RegexHelpers.OptionsFromEngine(engine); + count = 0; + foreach (ValueMatch _ in Regex.EnumerateMatches(input, pattern, options | engineOptions)) + { + count++; + } + Assert.Equal(expectedCount, count); + + count = 0; + foreach (ValueMatch _ in Regex.EnumerateMatches(input, pattern, options | engineOptions, Regex.InfiniteMatchTimeout)) + { + count++; + } + Assert.Equal(expectedCount, count); + break; + } + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 327d50ee81f769..40f8cd6d825344 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -13,7 +13,7 @@ namespace 
System.Text.RegularExpressions.Tests { - public class RegexMatchTests + public partial class RegexMatchTests { public static IEnumerable Match_MemberData() { diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs index e3884f15a2dcca..65a3b5cd294a15 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs @@ -9,7 +9,7 @@ namespace System.Text.RegularExpressions.Tests { - public class RegexMultipleMatchTests + public partial class RegexMultipleMatchTests { [Theory] [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj index 4be0e31a2c5094..ceed4984ce0dfb 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj @@ -47,6 +47,7 @@ + From aba9a540f0510b78d372d1a604e7e98c49f96cff Mon Sep 17 00:00:00 2001 From: Jose Perez Rodriguez Date: Sat, 9 Apr 2022 12:15:24 -0700 Subject: [PATCH 2/4] Addressing some feedback and implementing Count(span) on top of EnumerateMatches and cleaning up some code. 
--- .../Text/RegularExpressions/Regex.Count.cs | 5 +- .../Regex.EnumerateMatches.cs | 17 +- .../System/Text/RegularExpressions/Regex.cs | 150 +++++++----------- .../Text/RegularExpressions/ValueMatch.cs | 11 +- .../Regex.EnumerateMatches.Tests.cs | 26 --- 5 files changed, 76 insertions(+), 133 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs index 1fecf8efc0c167..05b36a8e01d52f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs @@ -38,11 +38,10 @@ public int Count(ReadOnlySpan input) { int count = 0; - RunAllMatchesWithCallback(input, 0, ref count, static (ref int count, Match match) => + foreach (ValueMatch _ in EnumerateMatches(input)) { count++; - return true; - }, reuseMatchObject: true); + } return count; } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs index 4516a7a343ca2b..2b426ed735c110 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Diagnostics; using System.Diagnostics.CodeAnalysis; namespace System.Text.RegularExpressions @@ -85,10 +86,9 @@ public ref struct ValueMatchEnumerator { private readonly Regex _regex; private readonly ReadOnlySpan _input; - private ValueMatch _matchReference; + private ValueMatch _current; private int _startAt; private int _prevLen; - private bool _hasValidValue; /// /// Creates an instance of the for the passed in which iterates over . @@ -100,10 +100,9 @@ internal ValueMatchEnumerator(Regex regex, ReadOnlySpan input, int startAt { _regex = regex; _input = input; - _matchReference = default; + _current = default; _startAt = startAt; _prevLen = -1; - _hasValidValue = false; } /// @@ -121,16 +120,14 @@ internal ValueMatchEnumerator(Regex regex, ReadOnlySpan input, int startAt public bool MoveNext() { Match? match = _regex.RunSingleMatch(quick: false, _prevLen, _input, _startAt); - if (match is not null && match != RegularExpressions.Match.Empty) + Debug.Assert(match != null, "Match shouldn't be null because we passed quick = false."); + if (match != RegularExpressions.Match.Empty) { - _matchReference = new ValueMatch(match); - _hasValidValue = true; + _current = new ValueMatch(match.Index, match.Length); _startAt = match._textpos; _prevLen = match.Length; return true; } - _hasValidValue = false; - _matchReference = default; return false; } @@ -138,7 +135,7 @@ public bool MoveNext() /// Gets the element at the current position of the enumerator. /// /// Enumeration has either not started or has already finished. - public readonly ValueMatch Current => _hasValidValue ? 
_matchReference : throw new InvalidOperationException(SR.EnumNotStarted); + public readonly ValueMatch Current => _current; } } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index a7e1bc1b75833d..a5b32bd684060c 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -458,116 +458,88 @@ internal void RunAllMatchesWithCallback(string input, int startat, ref T RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner(); try { - // For the string overload, we need to set runtext before starting the match attempts. + // We need to set runtext before starting the match attempts. runner.runtext = input; - RunAllMatchesWithCallbackHelper(input, startat, ref state, callback, runner, usingStringOverload: true, reuseMatchObject); - } - finally - { - runner.runtext = null; // drop reference to text to avoid keeping it alive in a cache. - _runner = runner; - } - } - - /// Internal worker which will scan the passed in string for all matches, and will call for each match found. - internal void RunAllMatchesWithCallback(ReadOnlySpan input, int startat, ref TState state, MatchCallback callback, bool reuseMatchObject) - { - Debug.Assert((uint)startat <= (uint)input.Length); - - RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner(); - try - { - RunAllMatchesWithCallbackHelper(input, startat, ref state, callback, runner, usingStringOverload: false, reuseMatchObject); - } - finally - { - _runner = runner; - } - } - - /// - /// Helper method used by and - /// which loops to find - /// all matches on the passed in and calls for each match found. 
- /// - private void RunAllMatchesWithCallbackHelper(ReadOnlySpan input, int startat, ref TState state, MatchCallback callback, RegexRunner runner, bool usingStringOverload, bool reuseMatchObject) - { - runner.InitializeTimeout(internalMatchTimeout); - int runtextpos = startat; - while (true) - { - runner.InitializeForScan(this, input, startat, false); - runner.runtextpos = runtextpos; - - int stoppos = RightToLeft ? 0 : input.Length; + runner.InitializeTimeout(internalMatchTimeout); + int runtextpos = startat; + while (true) + { + runner.InitializeForScan(this, input, startat, false); + runner.runtextpos = runtextpos; - // We get the Match by calling Scan. 'input' parameter is used to set the Match text which is only relevante if we are using the Run string - // overload, as APIs that call the span overload (like Count) don't require match.Text to be set, so we pass null in that case. - Match? match = ScanInternal(reuseMatchObject, input: usingStringOverload ? runner.runtext : null, 0, runner, input, returnNullIfQuick: false); - Debug.Assert(match is not null); + int stoppos = RightToLeft ? 0 : input.Length; - // if we got a match, then call the callback function with the match and prepare for next iteration. - if (match.Success) - { - if (!reuseMatchObject) - { - // We're not reusing match objects, so null out our field reference to the instance. - // It'll be recreated the next time one is needed. - runner.runmatch = null; - } + // We get the Match by calling Scan. 'input' parameter is used to set the Match text. + Match? match = ScanInternal(reuseMatchObject, runner.runtext, 0, runner, input, returnNullIfQuick: false); + Debug.Assert(match is not null); - if (!callback(ref state, match)) + // if we got a match, then call the callback function with the match and prepare for next iteration. + if (match.Success) { - // If the callback returns false, we're done. 
+ if (!reuseMatchObject) + { + // We're not reusing match objects, so null out our field reference to the instance. + // It'll be recreated the next time one is needed. + runner.runmatch = null; + } - if (usingStringOverload && reuseMatchObject) + if (!callback(ref state, match)) { - // We're reusing the single match instance and we were called via the string overload - // which would have set the match's text, so clear it out as well. - // We don't do this if we're not reusing instances, as in that case we're - // dropping the whole reference to the match, and we no longer own the instance - // having handed it out to the callback. - match.Text = null; + // If the callback returns false, we're done. + + if (reuseMatchObject) + { + // We're reusing the single match instance so we clear out match.Text which was set above. + // We don't do this if we're not reusing instances, as in that case we're + // dropping the whole reference to the match, and we no longer own the instance + // having handed it out to the callback. + match.Text = null; + } + return; } - return; - } - // Now that we've matched successfully, update the starting position to reflect - // the current position, just as Match.NextMatch() would pass in _textpos as textstart. - runtextpos = startat = runner.runtextpos; + // Now that we've matched successfully, update the starting position to reflect + // the current position, just as Match.NextMatch() would pass in _textpos as textstart. + runtextpos = startat = runner.runtextpos; - // Reset state for another iteration. - runner.runtrackpos = runner.runtrack!.Length; - runner.runstackpos = runner.runstack!.Length; - runner.runcrawlpos = runner.runcrawl!.Length; + // Reset state for another iteration. 
+ runner.runtrackpos = runner.runtrack!.Length; + runner.runstackpos = runner.runstack!.Length; + runner.runcrawlpos = runner.runcrawl!.Length; - if (match.Length == 0) - { - if (runner.runtextpos == stoppos) + if (match.Length == 0) { - if (usingStringOverload && reuseMatchObject) + if (runner.runtextpos == stoppos) { - // See above comment. - match.Text = null; + if (reuseMatchObject) + { + // See above comment. + match.Text = null; + } + return; } - return; + + runtextpos += RightToLeft ? -1 : 1; } - runtextpos += RightToLeft ? -1 : 1; + // Loop around to perform next match from where we left off. + continue; } - - // Loop around to perform next match from where we left off. - continue; - } - else - { - // We failed to match at this position. If we're at the stopping point, we're done. - if (runner.runtextpos == stoppos) + else { - return; + // We failed to match at this position. If we're at the stopping point, we're done. + if (runner.runtextpos == stoppos) + { + return; + } } } } + finally + { + runner.runtext = null; // drop reference to text to avoid keeping it alive in a cache. + _runner = runner; + } } /// Helper method used by RunSingleMatch and RunAllMatchesWithCallback which calls runner.Scan to find a match on the passed in span. diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ValueMatch.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ValueMatch.cs index be1e746d26bfa4..7380edfb422fd9 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ValueMatch.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ValueMatch.cs @@ -16,13 +16,14 @@ public readonly ref struct ValueMatch private readonly int _length; /// - /// Crates an instance of the type based on the passed in . + /// Creates an instance of the type based on the passed in and . /// - /// The object represented by this ValueMatch.
- internal ValueMatch(Match match) + /// The position in the original span where the first character of the captured sliced span is found. + /// The length of the captured sliced span. + internal ValueMatch(int index, int length) { - _index = match.Index; - _length = match.Length; + _index = index; + _length = length; } /// diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs index 3e50ab91fb491d..a8f9839845f375 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs @@ -57,32 +57,6 @@ public void Enumerate_No_Match(RegexEngine engine) } - [Theory] - [MemberData(nameof(NoneCompiledBacktracking))] - public static void EnumerateMatches_Invalid(RegexOptions options) - { - Regex regex = new Regex("e", options); - Regex.ValueMatchEnumerator enumerator = regex.EnumerateMatches("dotnet"); - - Assert.True(ThrowsInvalidOperationException(ref enumerator)); - - while (enumerator.MoveNext()) ; - Assert.True(ThrowsInvalidOperationException(ref enumerator)); - - bool ThrowsInvalidOperationException(ref Regex.ValueMatchEnumerator enumerator) - { - try - { - _ = enumerator.Current; - } - catch (InvalidOperationException) - { - return true; - } - return false; - } - } - [Theory] [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] public void EnumerateMatches_Lookahead(RegexEngine engine) From 4d097515c7a2913e1fb1d177fa305f5db236fdc7 Mon Sep 17 00:00:00 2001 From: Jose Perez Rodriguez Date: Sat, 9 Apr 2022 21:43:03 -0700 Subject: [PATCH 3/4] Revert Regex.Count implementation over Enumerate --- .../Text/RegularExpressions/Regex.Count.cs | 5 +- .../System/Text/RegularExpressions/Regex.cs | 150 +++++++++++------- 2 files 
changed, 92 insertions(+), 63 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs index 05b36a8e01d52f..1fecf8efc0c167 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Count.cs @@ -38,10 +38,11 @@ public int Count(ReadOnlySpan input) { int count = 0; - foreach (ValueMatch _ in EnumerateMatches(input)) + RunAllMatchesWithCallback(input, 0, ref count, static (ref int count, Match match) => { count++; - } + return true; + }, reuseMatchObject: true); return count; } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index a5b32bd684060c..a7e1bc1b75833d 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -458,88 +458,116 @@ internal void RunAllMatchesWithCallback(string input, int startat, ref T RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner(); try { - // We need to set runtext before starting the match attempts. + // For the string overload, we need to set runtext before starting the match attempts. runner.runtext = input; - runner.InitializeTimeout(internalMatchTimeout); - int runtextpos = startat; - while (true) - { - runner.InitializeForScan(this, input, startat, false); - runner.runtextpos = runtextpos; + RunAllMatchesWithCallbackHelper(input, startat, ref state, callback, runner, usingStringOverload: true, reuseMatchObject); + } + finally + { + runner.runtext = null; // drop reference to text to avoid keeping it alive in a cache. 
+ _runner = runner; + } + } + + /// Internal worker which will scan the passed in string for all matches, and will call for each match found. + internal void RunAllMatchesWithCallback(ReadOnlySpan input, int startat, ref TState state, MatchCallback callback, bool reuseMatchObject) + { + Debug.Assert((uint)startat <= (uint)input.Length); + + RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner(); + try + { + RunAllMatchesWithCallbackHelper(input, startat, ref state, callback, runner, usingStringOverload: false, reuseMatchObject); + } + finally + { + _runner = runner; + } + } + + /// + /// Helper method used by and + /// which loops to find + /// all matches on the passed in and calls for each match found. + /// + private void RunAllMatchesWithCallbackHelper(ReadOnlySpan input, int startat, ref TState state, MatchCallback callback, RegexRunner runner, bool usingStringOverload, bool reuseMatchObject) + { + runner.InitializeTimeout(internalMatchTimeout); + int runtextpos = startat; + while (true) + { + runner.InitializeForScan(this, input, startat, false); + runner.runtextpos = runtextpos; + + int stoppos = RightToLeft ? 0 : input.Length; - int stoppos = RightToLeft ? 0 : input.Length; + // We get the Match by calling Scan. 'input' parameter is used to set the Match text which is only relevant if we are using the Run string + // overload, as APIs that call the span overload (like Count) don't require match.Text to be set, so we pass null in that case. + Match? match = ScanInternal(reuseMatchObject, input: usingStringOverload ? runner.runtext : null, 0, runner, input, returnNullIfQuick: false); + Debug.Assert(match is not null); - // We get the Match by calling Scan. 'input' parameter is used to set the Match text. - Match?
match = ScanInternal(reuseMatchObject, runner.runtext, 0, runner, input, returnNullIfQuick: false); - Debug.Assert(match is not null); + // if we got a match, then call the callback function with the match and prepare for next iteration. + if (match.Success) + { + if (!reuseMatchObject) + { + // We're not reusing match objects, so null out our field reference to the instance. + // It'll be recreated the next time one is needed. + runner.runmatch = null; + } - // if we got a match, then call the callback function with the match and prepare for next iteration. - if (match.Success) + if (!callback(ref state, match)) { - if (!reuseMatchObject) - { - // We're not reusing match objects, so null out our field reference to the instance. - // It'll be recreated the next time one is needed. - runner.runmatch = null; - } + // If the callback returns false, we're done. - if (!callback(ref state, match)) + if (usingStringOverload && reuseMatchObject) { - // If the callback returns false, we're done. - - if (reuseMatchObject) - { - // We're reusing the single match instance so we clear out match.Text which was set above. - // We don't do this if we're not reusing instances, as in that case we're - // dropping the whole reference to the match, and we no longer own the instance - // having handed it out to the callback. - match.Text = null; - } - return; + // We're reusing the single match instance and we were called via the string overload + // which would have set the match's text, so clear it out as well. + // We don't do this if we're not reusing instances, as in that case we're + // dropping the whole reference to the match, and we no longer own the instance + // having handed it out to the callback. + match.Text = null; } + return; + } - // Now that we've matched successfully, update the starting position to reflect - // the current position, just as Match.NextMatch() would pass in _textpos as textstart. 
- runtextpos = startat = runner.runtextpos; + // Now that we've matched successfully, update the starting position to reflect + // the current position, just as Match.NextMatch() would pass in _textpos as textstart. + runtextpos = startat = runner.runtextpos; - // Reset state for another iteration. - runner.runtrackpos = runner.runtrack!.Length; - runner.runstackpos = runner.runstack!.Length; - runner.runcrawlpos = runner.runcrawl!.Length; + // Reset state for another iteration. + runner.runtrackpos = runner.runtrack!.Length; + runner.runstackpos = runner.runstack!.Length; + runner.runcrawlpos = runner.runcrawl!.Length; - if (match.Length == 0) + if (match.Length == 0) + { + if (runner.runtextpos == stoppos) { - if (runner.runtextpos == stoppos) + if (usingStringOverload && reuseMatchObject) { - if (reuseMatchObject) - { - // See above comment. - match.Text = null; - } - return; + // See above comment. + match.Text = null; } - - runtextpos += RightToLeft ? -1 : 1; + return; } - // Loop around to perform next match from where we left off. - continue; + runtextpos += RightToLeft ? -1 : 1; } - else + + // Loop around to perform next match from where we left off. + continue; + } + else + { + // We failed to match at this position. If we're at the stopping point, we're done. + if (runner.runtextpos == stoppos) { - // We failed to match at this position. If we're at the stopping point, we're done. - if (runner.runtextpos == stoppos) - { - return; - } + return; } } } - finally - { - runner.runtext = null; // drop reference to text to avoid keeping it alive in a cache. - _runner = runner; - } } /// Helper method used by RunSingleMatch and RunAllMatchesWithCallback which calls runner.Scan to find a match on the passed in span. 
From ac0ca12e97f4011f2555b86f51448e5e5560e951 Mon Sep 17 00:00:00 2001 From: Jose Perez Rodriguez Date: Mon, 11 Apr 2022 14:22:15 -0700 Subject: [PATCH 4/4] PR Feedback --- .../Regex.EnumerateMatches.cs | 15 ++++++++--- .../System/Text/RegularExpressions/Regex.cs | 27 ++++++++++++------- .../Regex.EnumerateMatches.Tests.cs | 18 ++----------- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs index 2b426ed735c110..44a0cca9e6d8db 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.EnumerateMatches.cs @@ -12,6 +12,8 @@ public partial class Regex /// Searches an input span for all occurrences of a regular expression and returns a to iterate over the matches. /// /// + /// Each match won't actually happen until is invoked on the enumerator, with one match being performed per call. + /// Since the evaluation of the match happens lazily, any changes to the passed in input in between calls to will affect the match results. /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which /// make this method be amortized allocation free. /// @@ -27,6 +29,8 @@ public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan input, [S /// Searches an input span for all occurrences of a regular expression and returns a to iterate over the matches. /// /// + /// Each match won't actually happen until is invoked on the enumerator, with one match being performed per call. + /// Since the evaluation of the match happens lazily, any changes to the passed in input in between calls to will affect the match results. 
/// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which /// make this method be amortized allocation free. /// @@ -44,6 +48,8 @@ public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan input, [S /// Searches an input span for all occurrences of a regular expression and returns a to iterate over the matches. /// /// + /// Each match won't actually happen until is invoked on the enumerator, with one match being performed per call. + /// Since the evaluation of the match happens lazily, any changes to the passed in input in between calls to will affect the match results. /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which /// make this method be amortized allocation free. /// @@ -62,6 +68,8 @@ public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan input, [S /// Searches an input span for all occurrences of a regular expression and returns a to iterate over the matches. /// /// + /// Each match won't actually happen until is invoked on the enumerator, with one match being performed per call. + /// Since the evaluation of the match happens lazily, any changes to the passed in input in between calls to will affect the match results. /// The enumerator returned by this method, as well as the structs returned by the enumerator that wrap each match found in the input are ref structs which /// make this method be amortized allocation free. /// @@ -71,12 +79,11 @@ public ValueMatchEnumerator EnumerateMatches(ReadOnlySpan input) => new ValueMatchEnumerator(this, input, RightToLeft ? input.Length : 0); /// - /// Represents an enumerator containing the set of successful matches found by iteratively applying a regular expression pattern to the input span. The - /// enumerator has no public constructor. The method returns a - /// object. 
+ /// Represents an enumerator containing the set of successful matches found by iteratively applying a regular expression pattern to the input span. /// /// - /// The enumerator will lazily iterate over zero or more objects. If there is at least one successful match in the span, then + /// The enumerator has no public constructor. The method returns a + /// object. The enumerator will lazily iterate over zero or more objects. If there is at least one successful match in the span, then /// returns and will contain the first . If there are no successful matches, /// then returns and throws an . /// diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index a7e1bc1b75833d..525a4a01cd3ee9 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -415,31 +415,40 @@ protected void InitializeReferences() runner.InitializeTimeout(internalMatchTimeout); runner.InitializeForScan(this, input, startat, quick); - int stoppos = RightToLeft ? 0 : input.Length; - // If previous match was empty or failed, advance by one before matching. if (prevlen == 0) { - if (runner.runtextstart == stoppos) + if (RightToLeft) { - return RegularExpressions.Match.Empty; + if (runner.runtextstart == 0) + { + return RegularExpressions.Match.Empty; + } + runner.runtextpos--; + } + else + { + if (runner.runtextstart == input.Length) + { + return RegularExpressions.Match.Empty; + } + runner.runtextpos++; } - - runner.runtextpos += RightToLeft ? -1 : 1; } runner.Scan(input); // If runmatch is null it means that an override of Scan didn't implement it correctly, so we will // let this null ref since there are lots of ways where you can end up in a erroneous state.
- if (runner.runmatch!.FoundMatch) + Match match = runner.runmatch!; + if (match!.FoundMatch) { if (quick) { return null; } - runner.runmatch.Tidy(runner.runtextpos, 0); - return runner.runmatch; + match.Tidy(runner.runtextpos, 0); + return match; } return RegularExpressions.Match.Empty; diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs index a8f9839845f375..6defa59ff89324 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.EnumerateMatches.Tests.cs @@ -43,20 +43,6 @@ public void EnumerateMatches_Ctor_Invalid() AssertExtensions.Throws("matchTimeout", () => Regex.EnumerateMatches("input", "pattern", RegexOptions.None, TimeSpan.Zero)); } - [Theory] - [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] - public void Enumerate_No_Match(RegexEngine engine) - { - Regex r = RegexHelpers.GetRegexAsync(engine, @"\da").GetAwaiter().GetResult(); - int count = 0; - foreach(var match in r.EnumerateMatches("1A2b3c4d5e")) - { - count++; - } - Assert.Equal(0, count); - - } - [Theory] [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] public void EnumerateMatches_Lookahead(RegexEngine engine) @@ -133,8 +119,8 @@ public void EnumerateMatches(RegexEngine engine, string pattern, string input, R { Regex regexAdvanced = RegexHelpers.GetRegexAsync(engine, pattern, options).GetAwaiter().GetResult(); int count = 0; - var span = input.AsSpan(); - foreach (var match in regexAdvanced.EnumerateMatches(span)) + ReadOnlySpan span = input.AsSpan(); + foreach (ValueMatch match in regexAdvanced.EnumerateMatches(span)) { Assert.Equal(expected[count].Index, match.Index); Assert.Equal(expected[count].Length, 
match.Length);