From d1f435abda946d27712218522a8fb5be30ea144e Mon Sep 17 00:00:00 2001 From: bbartels Date: Sat, 6 Jul 2024 22:24:47 +0100 Subject: [PATCH 01/21] Adds SpanSplitEnumerator --- .../System.Memory/ref/System.Memory.cs | 10 ++ .../src/System/MemoryExtensions.cs | 96 +++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/src/libraries/System.Memory/ref/System.Memory.cs b/src/libraries/System.Memory/ref/System.Memory.cs index 66b95a12b5c25c..daa6fc6135b10d 100644 --- a/src/libraries/System.Memory/ref/System.Memory.cs +++ b/src/libraries/System.Memory/ref/System.Memory.cs @@ -350,6 +350,10 @@ public static void Sort(this System.Span keys, System.Span(this System.Span keys, System.Span items, System.Comparison comparison) { } public static void Sort(this System.Span span, TComparer comparer) where TComparer : System.Collections.Generic.IComparer? { } public static void Sort(this System.Span keys, System.Span items, TComparer comparer) where TComparer : System.Collections.Generic.IComparer? 
{ } + public static System.MemoryExtensions.SpanSplitEnumerator Split(this System.ReadOnlySpan source, T separator) where T : IEquatable { throw null; } + public static System.MemoryExtensions.SpanSplitEnumerator Split(this System.ReadOnlySpan source, System.ReadOnlySpan separator) where T : IEquatable { throw null; } + public static System.MemoryExtensions.SpanSplitEnumerator SplitAny(this System.ReadOnlySpan source, params System.ReadOnlySpan separators) where T : IEquatable { throw null; } + public static System.MemoryExtensions.SpanSplitEnumerator SplitAny(this System.ReadOnlySpan source, System.Buffers.SearchValues separators) where T : IEquatable { throw null; } public static int Split(this System.ReadOnlySpan source, System.Span destination, char separator, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } public static int Split(this System.ReadOnlySpan source, System.Span destination, System.ReadOnlySpan separator, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } public static int SplitAny(this System.ReadOnlySpan source, System.Span destination, System.ReadOnlySpan separators, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } @@ -430,6 +434,12 @@ public ref struct TryWriteInterpolatedStringHandler public bool AppendFormatted(string? value) { throw null; } public bool AppendFormatted(string? value, int alignment = 0, string? 
format = null) { throw null; } } + public ref struct SpanSplitEnumerator where T : System.IEquatable + { + public System.MemoryExtensions.SpanSplitEnumerator GetEnumerator() { throw null; } + public readonly System.Range Current { get { throw null; } } + public bool MoveNext() { throw null; } + } } } namespace System.Buffers diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs index cb5e969dd1c2aa..a08910f672ee0d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs @@ -3639,6 +3639,15 @@ private static void SliceLongerSpanToMatchShorterLength(ref ReadOnlySpan s Debug.Assert(span.Length == other.Length); } + public static SpanSplitEnumerator Split(this ReadOnlySpan source, T separator) + where T : IEquatable => new SpanSplitEnumerator(source, separator); + public static SpanSplitEnumerator Split(this ReadOnlySpan source, ReadOnlySpan separator) + where T : IEquatable => new SpanSplitEnumerator(source, separator, treatAsSingleSeparator: true); + public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, ReadOnlySpan separators) + where T : IEquatable => new SpanSplitEnumerator(source, separators, treatAsSingleSeparator: false); + public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, SearchValues separators) + where T : IEquatable => new SpanSplitEnumerator(source, separators); + /// /// Parses the source for the specified , populating the span /// with instances representing the regions between the separators. 
@@ -4664,5 +4673,92 @@ private bool Fail() return false; } } + + public ref struct SpanSplitEnumerator where T : IEquatable + { + private enum SplitMode + { + None = 0, + SingleToken, + Sequence, + Any, + SearchValues + } + + private readonly ReadOnlySpan _buffer; + + private readonly ReadOnlySpan _separators; + private readonly T _separator = default!; + private readonly ReadOnlySpan _spanSeparator; + private readonly SearchValues _searchValues = default!; + + private readonly int _separatorLength; + private readonly SplitMode _splitMode; + + private readonly bool _isInitialized = true; + + private int _startCurrent = 0; + private int _endCurrent = 0; + private int _startNext = 0; + + public SpanSplitEnumerator GetEnumerator() => this; + + public Range Current => new Range(_startCurrent, _endCurrent); + + internal SpanSplitEnumerator(ReadOnlySpan span, SearchValues searchValues) + { + _buffer = span; + _separatorLength = 1; + _splitMode = SplitMode.SearchValues; + _searchValues = searchValues; + } + + internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separator, bool treatAsSingleSeparator) + { + _buffer = span; + _separators = treatAsSingleSeparator ? default : separator; + _spanSeparator = treatAsSingleSeparator ? separator : default; + _separatorLength = (_separators.Length, treatAsSingleSeparator) switch + { + (0, true) or (_, false) => 1, + (_, true) => separator.Length, + }; + _splitMode = treatAsSingleSeparator ? 
SplitMode.Sequence : SplitMode.Any; + } + + internal SpanSplitEnumerator(ReadOnlySpan span, T separator) + { + _buffer = span; + _separator = separator; + _separatorLength = 1; + _splitMode = SplitMode.SingleToken; + } + + public bool MoveNext() + { + if (!_isInitialized || _startNext > _buffer.Length) + { + return false; + } + + ReadOnlySpan slice = _buffer[_startNext..]; + _startCurrent = _startNext; + + int separatorIndex = _splitMode switch + { + SplitMode.SingleToken => slice.IndexOf(_separator), + SplitMode.Sequence => slice.IndexOf(_spanSeparator), + SplitMode.Any => slice.IndexOfAny(_separators), + SplitMode.SearchValues => _searchValues.IndexOfAny(_buffer), + _ => throw new UnreachableException() + }; + + int elementLength = (separatorIndex != -1 ? separatorIndex : slice.Length); + + _endCurrent = _startCurrent + elementLength; + _startNext = _endCurrent + _separatorLength; + return true; + } + } } } From d973007f908690d43adfe237946451bdfbc942f3 Mon Sep 17 00:00:00 2001 From: bbartels Date: Sat, 6 Jul 2024 23:26:12 +0100 Subject: [PATCH 02/21] Adds params modifier --- src/libraries/System.Memory/ref/System.Memory.cs | 2 +- .../System.Private.CoreLib/src/System/MemoryExtensions.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Memory/ref/System.Memory.cs b/src/libraries/System.Memory/ref/System.Memory.cs index daa6fc6135b10d..670a250bf5eaac 100644 --- a/src/libraries/System.Memory/ref/System.Memory.cs +++ b/src/libraries/System.Memory/ref/System.Memory.cs @@ -352,7 +352,7 @@ public static void Sort(this System.Span span, TComparer compar public static void Sort(this System.Span keys, System.Span items, TComparer comparer) where TComparer : System.Collections.Generic.IComparer? 
{ } public static System.MemoryExtensions.SpanSplitEnumerator Split(this System.ReadOnlySpan source, T separator) where T : IEquatable { throw null; } public static System.MemoryExtensions.SpanSplitEnumerator Split(this System.ReadOnlySpan source, System.ReadOnlySpan separator) where T : IEquatable { throw null; } - public static System.MemoryExtensions.SpanSplitEnumerator SplitAny(this System.ReadOnlySpan source, params System.ReadOnlySpan separators) where T : IEquatable { throw null; } + public static System.MemoryExtensions.SpanSplitEnumerator SplitAny(this System.ReadOnlySpan source, [System.Diagnostics.CodeAnalysis.UnscopedRef] params System.ReadOnlySpan separators) where T : IEquatable { throw null; } public static System.MemoryExtensions.SpanSplitEnumerator SplitAny(this System.ReadOnlySpan source, System.Buffers.SearchValues separators) where T : IEquatable { throw null; } public static int Split(this System.ReadOnlySpan source, System.Span destination, char separator, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } public static int Split(this System.ReadOnlySpan source, System.Span destination, System.ReadOnlySpan separator, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs index a08910f672ee0d..c9762648535c4f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs @@ -3643,7 +3643,7 @@ public static SpanSplitEnumerator Split(this ReadOnlySpan source, T sep where T : IEquatable => new SpanSplitEnumerator(source, separator); public static SpanSplitEnumerator Split(this ReadOnlySpan source, ReadOnlySpan separator) where T : IEquatable => new SpanSplitEnumerator(source, separator, treatAsSingleSeparator: true); - public static 
SpanSplitEnumerator SplitAny(this ReadOnlySpan source, ReadOnlySpan separators) + public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, [System.Diagnostics.CodeAnalysis.UnscopedRef] params ReadOnlySpan separators) where T : IEquatable => new SpanSplitEnumerator(source, separators, treatAsSingleSeparator: false); public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, SearchValues separators) where T : IEquatable => new SpanSplitEnumerator(source, separators); From 09a394cbd4d81383845b3bd03dca7403eea182e5 Mon Sep 17 00:00:00 2001 From: bbartels Date: Sun, 7 Jul 2024 23:38:51 +0100 Subject: [PATCH 03/21] Moves implementation to separate file --- .../tests/ReadOnlySpan/Split.char.cs | 261 ++++++++++++++++++ .../tests/System.Memory.Tests.csproj | 1 + .../System.Private.CoreLib.Shared.projitems | 1 + .../src/System/MemoryExtensions.Split.cs | 104 +++++++ .../src/System/MemoryExtensions.cs | 96 ------- 5 files changed, 367 insertions(+), 96 deletions(-) create mode 100644 src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs new file mode 100644 index 00000000000000..dbad4244a70712 --- /dev/null +++ b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs @@ -0,0 +1,261 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Linq; +using Xunit; +using SpanSplitEnumerator = System.MemoryExtensions.SpanSplitEnumerator; + +namespace System.SpanTests +{ + public static partial class ReadOnlySpanTests + { + [Fact] + public static void SplitNoMatchSingleResult() + { + ReadOnlySpan value = "a b"; + + string expected = value.ToString(); + var enumerator = value.Split(','); + Assert.True(enumerator.MoveNext()); + Assert.Equal(expected, value[enumerator.Current].ToString()); + } + + [Fact] + public static void DefaultSpanSplitEnumeratorBehavior() + { + var charSpanEnumerator = new SpanSplitEnumerator(); + Assert.Equal(new Range(0, 0), charSpanEnumerator.Current); + Assert.False(charSpanEnumerator.MoveNext()); + + // Implicit DoesNotThrow assertion + charSpanEnumerator.GetEnumerator(); + + var stringSpanEnumerator = new SpanSplitEnumerator(); + Assert.Equal(new Range(0, 0), stringSpanEnumerator.Current); + Assert.False(stringSpanEnumerator.MoveNext()); + stringSpanEnumerator.GetEnumerator(); + } + + [Fact] + public static void ValidateArguments_OverloadWithoutSeparator() + { + ReadOnlySpan buffer = default; + + var enumerator = buffer.Split(' '); + Assert.True(enumerator.MoveNext()); + Assert.Equal(new Range(0, 0), enumerator.Current); + Assert.False(enumerator.MoveNext()); + + buffer = ""; + enumerator = buffer.Split(' '); + Assert.True(enumerator.MoveNext()); + Assert.Equal(new Range(0, 0), enumerator.Current); + Assert.False(enumerator.MoveNext()); + + buffer = " "; + enumerator = buffer.Split(' '); + Assert.True(enumerator.MoveNext()); + Assert.Equal(new Range(0, 0), enumerator.Current); + Assert.True(enumerator.MoveNext()); + Assert.Equal(new Range(1, 1), enumerator.Current); + Assert.False(enumerator.MoveNext()); + } + + [Fact] + public static void ValidateArguments_OverloadWithROSSeparator() + { + // Default buffer + ReadOnlySpan buffer = default; + + var enumerator = buffer.Split(default(char)); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new 
Range(0, 0)); + Assert.False(enumerator.MoveNext()); + + enumerator = buffer.Split(' '); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.False(enumerator.MoveNext()); + + // Empty buffer + buffer = ""; + + enumerator = buffer.Split(default(char)); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.False(enumerator.MoveNext()); + + enumerator = buffer.Split(' '); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.False(enumerator.MoveNext()); + + // Single whitespace buffer + buffer = " "; + + enumerator = buffer.Split(default(char)); + Assert.True(enumerator.MoveNext()); + Assert.False(enumerator.MoveNext()); + + enumerator = buffer.Split(' '); + Assert.Equal(new Range(0, 0), enumerator.Current); + Assert.True(enumerator.MoveNext()); + Assert.Equal(new Range(0, 0), enumerator.Current); + Assert.True(enumerator.MoveNext()); + Assert.Equal(new Range(1, 1), enumerator.Current); + Assert.False(enumerator.MoveNext()); + } + + [Fact] + public static void ValidateArguments_OverloadWithStringSeparator() + { + // Default buffer + ReadOnlySpan buffer = default; + + var enumerator = buffer.Split(null); // null is treated as empty string + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.False(enumerator.MoveNext()); + + enumerator = buffer.Split(""); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.False(enumerator.MoveNext()); + + enumerator = buffer.Split(" "); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.False(enumerator.MoveNext()); + + // Empty buffer + buffer = ""; + + enumerator = buffer.Split(null); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.False(enumerator.MoveNext()); + + enumerator = buffer.Split(""); + 
Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.False(enumerator.MoveNext()); + + enumerator = buffer.Split(" "); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.False(enumerator.MoveNext()); + + // Single whitespace buffer + buffer = " "; + + enumerator = buffer.Split(null); // null is treated as empty string + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(1, 1)); + Assert.False(enumerator.MoveNext()); + + enumerator = buffer.Split(""); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(1, 1)); + Assert.False(enumerator.MoveNext()); + + enumerator = buffer.Split(" "); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(0, 0)); + Assert.True(enumerator.MoveNext()); + Assert.Equal(enumerator.Current, new Range(1, 1)); + Assert.False(enumerator.MoveNext()); + } + + [Theory] + [InlineData("", ',', new[] { "" })] + [InlineData(" ", ' ', new[] { "", "" })] + [InlineData(",", ',', new[] { "", "" })] + [InlineData(" ", ' ', new[] { "", "", "", "", "", "" })] + [InlineData(",,", ',', new[] { "", "", "" })] + [InlineData("ab", ',', new[] { "ab" })] + [InlineData("a,b", ',', new[] { "a", "b" })] + [InlineData("a,", ',', new[] { "a", "" })] + [InlineData(",b", ',', new[] { "", "b" })] + [InlineData(",a,b", ',', new[] { "", "a", "b" })] + [InlineData("a,b,", ',', new[] { "a", "b", "" })] + [InlineData("a,b,c", ',', new[] { "a", "b", "c" })] + [InlineData("a,,c", ',', new[] { "a", "", "c" })] + [InlineData(",a,b,c", ',', new[] { "", "a", "b", "c" })] + [InlineData("a,b,c,", ',', new[] { "a", "b", "c", "" })] + [InlineData(",a,b,c,", ',', new[] { "", 
"a", "b", "c", "" })] + [InlineData("first,second", ',', new[] { "first", "second" })] + [InlineData("first,", ',', new[] { "first", "" })] + [InlineData(",second", ',', new[] { "", "second" })] + [InlineData(",first,second", ',', new[] { "", "first", "second" })] + [InlineData("first,second,", ',', new[] { "first", "second", "" })] + [InlineData("first,second,third", ',', new[] { "first", "second", "third" })] + [InlineData("first,,third", ',', new[] { "first", "", "third" })] + [InlineData(",first,second,third", ',', new[] { "", "first", "second", "third" })] + [InlineData("first,second,third,", ',', new[] { "first", "second", "third", "" })] + [InlineData(",first,second,third,", ',', new[] { "", "first", "second", "third", "" })] + [InlineData("Foo Bar Baz", ' ', new[] { "Foo", "Bar", "Baz" })] + [InlineData("Foo Bar Baz ", ' ', new[] { "Foo", "Bar", "Baz", "" })] + [InlineData(" Foo Bar Baz ", ' ', new[] { "", "Foo", "Bar", "Baz", "" })] + [InlineData(" Foo Bar Baz ", ' ', new[] { "", "Foo", "", "Bar", "Baz", "" })] + [InlineData("Foo Baz Bar", default(char), new[] { "Foo Baz Bar" })] + [InlineData("Foo Baz \x0000 Bar", default(char), new[] { "Foo Baz ", " Bar" })] + [InlineData("Foo Baz \x0000 Bar\x0000", default(char), new[] { "Foo Baz ", " Bar", "" })] + public static void SpanSplitCharSeparator(string valueParam, char separator, string[] expectedParam) + { + char[][] expected = expectedParam.Select(x => x.ToCharArray()).ToArray(); + AssertEqual(expected, valueParam, valueParam.AsSpan().Split(separator)); + } + + [Theory] + [InlineData("", new[] { "" })] + [InlineData(" ", new[] { "", "" })] + [InlineData(" ", new[] { "", "", "", "", "", "" })] + [InlineData(" ", new[] { "", "", "" })] + [InlineData("ab", new[] { "ab" })] + [InlineData("a b", new[] { "a", "b" })] + [InlineData("a ", new[] { "a", "" })] + [InlineData(" b", new[] { "", "b" })] + [InlineData("Foo Bar Baz", new[] { "Foo", "Bar", "Baz" })] + [InlineData("Foo Bar Baz ", new[] { "Foo", "Bar", 
"Baz", "" })] + [InlineData(" Foo Bar Baz ", new[] { "", "Foo", "Bar", "Baz", "" })] + [InlineData(" Foo Bar Baz ", new[] { "", "Foo", "", "Bar", "Baz", "" })] + public static void SpanSplitDefaultCharSeparator(string valueParam, string[] expectedParam) + { + char[][] expected = expectedParam.Select(x => x.ToCharArray()).ToArray(); + AssertEqual(expected, valueParam, valueParam.AsSpan().Split(' ')); + } + + [Theory] + [InlineData(" Foo Bar Baz,", ", ", new[] { " Foo Bar Baz," })] + [InlineData(" Foo Bar Baz, ", ", ", new[] { " Foo Bar Baz", "" })] + [InlineData(", Foo Bar Baz, ", ", ", new[] { "", "Foo Bar Baz", "" })] + [InlineData(", Foo, Bar, Baz, ", ", ", new[] { "", "Foo", "Bar", "Baz", "" })] + [InlineData(", , Foo Bar, Baz", ", ", new[] { "", "", "Foo Bar", "Baz" })] + [InlineData(", , Foo Bar, Baz, , ", ", ", new[] { "", "", "Foo Bar", "Baz", "", "" })] + [InlineData(", , , , , ", ", ", new[] { "", "", "", "", "", "" })] + [InlineData(" ", " ", new[] { "", "", "", "", "", "" })] + [InlineData(" Foo, Bar Baz ", " ", new[] { "", "Foo, Bar", "Baz", "" })] + public static void SpanSplitStringSeparator(string valueParam, string separator, string[] expectedParam) + { + char[][] expected = expectedParam.Select(x => x.ToCharArray()).ToArray(); + AssertEqual(expected, valueParam, valueParam.AsSpan().Split(separator)); + } + + private static void AssertEqual(T[][] items, ReadOnlySpan orig, MemoryExtensions.SpanSplitEnumerator source) where T : IEquatable + { + foreach (var item in items) + { + Assert.True(source.MoveNext()); + var slice = orig[source.Current]; + Assert.Equal(item, slice.ToArray()); + } + Assert.False(source.MoveNext()); + } + } +} \ No newline at end of file diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index a8afce4264197e..6f68804b534732 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ 
b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -191,6 +191,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 233d19f0d5b5f4..eda1bed26e5f49 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -573,6 +573,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs new file mode 100644 index 00000000000000..c84d1b885d162b --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -0,0 +1,104 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; +using System.Diagnostics; + +namespace System +{ + public static partial class MemoryExtensions + { + public static SpanSplitEnumerator Split(this ReadOnlySpan source, T separator) + where T : IEquatable => new SpanSplitEnumerator(source, separator); + public static SpanSplitEnumerator Split(this ReadOnlySpan source, ReadOnlySpan separator) + where T : IEquatable => new SpanSplitEnumerator(source, separator, treatAsSingleSeparator: true); + public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, [System.Diagnostics.CodeAnalysis.UnscopedRef] params ReadOnlySpan separators) + where T : IEquatable => new SpanSplitEnumerator(source, separators, treatAsSingleSeparator: false); + public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, SearchValues separators) + where T : IEquatable => new SpanSplitEnumerator(source, separators); + + public ref struct SpanSplitEnumerator where T : IEquatable + { + private enum SplitMode + { + None = 0, + SingleToken, + Sequence, + 
Any, + SearchValues + } + + private readonly ReadOnlySpan _span; + + private readonly T _separator = default!; + private readonly ReadOnlySpan _separatorBuffer; + private readonly SearchValues _searchValues = default!; + + private readonly int _separatorLength; + private readonly SplitMode _splitMode; + + private readonly bool _isInitialized = true; + + private int _startCurrent = 0; + private int _endCurrent = 0; + private int _startNext = 0; + + public SpanSplitEnumerator GetEnumerator() => this; + + public Range Current => new Range(_startCurrent, _endCurrent); + + internal SpanSplitEnumerator(ReadOnlySpan span, SearchValues searchValues) + { + _span = span; + _separatorLength = 1; + _splitMode = SplitMode.SearchValues; + _searchValues = searchValues; + } + + internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separator, bool treatAsSingleSeparator) + { + _span = span; + _separatorBuffer = separator; + _separatorLength = (_separatorBuffer.Length, treatAsSingleSeparator) switch + { + (0, true) or (_, false) => 1, + _ => separator.Length + }; + _splitMode = treatAsSingleSeparator ? SplitMode.Sequence : SplitMode.Any; + } + + internal SpanSplitEnumerator(ReadOnlySpan span, T separator) + { + _span = span; + _separator = separator; + _separatorLength = 1; + _splitMode = SplitMode.SingleToken; + } + + public bool MoveNext() + { + if (!_isInitialized || _startNext > _span.Length) + { + return false; + } + + ReadOnlySpan slice = _span[_startNext..]; + + int separatorIndex = _splitMode switch + { + SplitMode.SingleToken => slice.IndexOf(_separator), + SplitMode.Sequence => slice.IndexOf(_separatorBuffer), + SplitMode.Any => slice.IndexOfAny(_separatorBuffer), + SplitMode.SearchValues => _searchValues.IndexOfAny(_span), + _ => throw new UnreachableException() + }; + + int elementLength = (separatorIndex != -1 ? 
separatorIndex : slice.Length); + + _startCurrent = _startNext; + _endCurrent = _startCurrent + elementLength; + _startNext = _endCurrent + _separatorLength; + return true; + } + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs index c9762648535c4f..cb5e969dd1c2aa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs @@ -3639,15 +3639,6 @@ private static void SliceLongerSpanToMatchShorterLength(ref ReadOnlySpan s Debug.Assert(span.Length == other.Length); } - public static SpanSplitEnumerator Split(this ReadOnlySpan source, T separator) - where T : IEquatable => new SpanSplitEnumerator(source, separator); - public static SpanSplitEnumerator Split(this ReadOnlySpan source, ReadOnlySpan separator) - where T : IEquatable => new SpanSplitEnumerator(source, separator, treatAsSingleSeparator: true); - public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, [System.Diagnostics.CodeAnalysis.UnscopedRef] params ReadOnlySpan separators) - where T : IEquatable => new SpanSplitEnumerator(source, separators, treatAsSingleSeparator: false); - public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, SearchValues separators) - where T : IEquatable => new SpanSplitEnumerator(source, separators); - /// /// Parses the source for the specified , populating the span /// with instances representing the regions between the separators. 
@@ -4673,92 +4664,5 @@ private bool Fail() return false; } } - - public ref struct SpanSplitEnumerator where T : IEquatable - { - private enum SplitMode - { - None = 0, - SingleToken, - Sequence, - Any, - SearchValues - } - - private readonly ReadOnlySpan _buffer; - - private readonly ReadOnlySpan _separators; - private readonly T _separator = default!; - private readonly ReadOnlySpan _spanSeparator; - private readonly SearchValues _searchValues = default!; - - private readonly int _separatorLength; - private readonly SplitMode _splitMode; - - private readonly bool _isInitialized = true; - - private int _startCurrent = 0; - private int _endCurrent = 0; - private int _startNext = 0; - - public SpanSplitEnumerator GetEnumerator() => this; - - public Range Current => new Range(_startCurrent, _endCurrent); - - internal SpanSplitEnumerator(ReadOnlySpan span, SearchValues searchValues) - { - _buffer = span; - _separatorLength = 1; - _splitMode = SplitMode.SearchValues; - _searchValues = searchValues; - } - - internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separator, bool treatAsSingleSeparator) - { - _buffer = span; - _separators = treatAsSingleSeparator ? default : separator; - _spanSeparator = treatAsSingleSeparator ? separator : default; - _separatorLength = (_separators.Length, treatAsSingleSeparator) switch - { - (0, true) or (_, false) => 1, - (_, true) => separator.Length, - }; - _splitMode = treatAsSingleSeparator ? 
SplitMode.Sequence : SplitMode.Any; - } - - internal SpanSplitEnumerator(ReadOnlySpan span, T separator) - { - _buffer = span; - _separator = separator; - _separatorLength = 1; - _splitMode = SplitMode.SingleToken; - } - - public bool MoveNext() - { - if (!_isInitialized || _startNext > _buffer.Length) - { - return false; - } - - ReadOnlySpan slice = _buffer[_startNext..]; - _startCurrent = _startNext; - - int separatorIndex = _splitMode switch - { - SplitMode.SingleToken => slice.IndexOf(_separator), - SplitMode.Sequence => slice.IndexOf(_spanSeparator), - SplitMode.Any => slice.IndexOfAny(_separators), - SplitMode.SearchValues => _searchValues.IndexOfAny(_buffer), - _ => throw new UnreachableException() - }; - - int elementLength = (separatorIndex != -1 ? separatorIndex : slice.Length); - - _endCurrent = _startCurrent + elementLength; - _startNext = _endCurrent + _separatorLength; - return true; - } - } } } From 5d9feb7ce40af7a9c648cac9047b91345215efde Mon Sep 17 00:00:00 2001 From: bbartels Date: Sun, 7 Jul 2024 23:40:08 +0100 Subject: [PATCH 04/21] Adds newline at end of file --- src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs index dbad4244a70712..198e67444918da 100644 --- a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs +++ b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs @@ -258,4 +258,4 @@ private static void AssertEqual(T[][] items, ReadOnlySpan orig, MemoryExte Assert.False(source.MoveNext()); } } -} \ No newline at end of file +} From 4fe1abe6f204cbc36dab4ad6185a788497d6bd3d Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 8 Jul 2024 22:35:46 +0100 Subject: [PATCH 05/21] Moves SpanSplitEnumeratorMode outside of enumerator --- .../src/System/MemoryExtensions.Split.cs | 52 +++++++++---------- 1 file changed, 25 insertions(+), 
27 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index c84d1b885d162b..bd8993e3106102 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -16,17 +16,18 @@ public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, [S public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, SearchValues separators) where T : IEquatable => new SpanSplitEnumerator(source, separators); - public ref struct SpanSplitEnumerator where T : IEquatable + private enum SpanSplitEnumeratorMode { - private enum SplitMode - { - None = 0, - SingleToken, - Sequence, - Any, - SearchValues - } + None = 0, + SingleToken, + Sequence, + EmptySequence, + Any, + SearchValues + } + public ref struct SpanSplitEnumerator where T : IEquatable + { private readonly ReadOnlySpan _span; private readonly T _separator = default!; @@ -34,9 +35,7 @@ private enum SplitMode private readonly SearchValues _searchValues = default!; private readonly int _separatorLength; - private readonly SplitMode _splitMode; - - private readonly bool _isInitialized = true; + private readonly SpanSplitEnumeratorMode _splitMode; private int _startCurrent = 0; private int _endCurrent = 0; @@ -49,8 +48,7 @@ private enum SplitMode internal SpanSplitEnumerator(ReadOnlySpan span, SearchValues searchValues) { _span = span; - _separatorLength = 1; - _splitMode = SplitMode.SearchValues; + _splitMode = SpanSplitEnumeratorMode.SearchValues; _searchValues = searchValues; } @@ -58,37 +56,37 @@ internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separator, bo { _span = span; _separatorBuffer = separator; - _separatorLength = (_separatorBuffer.Length, treatAsSingleSeparator) switch + _splitMode = (separator.Length, treatAsSingleSeparator) switch { - (0, true) or (_, false) => 1, 
- _ => separator.Length + (0, true) => SpanSplitEnumeratorMode.EmptySequence, + (_, true) => SpanSplitEnumeratorMode.Sequence, + _ => SpanSplitEnumeratorMode.Any }; - _splitMode = treatAsSingleSeparator ? SplitMode.Sequence : SplitMode.Any; } internal SpanSplitEnumerator(ReadOnlySpan span, T separator) { _span = span; _separator = separator; - _separatorLength = 1; - _splitMode = SplitMode.SingleToken; + _splitMode = SpanSplitEnumeratorMode.SingleToken; } public bool MoveNext() { - if (!_isInitialized || _startNext > _span.Length) + if (_splitMode is SpanSplitEnumeratorMode.None || _startNext > _span.Length) { return false; } ReadOnlySpan slice = _span[_startNext..]; - int separatorIndex = _splitMode switch + (int separatorIndex, int separatorLength) = _splitMode switch { - SplitMode.SingleToken => slice.IndexOf(_separator), - SplitMode.Sequence => slice.IndexOf(_separatorBuffer), - SplitMode.Any => slice.IndexOfAny(_separatorBuffer), - SplitMode.SearchValues => _searchValues.IndexOfAny(_span), + SpanSplitEnumeratorMode.SingleToken => (slice.IndexOf(_separator), 1), + SpanSplitEnumeratorMode.Sequence => (slice.IndexOf(_separatorBuffer), _separatorBuffer.Length), + SpanSplitEnumeratorMode.EmptySequence => (slice.IndexOf(_separatorBuffer), 1), + SpanSplitEnumeratorMode.Any => (slice.IndexOfAny(_separatorBuffer), 1), + SpanSplitEnumeratorMode.SearchValues => (_searchValues.IndexOfAny(_span), 1), _ => throw new UnreachableException() }; @@ -96,7 +94,7 @@ public bool MoveNext() _startCurrent = _startNext; _endCurrent = _startCurrent + elementLength; - _startNext = _endCurrent + _separatorLength; + _startNext = _endCurrent + separatorLength; return true; } } From dd98d209a198c045e48edf1eccab4fdc2d60a996 Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 8 Jul 2024 22:37:18 +0100 Subject: [PATCH 06/21] Merge --- .../src/System/MemoryExtensions.Split.cs | 50 ++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git 
a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index bd8993e3106102..96a3427b30db5b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -2,17 +2,49 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; namespace System { public static partial class MemoryExtensions { + /// + /// Returns a type that allows for enumeration of each element within a split span + /// using the provided separator character. + /// + /// The source span to be enumerated. + /// The separator character to be used to split the provided span. + /// Returns a . public static SpanSplitEnumerator Split(this ReadOnlySpan source, T separator) where T : IEquatable => new SpanSplitEnumerator(source, separator); + + /// + /// Returns a type that allows for enumeration of each element within a split span + /// using the provided separator span. + /// + /// The source span to be enumerated. + /// The separator span to be used to split the provided span. + /// Returns a . public static SpanSplitEnumerator Split(this ReadOnlySpan source, ReadOnlySpan separator) where T : IEquatable => new SpanSplitEnumerator(source, separator, treatAsSingleSeparator: true); - public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, [System.Diagnostics.CodeAnalysis.UnscopedRef] params ReadOnlySpan separators) + + /// + /// Returns a type that allows for enumeration of each element within a split span + /// using any of the provided elements. + /// + /// The source span to be enumerated. + /// The separators to be used to split the provided span. + /// Returns a . 
+ public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, [UnscopedRef] params ReadOnlySpan separators) where T : IEquatable => new SpanSplitEnumerator(source, separators, treatAsSingleSeparator: false); + + /// + /// Returns a type that allows for enumeration of each element within a split span + /// using the provided . + /// + /// The source span to be enumerated. + /// The to be used to split the provided span. + /// Returns a . public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, SearchValues separators) where T : IEquatable => new SpanSplitEnumerator(source, separators); @@ -26,6 +58,10 @@ private enum SpanSplitEnumeratorMode SearchValues } + /// + /// allows for enumeration of each element within a + /// that has been split using a provided separator. + /// public ref struct SpanSplitEnumerator where T : IEquatable { private readonly ReadOnlySpan _span; @@ -41,8 +77,16 @@ private enum SpanSplitEnumeratorMode private int _endCurrent = 0; private int _startNext = 0; + /// + /// Returns an enumerator that allows for iteration over the split span. + /// + /// Returns a that can be used to iterate over the split span. public SpanSplitEnumerator GetEnumerator() => this; + /// + /// Returns the current element of the enumeration. + /// + /// Returns a instance that indicates the bounds of the current element withing the source span. public Range Current => new Range(_startCurrent, _endCurrent); internal SpanSplitEnumerator(ReadOnlySpan span, SearchValues searchValues) @@ -71,6 +115,10 @@ internal SpanSplitEnumerator(ReadOnlySpan span, T separator) _splitMode = SpanSplitEnumeratorMode.SingleToken; } + /// + /// Advances the enumerator to the next element of the enumeration. + /// + /// if the enumerator was successfully advanced to the next element; if the enumerator has passed the end of the enumeration. 
public bool MoveNext() { if (_splitMode is SpanSplitEnumeratorMode.None || _startNext > _span.Length) From c707e4b78e670d45a54981828232c7def7a3d7e5 Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 8 Jul 2024 22:41:28 +0100 Subject: [PATCH 07/21] Fixes bug --- .../System.Private.CoreLib/src/System/MemoryExtensions.Split.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index 96a3427b30db5b..9c3e7a42764e4e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -134,7 +134,7 @@ public bool MoveNext() SpanSplitEnumeratorMode.Sequence => (slice.IndexOf(_separatorBuffer), _separatorBuffer.Length), SpanSplitEnumeratorMode.EmptySequence => (slice.IndexOf(_separatorBuffer), 1), SpanSplitEnumeratorMode.Any => (slice.IndexOfAny(_separatorBuffer), 1), - SpanSplitEnumeratorMode.SearchValues => (_searchValues.IndexOfAny(_span), 1), + SpanSplitEnumeratorMode.SearchValues => (_searchValues.IndexOfAny(slice), 1), _ => throw new UnreachableException() }; From 189492acb752f41384f2d9e2b532732dead9f51c Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 8 Jul 2024 23:08:48 +0100 Subject: [PATCH 08/21] Removes UnreachableException --- .../src/System/MemoryExtensions.Split.cs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index 9c3e7a42764e4e..5db356411a829d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -51,7 +51,7 @@ public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, Se private enum
SpanSplitEnumeratorMode { None = 0, - SingleToken, + SingleElement, Sequence, EmptySequence, Any, @@ -112,7 +112,7 @@ internal SpanSplitEnumerator(ReadOnlySpan span, T separator) { _span = span; _separator = separator; - _splitMode = SpanSplitEnumeratorMode.SingleToken; + _splitMode = SpanSplitEnumeratorMode.SingleElement; } /// @@ -128,14 +128,14 @@ public bool MoveNext() ReadOnlySpan slice = _span[_startNext..]; + Debug.Assert(_splitmode is not SpanSplitEnumerator.None); (int separatorIndex, int separatorLength) = _splitMode switch { - SpanSplitEnumeratorMode.SingleToken => (slice.IndexOf(_separator), 1), - SpanSplitEnumeratorMode.Sequence => (slice.IndexOf(_separatorBuffer), _separatorBuffer.Length), + SpanSplitEnumeratorMode.SingleElement => (slice.IndexOf(_separator), 1), + SpanSplitEnumeratorMode.Sequence => (slice.IndexOf(_separatorBuffer), _separatorBuffer.Length), SpanSplitEnumeratorMode.EmptySequence => (slice.IndexOf(_separatorBuffer), 1), - SpanSplitEnumeratorMode.Any => (slice.IndexOfAny(_separatorBuffer), 1), - SpanSplitEnumeratorMode.SearchValues => (_searchValues.IndexOfAny(slice), 1), - _ => throw new UnreachableException() + SpanSplitEnumeratorMode.Any => (slice.IndexOfAny(_separatorBuffer), 1), + _ => (_searchValues.IndexOfAny(slice), 1) }; int elementLength = (separatorIndex != -1 ? 
separatorIndex : slice.Length); From 05accdca880010059b20c715853bd1662b706024 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Tue, 9 Jul 2024 10:31:01 +0100 Subject: [PATCH 09/21] Removes _separatorLength field --- .../System.Private.CoreLib/src/System/MemoryExtensions.Split.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index 5db356411a829d..d5cc80f51924ed 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -70,7 +70,6 @@ private enum SpanSplitEnumeratorMode private readonly ReadOnlySpan _separatorBuffer; private readonly SearchValues _searchValues = default!; - private readonly int _separatorLength; private readonly SpanSplitEnumeratorMode _splitMode; private int _startCurrent = 0; From 2da7163eba60d3f85e1a9051ef205d9d66593e9a Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Tue, 9 Jul 2024 11:49:10 +0100 Subject: [PATCH 10/21] Fixes assertion predicate --- .../System.Private.CoreLib/src/System/MemoryExtensions.Split.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index d5cc80f51924ed..c39b2098ac4e16 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -127,7 +127,7 @@ public bool MoveNext() ReadOnlySpan slice = _span[_startNext..]; - Debug.Assert(_splitmode is not SpanSplitEnumerator.None); + Debug.Assert(_splitMode is not SpanSplitEnumeratorMode.None); (int separatorIndex, int separatorLength) = _splitMode switch { SpanSplitEnumeratorMode.SingleElement => (slice.IndexOf(_separator), 1), From 
6a79eb173fc0de9503ead87b6a7117a728dd8c2f Mon Sep 17 00:00:00 2001 From: bbartels Date: Fri, 12 Jul 2024 22:20:58 +0100 Subject: [PATCH 11/21] Adds SpanSplitEnumerator to string.Split tests --- .../src/System/MemoryExtensions.Split.cs | 2 +- .../System/String.SplitTests.cs | 34 +++++++++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index c39b2098ac4e16..0d03f788bc821b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -132,7 +132,7 @@ public bool MoveNext() { SpanSplitEnumeratorMode.SingleElement => (slice.IndexOf(_separator), 1), SpanSplitEnumeratorMode.Sequence => (slice.IndexOf(_separatorBuffer), _separatorBuffer.Length), - SpanSplitEnumeratorMode.EmptySequence => (slice.IndexOf(_separatorBuffer), 1), + SpanSplitEnumeratorMode.EmptySequence => (-1, 1), SpanSplitEnumeratorMode.Any => (slice.IndexOfAny(_separatorBuffer), 1), _ => (_searchValues.IndexOfAny(slice), 1) }; diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs index 037e7a98a95878..acacc7b2992568 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Collections.Generic; using System.Linq; using Xunit; @@ -132,6 +133,7 @@ public static void SplitOneCountSingleResult() public static void SplitNoMatchSingleResult() { const string Value = "a b"; + ReadOnlySpan SpanValue = "a b"; const int Count = int.MaxValue; const StringSplitOptions Options = StringSplitOptions.None; @@ -152,17 +154,22 @@ public static void SplitNoMatchSingleResult() Assert.Equal(expected, Value.Split(new[] { "," }, Count, Options)); Range[] ranges = new Range[10]; - Assert.Equal(1, Value.AsSpan().Split(ranges, ',', Options)); + Assert.Equal(1, SpanValue.Split(ranges, ',', Options)); Assert.Equal(0..3, ranges[0]); Array.Clear(ranges); - Assert.Equal(1, Value.AsSpan().Split(ranges, ",", Options)); + Assert.Equal(1, SpanValue.Split(ranges, ",", Options)); Assert.Equal(0..3, ranges[0]); Array.Clear(ranges); - Assert.Equal(1, Value.AsSpan().SplitAny(ranges, ",", Options)); + Assert.Equal(1, SpanValue.SplitAny(ranges, ",", Options)); Assert.Equal(0..3, ranges[0]); Array.Clear(ranges); + + AssertEqual(expected, SpanValue, SpanValue.Split(',')); + AssertEqual(expected, SpanValue, SpanValue.Split(",")); + AssertEqual(expected, SpanValue, SpanValue.SplitAny(',')); + AssertEqual(expected, SpanValue, SpanValue.SplitAny(Buffers.SearchValues.Create([',']))); } private const int M = int.MaxValue; @@ -519,6 +526,9 @@ public static void SplitCharSeparator(string value, char separator, int count, S Assert.Equal(expected, value.Split(new[] { separator })); Assert.Equal(expected, value.Split((ReadOnlySpan)new[] { separator })); Assert.Equal(expected, value.Split(separator.ToString())); + + AssertEqual(expected, value.AsSpan(), value.AsSpan().Split(separator)); + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create([separator]))); } Range[] ranges = new Range[count == int.MaxValue ? 
value.Length + 1 : count]; @@ -576,6 +586,7 @@ public static void SplitStringSeparator(string value, string separator, int coun if (count == int.MaxValue && options == StringSplitOptions.None) { Assert.Equal(expected, value.Split(separator)); + AssertEqual(expected, value.AsSpan(), value.AsSpan().Split(separator)); } Range[] ranges = new Range[count == int.MaxValue ? value.Length + 1 : count]; @@ -634,6 +645,12 @@ public static void SplitCharArraySeparator(string value, char[] separators, int Range[] ranges = new Range[count == int.MaxValue ? value.Length + 1 : count]; Assert.Equal(expected.Length, value.AsSpan().SplitAny(ranges, separators, options)); Assert.Equal(expected, ranges.Take(expected.Length).Select(r => value[r]).ToArray()); + + if (count == int.MaxValue && options is StringSplitOptions.None) + { + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(separators)); + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create(separators))); + } } [Theory] @@ -697,5 +714,16 @@ private static string[] ToStringArray(char[] source) } return result; } + + private static void AssertEqual(string[] items, ReadOnlySpan source, MemoryExtensions.SpanSplitEnumerator enumerator) + { + foreach (var item in items) + { + Assert.True(enumerator.MoveNext()); + var slice = source[enumerator.Current]; + Assert.Equal(item, new string(slice)); + } + Assert.False(enumerator.MoveNext()); + } } } From a8a5b0b97c053ceed0d2cd80ba5cdfa6f499f8d5 Mon Sep 17 00:00:00 2001 From: bbartels Date: Fri, 12 Jul 2024 22:24:25 +0100 Subject: [PATCH 12/21] Special cases empty SplitAny tests --- .../tests/System.Runtime.Tests/System/String.SplitTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs index acacc7b2992568..5348827e5ed7f7 100644 --- 
a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs @@ -646,7 +646,7 @@ public static void SplitCharArraySeparator(string value, char[] separators, int Assert.Equal(expected.Length, value.AsSpan().SplitAny(ranges, separators, options)); Assert.Equal(expected, ranges.Take(expected.Length).Select(r => value[r]).ToArray()); - if (count == int.MaxValue && options is StringSplitOptions.None) + if (count == int.MaxValue && options is StringSplitOptions.None && separators is { Length: > 0}) { AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(separators)); AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create(separators))); From f5b8baacf96c9a725a7ccc04bd861ba6acc4f59b Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 15 Jul 2024 02:13:55 +0100 Subject: [PATCH 13/21] Rewrites generic type tests --- .../tests/ReadOnlySpan/Split.char.cs | 389 ++++++++---------- .../System/String.SplitTests.cs | 3 +- 2 files changed, 175 insertions(+), 217 deletions(-) diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs index 198e67444918da..50d29a29093356 100644 --- a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs +++ b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs @@ -2,260 +2,217 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System.Buffers; +using System.Collections.Generic; using System.Linq; using Xunit; -using SpanSplitEnumerator = System.MemoryExtensions.SpanSplitEnumerator; namespace System.SpanTests { public static partial class ReadOnlySpanTests { - [Fact] - public static void SplitNoMatchSingleResult() - { - ReadOnlySpan value = "a b"; - - string expected = value.ToString(); - var enumerator = value.Split(','); - Assert.True(enumerator.MoveNext()); - Assert.Equal(expected, value[enumerator.Current].ToString()); - } + public record struct CustomStruct(int value) : IEquatable; + public record class CustomClass(int value) : IEquatable; [Fact] - public static void DefaultSpanSplitEnumeratorBehavior() + public static void DefaultSpanSplitEnumeratorBehaviour() { - var charSpanEnumerator = new SpanSplitEnumerator(); + var charSpanEnumerator = new MemoryExtensions.SpanSplitEnumerator(); Assert.Equal(new Range(0, 0), charSpanEnumerator.Current); Assert.False(charSpanEnumerator.MoveNext()); // Implicit DoesNotThrow assertion charSpanEnumerator.GetEnumerator(); - var stringSpanEnumerator = new SpanSplitEnumerator(); + var stringSpanEnumerator = new MemoryExtensions.SpanSplitEnumerator(); Assert.Equal(new Range(0, 0), stringSpanEnumerator.Current); Assert.False(stringSpanEnumerator.MoveNext()); stringSpanEnumerator.GetEnumerator(); } - [Fact] - public static void ValidateArguments_OverloadWithoutSeparator() - { - ReadOnlySpan buffer = default; - - var enumerator = buffer.Split(' '); - Assert.True(enumerator.MoveNext()); - Assert.Equal(new Range(0, 0), enumerator.Current); - Assert.False(enumerator.MoveNext()); + public static IEnumerable SplitSingleElementSeparatorData => + [ + // Split on default + [ (char[])['a', ' ', 'b'], default(char), (Range[])[0..3] ], + [ (int[]) [1, 2, 3], default(int), (Range[])[0..3] ], + [ (long[])[1, 2, 3], default(long), (Range[])[0..3] ], + [ (byte[])[1, 2, 3], default(byte), (Range[])[0..3] ], + [ (CustomStruct[])[new(1), new(2), new(3)], 
default(CustomStruct), (Range[])[0..3] ], + [ (CustomClass[])[new(1), new(2), new(3)], default(CustomClass), (Range[])[0..3] ], + + // Split no matching element + [ (char[])['a', ' ', 'b'], ',', (Range[])[0..3] ], + [ (int[]) [1, 2, 3], (int)4, (Range[])[0..3] ], + [ (long[])[1, 2, 3], (long)4, (Range[])[0..3] ], + [ (byte[])[1, 2, 3], (byte)4, (Range[])[0..3] ], + [ (CustomStruct[])[new(1), new(2), new(3)], new CustomStruct(4), (Range[])[0..3] ], + [ (CustomClass[])[new(1), new(2), new(3)], new CustomClass(4), (Range[])[0..3] ], + + // Split on sequence containing only a separator + [ (char[])[','], ',', (Range[])[0..0, 1..1] ], + [ (int[]) [1], (int)1, (Range[])[0..0, 1..1] ], + [ (long[])[1], (long)1, (Range[])[0..0, 1..1] ], + [ (byte[])[1], (byte)1, (Range[])[0..0, 1..1] ], + [ (CustomStruct[])[new(1)], new CustomStruct(1), (Range[])[0..0, 1..1] ], + [ (CustomClass[]) [new(1)], new CustomClass(1), (Range[])[0..0, 1..1] ], + + // Split on empty sequence with default separator + [ (char[])[], default(char), (Range[])[0..0] ], + [ (int[]) [], default(int), (Range[])[0..0] ], + [ (long[])[], default(long), (Range[])[0..0] ], + [ (byte[])[], default(byte), (Range[])[0..0] ], + [ (CustomStruct[])[], default(CustomStruct), (Range[])[0..0] ], + [ (CustomClass[]) [], default(CustomClass), (Range[])[0..0] ], + + [ (char[])['a', ',', 'b'], ',', (Range[]) [ 0..1, 2..3 ] ], + [ (int[]) [1, 2, 3], (int)2, (Range[]) [ 0..1, 2..3 ] ], + [ (long[])[1, 2, 3], (long)2, (Range[]) [ 0..1, 2..3 ] ], + [ (byte[])[1, 2, 3], (byte)2, (Range[]) [ 0..1, 2..3 ] ], + [ (CustomStruct[])[new(1), new(2), new(3)], new CustomStruct(2), (Range[]) [ 0..1, 2..3 ] ], + [ (CustomClass[])[new(1), new(2), new(3)], new CustomClass(2), (Range[]) [ 0..1, 2..3 ] ], + + [ (char[])['a', 'b', ',', ','], ',', (Range[]) [ 0..2, 3..3, 4..4 ] ], + [ (int[]) [1, 3, 2, 2], (int)2, (Range[]) [ 0..2, 3..3, 4..4 ] ], + [ (long[])[1, 3, 2, 2], (long)2, (Range[]) [ 0..2, 3..3, 4..4 ] ], + [ (byte[])[1, 3, 2, 2], 
(byte)2, (Range[]) [ 0..2, 3..3, 4..4 ] ], + [ (CustomStruct[])[new(1), new(3), new(2), new(2)], new CustomStruct(2), (Range[]) [ 0..2, 3..3, 4..4 ] ], + [ (CustomClass[])[new(1), new(3), new(2), new(2)], new CustomClass(2), (Range[]) [ 0..2, 3..3, 4..4 ] ], + ]; - buffer = ""; - enumerator = buffer.Split(' '); - Assert.True(enumerator.MoveNext()); - Assert.Equal(new Range(0, 0), enumerator.Current); - Assert.False(enumerator.MoveNext()); - - buffer = " "; - enumerator = buffer.Split(' '); - Assert.True(enumerator.MoveNext()); - Assert.Equal(new Range(0, 0), enumerator.Current); - Assert.True(enumerator.MoveNext()); - Assert.Equal(new Range(1, 1), enumerator.Current); - Assert.False(enumerator.MoveNext()); - } - - [Fact] - public static void ValidateArguments_OverloadWithROSSeparator() + [Theory] + [MemberData(nameof(SplitSingleElementSeparatorData))] + public static void Split_SingleElementSeparator(T[] value, T separator, Range[] result) where T : IEquatable { - // Default buffer - ReadOnlySpan buffer = default; - - var enumerator = buffer.Split(default(char)); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.False(enumerator.MoveNext()); - - enumerator = buffer.Split(' '); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.False(enumerator.MoveNext()); - - // Empty buffer - buffer = ""; - - enumerator = buffer.Split(default(char)); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.False(enumerator.MoveNext()); - - enumerator = buffer.Split(' '); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.False(enumerator.MoveNext()); - - // Single whitespace buffer - buffer = " "; - - enumerator = buffer.Split(default(char)); - Assert.True(enumerator.MoveNext()); - Assert.False(enumerator.MoveNext()); - - enumerator = buffer.Split(' '); - Assert.Equal(new Range(0, 0), 
enumerator.Current); - Assert.True(enumerator.MoveNext()); - Assert.Equal(new Range(0, 0), enumerator.Current); - Assert.True(enumerator.MoveNext()); - Assert.Equal(new Range(1, 1), enumerator.Current); - Assert.False(enumerator.MoveNext()); + AssertEnsureCorrectEnumeration(new ReadOnlySpan(value).Split(separator), result); } - [Fact] - public static void ValidateArguments_OverloadWithStringSeparator() - { - // Default buffer - ReadOnlySpan buffer = default; - - var enumerator = buffer.Split(null); // null is treated as empty string - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.False(enumerator.MoveNext()); - - enumerator = buffer.Split(""); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.False(enumerator.MoveNext()); - - enumerator = buffer.Split(" "); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.False(enumerator.MoveNext()); - - // Empty buffer - buffer = ""; - - enumerator = buffer.Split(null); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.False(enumerator.MoveNext()); - - enumerator = buffer.Split(""); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.False(enumerator.MoveNext()); - - enumerator = buffer.Split(" "); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.False(enumerator.MoveNext()); - - // Single whitespace buffer - buffer = " "; - - enumerator = buffer.Split(null); // null is treated as empty string - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(1, 1)); - Assert.False(enumerator.MoveNext()); - - enumerator = buffer.Split(""); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); 
- Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(1, 1)); - Assert.False(enumerator.MoveNext()); - - enumerator = buffer.Split(" "); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(0, 0)); - Assert.True(enumerator.MoveNext()); - Assert.Equal(enumerator.Current, new Range(1, 1)); - Assert.False(enumerator.MoveNext()); - } + public static IEnumerable SplitSequenceSeparatorData => + [ + // Split no separators + [ (char[])['a', ' ', 'b'], (char[])[], (Range[])[0..3] ], + [ (int[]) [1, 2, 3], (int[]) [], (Range[])[0..3] ], + [ (long[])[1, 2, 3], (long[])[], (Range[])[0..3] ], + [ (byte[])[1, 2, 3], (byte[])[], (Range[])[0..3] ], + [ (CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[], (Range[])[0..3] ], + [ (CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[], (Range[])[0..3] ], + + // Split no matching elements + [ (char[])['a', ' ', 'b'], (char[])[',', '.' ], (Range[])[0..3] ], + [ (int[]) [1, 2, 3], (int[]) [4, 3], (Range[])[0..3] ], + [ (long[])[1, 2, 3], (long[])[4, 3], (Range[])[0..3] ], + [ (byte[])[1, 2, 3], (byte[])[4, 3], (Range[])[0..3] ], + [ (CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[new(4), new(3)], (Range[])[0..3] ], + [ (CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[new(4), new(3)], (Range[])[0..3] ], + + // Split on input span with only a single sequence separator + [ (char[])[',', '.'], (char[])[',', '.' 
], (Range[])[0..0, 2..2] ], + [ (int[]) [4, 3], (int[]) [4, 3], (Range[])[0..0, 2..2] ], + [ (long[])[4, 3], (long[])[4, 3], (Range[])[0..0, 2..2] ], + [ (byte[])[4, 3], (byte[])[4, 3], (Range[])[0..0, 2..2] ], + [ (CustomStruct[])[new(4), new(3)], (CustomStruct[])[new(4), new(3)], (Range[])[0..0, 2..2] ], + [ (CustomClass[])[new(4), new(3)], (CustomClass[])[new(4), new(3)], (Range[])[0..0, 2..2] ], + + // Split on empty sequence with default separator + [ (char[])[], (char[])[default(char)], (Range[])[0..0] ], + [ (int[]) [], (int[]) [default(int)], (Range[])[0..0] ], + [ (long[])[], (long[])[default(long)], (Range[])[0..0] ], + [ (byte[])[], (byte[])[default(byte)], (Range[])[0..0] ], + [ (CustomStruct[])[], (CustomStruct[])[default], (Range[])[0..0] ], + [ (CustomClass[]) [], (CustomClass[])[default], (Range[])[0..0] ], + + [ (char[])['a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..1, 3..4 ] ], + [ (int[]) [1, 2, 4, 3], (int[])[2, 4], (Range[]) [ 0..1, 3..4 ] ], + [ (long[])[1, 2, 4, 3], (long[])[2, 4], (Range[]) [ 0..1, 3..4 ] ], + [ (byte[])[1, 2, 4, 3], (byte[])[2, 4], (Range[]) [ 0..1, 3..4 ] ], + [ (CustomStruct[])[new(1), new(2), new(4), new(3)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..1, 3..4 ] ], + [ (CustomClass[])[new(1), new(2), new(4), new(3)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..1, 3..4 ] ], + + [ (char[])[',', '-', 'a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..0, 2..3, 5..6 ] ], + [ (int[]) [2, 4, 3, 2, 4, 5], (int[]) [2, 4], (Range[]) [ 0..0, 2..3, 5..6 ] ], + [ (long[])[2, 4, 3, 2, 4, 5], (long[])[2, 4], (Range[]) [ 0..0, 2..3, 5..6 ] ], + [ (byte[])[2, 4, 3, 2, 4, 5], (byte[])[2, 4], (Range[]) [ 0..0, 2..3, 5..6 ] ], + [ (CustomStruct[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..0, 2..3, 5..6 ] ], + [ (CustomClass[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..0, 2..3, 5..6 ] ], + ]; [Theory] - 
[InlineData("", ',', new[] { "" })] - [InlineData(" ", ' ', new[] { "", "" })] - [InlineData(",", ',', new[] { "", "" })] - [InlineData(" ", ' ', new[] { "", "", "", "", "", "" })] - [InlineData(",,", ',', new[] { "", "", "" })] - [InlineData("ab", ',', new[] { "ab" })] - [InlineData("a,b", ',', new[] { "a", "b" })] - [InlineData("a,", ',', new[] { "a", "" })] - [InlineData(",b", ',', new[] { "", "b" })] - [InlineData(",a,b", ',', new[] { "", "a", "b" })] - [InlineData("a,b,", ',', new[] { "a", "b", "" })] - [InlineData("a,b,c", ',', new[] { "a", "b", "c" })] - [InlineData("a,,c", ',', new[] { "a", "", "c" })] - [InlineData(",a,b,c", ',', new[] { "", "a", "b", "c" })] - [InlineData("a,b,c,", ',', new[] { "a", "b", "c", "" })] - [InlineData(",a,b,c,", ',', new[] { "", "a", "b", "c", "" })] - [InlineData("first,second", ',', new[] { "first", "second" })] - [InlineData("first,", ',', new[] { "first", "" })] - [InlineData(",second", ',', new[] { "", "second" })] - [InlineData(",first,second", ',', new[] { "", "first", "second" })] - [InlineData("first,second,", ',', new[] { "first", "second", "" })] - [InlineData("first,second,third", ',', new[] { "first", "second", "third" })] - [InlineData("first,,third", ',', new[] { "first", "", "third" })] - [InlineData(",first,second,third", ',', new[] { "", "first", "second", "third" })] - [InlineData("first,second,third,", ',', new[] { "first", "second", "third", "" })] - [InlineData(",first,second,third,", ',', new[] { "", "first", "second", "third", "" })] - [InlineData("Foo Bar Baz", ' ', new[] { "Foo", "Bar", "Baz" })] - [InlineData("Foo Bar Baz ", ' ', new[] { "Foo", "Bar", "Baz", "" })] - [InlineData(" Foo Bar Baz ", ' ', new[] { "", "Foo", "Bar", "Baz", "" })] - [InlineData(" Foo Bar Baz ", ' ', new[] { "", "Foo", "", "Bar", "Baz", "" })] - [InlineData("Foo Baz Bar", default(char), new[] { "Foo Baz Bar" })] - [InlineData("Foo Baz \x0000 Bar", default(char), new[] { "Foo Baz ", " Bar" })] - [InlineData("Foo Baz \x0000 
Bar\x0000", default(char), new[] { "Foo Baz ", " Bar", "" })] - public static void SpanSplitCharSeparator(string valueParam, char separator, string[] expectedParam) + [MemberData(nameof(SplitSequenceSeparatorData))] + public static void Split_SequenceSeparator(T[] value, T[] separator, Range[] result) where T : IEquatable { - char[][] expected = expectedParam.Select(x => x.ToCharArray()).ToArray(); - AssertEqual(expected, valueParam, valueParam.AsSpan().Split(separator)); + AssertEnsureCorrectEnumeration(new ReadOnlySpan(value).Split(separator), result); } - [Theory] - [InlineData("", new[] { "" })] - [InlineData(" ", new[] { "", "" })] - [InlineData(" ", new[] { "", "", "", "", "", "" })] - [InlineData(" ", new[] { "", "", "" })] - [InlineData("ab", new[] { "ab" })] - [InlineData("a b", new[] { "a", "b" })] - [InlineData("a ", new[] { "a", "" })] - [InlineData(" b", new[] { "", "b" })] - [InlineData("Foo Bar Baz", new[] { "Foo", "Bar", "Baz" })] - [InlineData("Foo Bar Baz ", new[] { "Foo", "Bar", "Baz", "" })] - [InlineData(" Foo Bar Baz ", new[] { "", "Foo", "Bar", "Baz", "" })] - [InlineData(" Foo Bar Baz ", new[] { "", "Foo", "", "Bar", "Baz", "" })] - public static void SpanSplitDefaultCharSeparator(string valueParam, string[] expectedParam) - { - char[][] expected = expectedParam.Select(x => x.ToCharArray()).ToArray(); - AssertEqual(expected, valueParam, valueParam.AsSpan().Split(' ')); - } + public static IEnumerable SplitAnySeparatorData => + [ + // Split no separators + [ (char[])['a', ' ', 'b'], (char[])[], (Range[])[0..3] ], + [ (int[]) [1, 2, 3], (int[]) [], (Range[])[0..3] ], + [ (long[])[1, 2, 3], (long[])[], (Range[])[0..3] ], + [ (byte[])[1, 2, 3], (byte[])[], (Range[])[0..3] ], + [ (CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[], (Range[])[0..3] ], + [ (CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[], (Range[])[0..3] ], + + // Split non-matching separators + [ (char[])['a', ' ', 'b'], (char[])[',', '.' 
], (Range[])[0..3] ], + [ (int[]) [1, 2, 3], (int[]) [4, 5], (Range[])[0..3] ], + [ (long[])[1, 2, 3], (long[])[4, 5], (Range[])[0..3] ], + [ (byte[])[1, 2, 3], (byte[])[4, 5], (Range[])[0..3] ], + [ (CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[new(4), new(5)], (Range[])[0..3] ], + [ (CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[new(4), new(5)], (Range[])[0..3] ], + + // Split on sequence containing only a separator + [ (char[])[','], (char[])[','], (Range[])[0..0, 1..1] ], + [ (int[]) [1], (int[]) [1], (Range[])[0..0, 1..1] ], + [ (long[])[1], (long[])[1], (Range[])[0..0, 1..1] ], + [ (byte[])[1], (byte[])[1], (Range[])[0..0, 1..1] ], + [ (CustomStruct[])[new(1)], (CustomStruct[])[new(1)], (Range[])[0..0, 1..1] ], + [ (CustomClass[]) [new(1)], (CustomClass[])[new(1)], (Range[])[0..0, 1..1] ], + + // Split on empty sequence with default separator + [ (char[])[], (char[])[default(char)], (Range[])[0..0] ], + [ (int[]) [], (int[]) [default(int)], (Range[])[0..0] ], + [ (long[])[], (long[])[default(long)], (Range[])[0..0] ], + [ (byte[])[], (byte[])[default(byte)], (Range[])[0..0] ], + [ (CustomStruct[])[], (CustomStruct[])[new(default)], (Range[])[0..0] ], + [ (CustomClass[]) [], (CustomClass[])[new(default)], (Range[])[0..0] ], + + [ (char[])['a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..1, 2..2, 3..4 ] ], + [ (int[]) [1, 2, 4, 3], (int[])[2, 4], (Range[]) [ 0..1, 2..2, 3..4 ] ], + [ (long[])[1, 2, 4, 3], (long[])[2, 4], (Range[]) [ 0..1, 2..2, 3..4 ] ], + [ (byte[])[1, 2, 4, 3], (byte[])[2, 4], (Range[]) [ 0..1, 2..2, 3..4 ] ], + [ (CustomStruct[])[new(1), new(2), new(4), new(3)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..1, 2..2, 3..4 ] ], + [ (CustomClass[])[new(1), new(2), new(4), new(3)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..1, 2..2, 3..4 ] ], + + [ (char[])[',', '-', 'a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ], + [ (int[]) [2, 4, 3, 2, 4, 5], (int[]) [2, 4], 
(Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ], + [ (long[])[2, 4, 3, 2, 4, 5], (long[])[2, 4], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ], + [ (byte[])[2, 4, 3, 2, 4, 5], (byte[])[2, 4], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ], + [ (CustomStruct[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ], + [ (CustomClass[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ], + ]; [Theory] - [InlineData(" Foo Bar Baz,", ", ", new[] { " Foo Bar Baz," })] - [InlineData(" Foo Bar Baz, ", ", ", new[] { " Foo Bar Baz", "" })] - [InlineData(", Foo Bar Baz, ", ", ", new[] { "", "Foo Bar Baz", "" })] - [InlineData(", Foo, Bar, Baz, ", ", ", new[] { "", "Foo", "Bar", "Baz", "" })] - [InlineData(", , Foo Bar, Baz", ", ", new[] { "", "", "Foo Bar", "Baz" })] - [InlineData(", , Foo Bar, Baz, , ", ", ", new[] { "", "", "Foo Bar", "Baz", "", "" })] - [InlineData(", , , , , ", ", ", new[] { "", "", "", "", "", "" })] - [InlineData(" ", " ", new[] { "", "", "", "", "", "" })] - [InlineData(" Foo, Bar Baz ", " ", new[] { "", "Foo, Bar", "Baz", "" })] - public static void SpanSplitStringSeparator(string valueParam, string separator, string[] expectedParam) + [MemberData(nameof(SplitAnySeparatorData))] + public static void Split_AnySingleElementSeparator(T[] value, T[] separator, Range[] result) where T : IEquatable { - char[][] expected = expectedParam.Select(x => x.ToCharArray()).ToArray(); - AssertEqual(expected, valueParam, valueParam.AsSpan().Split(separator)); + AssertEnsureCorrectEnumeration(new ReadOnlySpan(value).SplitAny(separator), result); + + if (value is char[] source && separator is char[] separators) + { + var charEnumerator = new ReadOnlySpan(source).SplitAny(SearchValues.Create(separators)); + AssertEnsureCorrectEnumeration(charEnumerator, result); + } } - private static void AssertEqual(T[][] items, ReadOnlySpan 
orig, MemoryExtensions.SpanSplitEnumerator source) where T : IEquatable + private static void AssertEnsureCorrectEnumeration(MemoryExtensions.SpanSplitEnumerator enumerator, Range[] result) where T : IEquatable { - foreach (var item in items) + foreach ((Range r, int index) in ((Range[])[0..0]).Concat(result).Select((e, i) => (e, i))) { - Assert.True(source.MoveNext()); - var slice = orig[source.Current]; - Assert.Equal(item, slice.ToArray()); + Assert.Equal(r, enumerator.Current); + if (index < result.Length) + Assert.True(enumerator.MoveNext()); } - Assert.False(source.MoveNext()); + Assert.False(enumerator.MoveNext()); } } } diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs index 5348827e5ed7f7..fccf80921f140e 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs @@ -646,7 +646,8 @@ public static void SplitCharArraySeparator(string value, char[] separators, int Assert.Equal(expected.Length, value.AsSpan().SplitAny(ranges, separators, options)); Assert.Equal(expected, ranges.Take(expected.Length).Select(r => value[r]).ToArray()); - if (count == int.MaxValue && options is StringSplitOptions.None && separators is { Length: > 0}) + // The SpanSplitEnumerator does not replicate the behaviour of splitting on a whitespace char when input char[] is null or empty. 
+ if (count == int.MaxValue && options is StringSplitOptions.None && separators is { Length: > 0 }) { AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(separators)); AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create(separators))); From 4077f7b944f771f8dfcab053f96715a311a2fda0 Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 15 Jul 2024 02:17:12 +0100 Subject: [PATCH 14/21] Renames test file --- .../tests/ReadOnlySpan/{Split.char.cs => Split.T.cs} | 0 src/libraries/System.Memory/tests/System.Memory.Tests.csproj | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/libraries/System.Memory/tests/ReadOnlySpan/{Split.char.cs => Split.T.cs} (100%) diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs similarity index 100% rename from src/libraries/System.Memory/tests/ReadOnlySpan/Split.char.cs rename to src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 6f68804b534732..70c50f6212bf71 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -191,7 +191,7 @@ - + From 48f2926cb3f2854ddc0832ac35d0afed55c8e65f Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 15 Jul 2024 13:20:15 -0400 Subject: [PATCH 15/21] Recommended changes to new MemoryExtensions.Split behavior, implementation, and tests --- .../tests/ReadOnlySpan/Split.T.cs | 6 +- .../src/System/MemoryExtensions.Split.cs | 199 ++++++++++++++---- .../System/String.SplitTests.cs | 20 +- 3 files changed, 175 insertions(+), 50 deletions(-) diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs index 50d29a29093356..6864dbeec646b9 100644 --- 
a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs +++ b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs @@ -145,7 +145,7 @@ public static void Split_SequenceSeparator(T[] value, T[] separator, Range[] public static IEnumerable SplitAnySeparatorData => [ // Split no separators - [ (char[])['a', ' ', 'b'], (char[])[], (Range[])[0..3] ], + [ (char[])['a', ' ', 'b'], (char[])[], (Range[])[0..1, 2..3] ], // an empty span of separators for char is handled as all whitespace being separators [ (int[]) [1, 2, 3], (int[]) [], (Range[])[0..3] ], [ (long[])[1, 2, 3], (long[])[], (Range[])[0..3] ], [ (byte[])[1, 2, 3], (byte[])[], (Range[])[0..3] ], @@ -197,7 +197,9 @@ public static void Split_AnySingleElementSeparator(T[] value, T[] separator, { AssertEnsureCorrectEnumeration(new ReadOnlySpan(value).SplitAny(separator), result); - if (value is char[] source && separator is char[] separators) + if (value is char[] source && + separator is char[] separators && + separators.Length > 0) // the SearchValues overload does not special-case empty { var charEnumerator = new ReadOnlySpan(source).SplitAny(SearchValues.Create(separators)); AssertEnsureCorrectEnumeration(charEnumerator, result); diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index 0d03f788bc821b..ece3c133f9e30c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -3,6 +3,8 @@ using System.Buffers; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Threading; namespace System { @@ -12,82 +14,120 @@ public static partial class MemoryExtensions /// Returns a type that allows for enumeration of each element within a split span /// using the provided separator character. /// + /// The type of the elements. 
/// The source span to be enumerated. /// The separator character to be used to split the provided span. /// Returns a . - public static SpanSplitEnumerator Split(this ReadOnlySpan source, T separator) - where T : IEquatable => new SpanSplitEnumerator(source, separator); + public static SpanSplitEnumerator Split(this ReadOnlySpan source, T separator) where T : IEquatable => + new SpanSplitEnumerator(source, separator); /// /// Returns a type that allows for enumeration of each element within a split span /// using the provided separator span. /// + /// The type of the elements. /// The source span to be enumerated. /// The separator span to be used to split the provided span. /// Returns a . - public static SpanSplitEnumerator Split(this ReadOnlySpan source, ReadOnlySpan separator) - where T : IEquatable => new SpanSplitEnumerator(source, separator, treatAsSingleSeparator: true); + public static SpanSplitEnumerator Split(this ReadOnlySpan source, ReadOnlySpan separator) where T : IEquatable => + new SpanSplitEnumerator(source, separator, treatAsSingleSeparator: true); /// /// Returns a type that allows for enumeration of each element within a split span /// using any of the provided elements. /// + /// The type of the elements. /// The source span to be enumerated. /// The separators to be used to split the provided span. /// Returns a . - public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, [UnscopedRef] params ReadOnlySpan separators) - where T : IEquatable => new SpanSplitEnumerator(source, separators, treatAsSingleSeparator: false); + /// + /// If is and if is empty, + /// all Unicode whitespace characters are used as the separators. This matches the behavior of when + /// and related overloads are used with an empty separator array, + /// or when + /// is used with an empty separator span. 
+ /// + public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, [UnscopedRef] params ReadOnlySpan separators) where T : IEquatable => + new SpanSplitEnumerator(source, separators); /// /// Returns a type that allows for enumeration of each element within a split span /// using the provided . /// + /// The type of the elements. /// The source span to be enumerated. /// The to be used to split the provided span. /// Returns a . - public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, SearchValues separators) - where T : IEquatable => new SpanSplitEnumerator(source, separators); + /// + /// Unlike , the is not checked for being empty. + /// An empty will result in no separators being found, regardless of the type of , + /// whereas will use all Unicode whitespace characters as separators if is + /// empty and is . + /// + public static SpanSplitEnumerator SplitAny(this ReadOnlySpan source, SearchValues separators) where T : IEquatable => + new SpanSplitEnumerator(source, separators); + /// Indicates in which mode is operating, with regards to how it should interpret its state. private enum SpanSplitEnumeratorMode { + /// Either a default was used, or the enumerator has finished enumerating and there's no more work to do. None = 0, + + /// A single T separator was provided. SingleElement, + + /// A span of separators was provided, each of which should be treated independently. + Any, + + /// The separator is a span of elements to be treated as a single sequence. Sequence, + + /// The separator is an empty sequence, such that no splits should be performed. EmptySequence, - Any, + + /// + /// A was provided and should behave the same as with but with the separators in the + /// instance instead of in a . + /// SearchValues } /// - /// allows for enumeration of each element within a - /// that has been split using a provided separator. + /// Enables enumerating each split within a that has been divided using one or more separators. 
/// public ref struct SpanSplitEnumerator where T : IEquatable { + /// The input span being split. private readonly ReadOnlySpan _span; + /// A single separator to use when is . private readonly T _separator = default!; + /// + /// A separator span to use when is (in which case + /// it's treated as a single separator) or (in which case it's treated as a set of separators). + /// private readonly ReadOnlySpan _separatorBuffer; + /// A set of separators to use when is . private readonly SearchValues _searchValues = default!; - private readonly SpanSplitEnumeratorMode _splitMode; - + /// Mode that dictates how the instance was configured and how its fields should be used in . + private SpanSplitEnumeratorMode _splitMode; + /// The inclusive starting index in of the current range. private int _startCurrent = 0; + /// The exclusive ending index in of the current range. private int _endCurrent = 0; + /// The index in from which the next separator search should start. private int _startNext = 0; - /// - /// Returns an enumerator that allows for iteration over the split span. - /// + /// Gets an enumerator that allows for iteration over the split span. /// Returns a that can be used to iterate over the split span. public SpanSplitEnumerator GetEnumerator() => this; - /// - /// Returns the current element of the enumeration. - /// - /// Returns a instance that indicates the bounds of the current element withing the source span. + /// Gets the current element of the enumeration. + /// Returns a instance that indicates the bounds of the current element withing the source span. public Range Current => new Range(_startCurrent, _endCurrent); + /// Initializes the enumerator for . internal SpanSplitEnumerator(ReadOnlySpan span, SearchValues searchValues) { _span = span; @@ -95,18 +135,41 @@ internal SpanSplitEnumerator(ReadOnlySpan span, SearchValues searchValues) _searchValues = searchValues; } + /// Initializes the enumerator for . 
+ /// + /// If is empty and is , as an optimization + /// it will instead use with a cached + /// for all whitespace characters. + /// + internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separators) + { + _span = span; + if (typeof(T) == typeof(char) && separators.Length == 0) + { + _searchValues = Unsafe.As>(WhiteSpaceSearchValues); + _splitMode = SpanSplitEnumeratorMode.SearchValues; + } + else + { + _separatorBuffer = separators; + _splitMode = SpanSplitEnumeratorMode.Any; + } + } + + /// Initializes the enumerator for (or if the separator is empty). + /// must be true. internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separator, bool treatAsSingleSeparator) { + Debug.Assert(treatAsSingleSeparator, "Should only ever be called as true; exists to differentiate from separators overload"); + _span = span; _separatorBuffer = separator; - _splitMode = (separator.Length, treatAsSingleSeparator) switch - { - (0, true) => SpanSplitEnumeratorMode.EmptySequence, - (_, true) => SpanSplitEnumeratorMode.Sequence, - _ => SpanSplitEnumeratorMode.Any - }; + _splitMode = separator.Length == 0 ? + SpanSplitEnumeratorMode.EmptySequence : + SpanSplitEnumeratorMode.Sequence; } + /// Initializes the enumerator for . internal SpanSplitEnumerator(ReadOnlySpan span, T separator) { _span = span; @@ -120,30 +183,82 @@ internal SpanSplitEnumerator(ReadOnlySpan span, T separator) /// if the enumerator was successfully advanced to the next element; if the enumerator has passed the end of the enumeration. public bool MoveNext() { - if (_splitMode is SpanSplitEnumeratorMode.None || _startNext > _span.Length) + // Search for the next separator index. 
+ int separatorIndex, separatorLength; + switch (_splitMode) { - return false; - } + case SpanSplitEnumeratorMode.None: + return false; - ReadOnlySpan slice = _span[_startNext..]; + case SpanSplitEnumeratorMode.SingleElement: + separatorIndex = _span.Slice(_startNext).IndexOf(_separator); + separatorLength = 1; + break; - Debug.Assert(_splitMode is not SpanSplitEnumeratorMode.None); - (int separatorIndex, int separatorLength) = _splitMode switch - { - SpanSplitEnumeratorMode.SingleElement => (slice.IndexOf(_separator), 1), - SpanSplitEnumeratorMode.Sequence => (slice.IndexOf(_separatorBuffer), _separatorBuffer.Length), - SpanSplitEnumeratorMode.EmptySequence => (-1, 1), - SpanSplitEnumeratorMode.Any => (slice.IndexOfAny(_separatorBuffer), 1), - _ => (_searchValues.IndexOfAny(slice), 1) - }; + case SpanSplitEnumeratorMode.Any: + separatorIndex = _span.Slice(_startNext).IndexOfAny(_separatorBuffer); + separatorLength = 1; + break; + + case SpanSplitEnumeratorMode.Sequence: + separatorIndex = _span.Slice(_startNext).IndexOf(_separatorBuffer); + separatorLength = _separatorBuffer.Length; + break; - int elementLength = (separatorIndex != -1 ? separatorIndex : slice.Length); + case SpanSplitEnumeratorMode.EmptySequence: + separatorIndex = -1; + separatorLength = 1; + break; + + default: + Debug.Assert(_splitMode == SpanSplitEnumeratorMode.SearchValues, $"Unknown split mode: {_splitMode}"); + separatorIndex = _span.Slice(_startNext).IndexOfAny(_searchValues); + separatorLength = 1; + break; + } _startCurrent = _startNext; - _endCurrent = _startCurrent + elementLength; - _startNext = _endCurrent + separatorLength; + if (separatorIndex >= 0) + { + _endCurrent = _startCurrent + separatorIndex; + _startNext = _endCurrent + separatorLength; + } + else + { + _startNext = _endCurrent = _span.Length; + + // Set _splitMode to None so that subsequent MoveNext calls will return false. 
+ _splitMode = SpanSplitEnumeratorMode.None; + } + return true; } } + + /// Gets a for all of the Unicode whitespace characters + private static SearchValues WhiteSpaceSearchValues + { + get + { + return s_whiteSpaceSearchValues ?? Initialize(); + + [MethodImpl(MethodImplOptions.NoInlining)] + static SearchValues Initialize() + { + SearchValues sv = SearchValues.Create("\t\n\v\f\r\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"); +#if DEBUG + for (int i = 0; i <= char.MaxValue; i++) + { + Debug.Assert(char.IsWhiteSpace((char)i) == sv.Contains((char)i)); + } +#endif + Interlocked.CompareExchange(ref s_whiteSpaceSearchValues, sv, null); + return s_whiteSpaceSearchValues; + } + } + } + + /// A lazily-initialized for all of the Unicode whitespace characters. + private static SearchValues? s_whiteSpaceSearchValues; } } diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs index fccf80921f140e..3ed49c1ef14921 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/String.SplitTests.cs @@ -528,6 +528,7 @@ public static void SplitCharSeparator(string value, char separator, int count, S Assert.Equal(expected, value.Split(separator.ToString())); AssertEqual(expected, value.AsSpan(), value.AsSpan().Split(separator)); + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny([separator])); AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create([separator]))); } @@ -646,11 +647,13 @@ public static void SplitCharArraySeparator(string value, char[] separators, int Assert.Equal(expected.Length, value.AsSpan().SplitAny(ranges, separators, options)); Assert.Equal(expected, ranges.Take(expected.Length).Select(r => value[r]).ToArray()); - 
// The SpanSplitEnumerator does not replicate the behaviour of splitting on a whitespace char when input char[] is null or empty. - if (count == int.MaxValue && options is StringSplitOptions.None && separators is { Length: > 0 }) + if (count == int.MaxValue && options is StringSplitOptions.None) { AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(separators)); - AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create(separators))); + if (separators is { Length: > 0 }) // the SearchValues overload doesn't special-case empty to mean whitespace + { + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create(separators))); + } } } @@ -701,6 +704,11 @@ public static void SplitStringArraySeparator(string value, string[] separators, Range[] ranges = new Range[count == int.MaxValue ? value.Length + 1 : count]; Assert.Equal(expected.Length, value.AsSpan().SplitAny(ranges, separators, options)); Assert.Equal(expected, ranges.Take(expected.Length).Select(r => value[r]).ToArray()); + + if (separators is { Length: 1 } && count == int.MaxValue && options == StringSplitOptions.None) + { + AssertEqual(expected, value, value.AsSpan().Split(separators[0])); + } } private static string[] ToStringArray(char[] source) @@ -718,12 +726,12 @@ private static string[] ToStringArray(char[] source) private static void AssertEqual(string[] items, ReadOnlySpan source, MemoryExtensions.SpanSplitEnumerator enumerator) { - foreach (var item in items) + foreach (string item in items) { Assert.True(enumerator.MoveNext()); - var slice = source[enumerator.Current]; - Assert.Equal(item, new string(slice)); + Assert.Equal(item, source[enumerator.Current].ToString()); } + Assert.False(enumerator.MoveNext()); } } From c8024c4ebc0a14ac3bbf9a4a352fb3631873afe1 Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 15 Jul 2024 19:24:05 +0100 Subject: [PATCH 16/21] Removes _startNext --- .../src/System/MemoryExtensions.Split.cs | 26 
+++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index ece3c133f9e30c..9de9d9c8d47f23 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -116,8 +116,6 @@ private enum SpanSplitEnumeratorMode private int _startCurrent = 0; /// The exclusive ending index in of the current range. private int _endCurrent = 0; - /// The index in from which the next separator search should start. - private int _startNext = 0; /// Gets an enumerator that allows for iteration over the split span. /// Returns a that can be used to iterate over the split span. @@ -184,49 +182,45 @@ internal SpanSplitEnumerator(ReadOnlySpan span, T separator) public bool MoveNext() { // Search for the next separator index. - int separatorIndex, separatorLength; + int separatorIndex; switch (_splitMode) { case SpanSplitEnumeratorMode.None: return false; case SpanSplitEnumeratorMode.SingleElement: - separatorIndex = _span.Slice(_startNext).IndexOf(_separator); - separatorLength = 1; + _startCurrent = _endCurrent + 1; + separatorIndex = _span.Slice(_startCurrent).IndexOf(_separator); break; case SpanSplitEnumeratorMode.Any: - separatorIndex = _span.Slice(_startNext).IndexOfAny(_separatorBuffer); - separatorLength = 1; + _startCurrent = _endCurrent + 1; + separatorIndex = _span.Slice(_startCurrent).IndexOfAny(_separatorBuffer); break; case SpanSplitEnumeratorMode.Sequence: - separatorIndex = _span.Slice(_startNext).IndexOf(_separatorBuffer); - separatorLength = _separatorBuffer.Length; + _startCurrent = _endCurrent + _separatorBuffer.Length; + separatorIndex = _span.Slice(_startCurrent).IndexOf(_separatorBuffer); break; case SpanSplitEnumeratorMode.EmptySequence: + _startCurrent = _endCurrent + 1; separatorIndex = 
-1; - separatorLength = 1; break; default: Debug.Assert(_splitMode == SpanSplitEnumeratorMode.SearchValues, $"Unknown split mode: {_splitMode}"); - separatorIndex = _span.Slice(_startNext).IndexOfAny(_searchValues); - separatorLength = 1; + _startCurrent = _endCurrent + 1; + separatorIndex = _span.Slice(_startCurrent).IndexOfAny(_searchValues); break; } - _startCurrent = _startNext; if (separatorIndex >= 0) { _endCurrent = _startCurrent + separatorIndex; - _startNext = _endCurrent + separatorLength; } else { - _startNext = _endCurrent = _span.Length; - // Set _splitMode to None so that subsequent MoveNext calls will return false. _splitMode = SpanSplitEnumeratorMode.None; } From b1449601b52f9f57d6f921f0f47a80300ed97b0e Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 15 Jul 2024 20:44:43 +0100 Subject: [PATCH 17/21] Fixes tests --- src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs | 7 ++++--- .../src/System/MemoryExtensions.Split.cs | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs index 6864dbeec646b9..68ba8487f57e42 100644 --- a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs +++ b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs @@ -24,7 +24,7 @@ public static void DefaultSpanSplitEnumeratorBehaviour() // Implicit DoesNotThrow assertion charSpanEnumerator.GetEnumerator(); - var stringSpanEnumerator = new MemoryExtensions.SpanSplitEnumerator(); + var stringSpanEnumerator = new MemoryExtensions.SpanSplitEnumerator(); Assert.Equal(new Range(0, 0), stringSpanEnumerator.Current); Assert.False(stringSpanEnumerator.MoveNext()); stringSpanEnumerator.GetEnumerator(); @@ -208,10 +208,11 @@ separator is char[] separators && private static void AssertEnsureCorrectEnumeration(MemoryExtensions.SpanSplitEnumerator enumerator, Range[] result) where T : IEquatable { - foreach ((Range r, int index) in 
((Range[])[0..0]).Concat(result).Select((e, i) => (e, i))) + Assert.True(enumerator.MoveNext()); + foreach ((Range r, int index) in result.Select((e, i) => (e, i))) { Assert.Equal(r, enumerator.Current); - if (index < result.Length) + if (index < result.Length - 1) Assert.True(enumerator.MoveNext()); } Assert.False(enumerator.MoveNext()); diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index 9de9d9c8d47f23..d29bc855453a2d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -115,7 +115,7 @@ private enum SpanSplitEnumeratorMode /// The inclusive starting index in of the current range. private int _startCurrent = 0; /// The exclusive ending index in of the current range. - private int _endCurrent = 0; + private int _endCurrent = -1; /// Gets an enumerator that allows for iteration over the split span. /// Returns a that can be used to iterate over the split span. @@ -165,6 +165,7 @@ internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separator, bo _splitMode = separator.Length == 0 ? SpanSplitEnumeratorMode.EmptySequence : SpanSplitEnumeratorMode.Sequence; + _endCurrent = Math.Min(-1, -separator.Length); } /// Initializes the enumerator for . @@ -221,6 +222,7 @@ public bool MoveNext() } else { + _endCurrent = _span.Length; // Set _splitMode to None so that subsequent MoveNext calls will return false. 
_splitMode = SpanSplitEnumeratorMode.None; } From 9dd4eb16d5ea43f49446cbd3713c3965b0691c16 Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 15 Jul 2024 20:53:53 +0100 Subject: [PATCH 18/21] Fixes tests --- src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs index 68ba8487f57e42..746b219555b003 100644 --- a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs +++ b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs @@ -208,7 +208,15 @@ separator is char[] separators && private static void AssertEnsureCorrectEnumeration(MemoryExtensions.SpanSplitEnumerator enumerator, Range[] result) where T : IEquatable { + // Assert.Throws would not work due to the requirement to capture the ref struct + try + { + _ = enumerator.Current; + Assert.Fail("enumerator.Current is not valid until the first call to MoveNext()"); + } + catch (ArgumentOutOfRangeException) { } Assert.True(enumerator.MoveNext()); + foreach ((Range r, int index) in result.Select((e, i) => (e, i))) { Assert.Equal(r, enumerator.Current); From 453f07fddf9d5a3490650dd39ec27d4e3ee8481d Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 15 Jul 2024 21:08:36 +0100 Subject: [PATCH 19/21] Adds Debug.Assert --- .../src/System/MemoryExtensions.Split.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index d29bc855453a2d..803a5fdf3506b5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -165,6 +165,7 @@ internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separator, bo _splitMode = separator.Length == 0 ? 
SpanSplitEnumeratorMode.EmptySequence : SpanSplitEnumeratorMode.Sequence; + // _endCurrent needs to be adjusted such that after first call to MoveNext() _startCurrent is 0 _endCurrent = Math.Min(-1, -separator.Length); } @@ -216,6 +217,8 @@ public bool MoveNext() break; } + Debug.Assert(_endCurrent < 0 && _startCurrent != 0, "On first iteration of MoveNext() _startCurrent should be 0"); + if (separatorIndex >= 0) { _endCurrent = _startCurrent + separatorIndex; From c59a0b6332ca68f8afc4a74ee84b82770f9e3cae Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 15 Jul 2024 21:22:24 +0100 Subject: [PATCH 20/21] Fixes Debug.Assert condition --- .../System.Private.CoreLib/src/System/MemoryExtensions.Split.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs index 803a5fdf3506b5..8f61afd0dd4ae6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Split.cs @@ -217,7 +217,7 @@ public bool MoveNext() break; } - Debug.Assert(_endCurrent < 0 && _startCurrent != 0, "On first iteration of MoveNext() _startCurrent should be 0"); + Debug.Assert((_endCurrent >= 0) || (_startCurrent == 0), "On first iteration of MoveNext() _startCurrent should be 0"); if (separatorIndex >= 0) { From 572b0d6216921dc19902cd99b8be679db9307d34 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 15 Jul 2024 23:10:11 +0100 Subject: [PATCH 21/21] Update Split.T.cs --- src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs index 746b219555b003..91d3784374a36c 100644 --- a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs +++ 
b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs @@ -24,7 +24,7 @@ public static void DefaultSpanSplitEnumeratorBehaviour() // Implicit DoesNotThrow assertion charSpanEnumerator.GetEnumerator(); - var stringSpanEnumerator = new MemoryExtensions.SpanSplitEnumerator(); + var stringSpanEnumerator = new MemoryExtensions.SpanSplitEnumerator(); Assert.Equal(new Range(0, 0), stringSpanEnumerator.Current); Assert.False(stringSpanEnumerator.MoveNext()); stringSpanEnumerator.GetEnumerator();