diff --git a/src/libraries/System.Memory/ref/System.Memory.cs b/src/libraries/System.Memory/ref/System.Memory.cs index 66b95a12b5c25c..670a250bf5eaac 100644 --- a/src/libraries/System.Memory/ref/System.Memory.cs +++ b/src/libraries/System.Memory/ref/System.Memory.cs @@ -350,6 +350,10 @@ public static void Sort(this System.Span keys, System.Span(this System.Span keys, System.Span items, System.Comparison comparison) { } public static void Sort(this System.Span span, TComparer comparer) where TComparer : System.Collections.Generic.IComparer? { } public static void Sort(this System.Span keys, System.Span items, TComparer comparer) where TComparer : System.Collections.Generic.IComparer? { } + public static System.MemoryExtensions.SpanSplitEnumerator Split(this System.ReadOnlySpan source, T separator) where T : IEquatable { throw null; } + public static System.MemoryExtensions.SpanSplitEnumerator Split(this System.ReadOnlySpan source, System.ReadOnlySpan separator) where T : IEquatable { throw null; } + public static System.MemoryExtensions.SpanSplitEnumerator SplitAny(this System.ReadOnlySpan source, [System.Diagnostics.CodeAnalysis.UnscopedRef] params System.ReadOnlySpan separators) where T : IEquatable { throw null; } + public static System.MemoryExtensions.SpanSplitEnumerator SplitAny(this System.ReadOnlySpan source, System.Buffers.SearchValues separators) where T : IEquatable { throw null; } public static int Split(this System.ReadOnlySpan source, System.Span destination, char separator, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } public static int Split(this System.ReadOnlySpan source, System.Span destination, System.ReadOnlySpan separator, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } public static int SplitAny(this System.ReadOnlySpan source, System.Span destination, System.ReadOnlySpan separators, System.StringSplitOptions options = System.StringSplitOptions.None) { throw null; } @@ 
-430,6 +434,12 @@ public ref struct TryWriteInterpolatedStringHandler public bool AppendFormatted(string? value) { throw null; } public bool AppendFormatted(string? value, int alignment = 0, string? format = null) { throw null; } } + public ref struct SpanSplitEnumerator where T : System.IEquatable + { + public System.MemoryExtensions.SpanSplitEnumerator GetEnumerator() { throw null; } + public readonly System.Range Current { get { throw null; } } + public bool MoveNext() { throw null; } + } } } namespace System.Buffers diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs new file mode 100644 index 00000000000000..91d3784374a36c --- /dev/null +++ b/src/libraries/System.Memory/tests/ReadOnlySpan/Split.T.cs @@ -0,0 +1,229 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
using System.Buffers;
using System.Collections.Generic;
using System.Linq;
using Xunit;

namespace System.SpanTests
{
    /// <summary>
    /// Tests for the range-yielding <c>Split</c>/<c>SplitAny</c> enumerator overloads on
    /// <see cref="ReadOnlySpan{T}"/>, exercised over primitive, struct and class element types.
    /// </summary>
    public static partial class ReadOnlySpanTests
    {
        // Value-type and reference-type elements with record-generated equality, used to cover
        // the non-primitive code paths (default(CustomClass) is null).
        public record struct CustomStruct(int value) : IEquatable<CustomStruct>;
        public record class CustomClass(int value) : IEquatable<CustomClass>;

        /// <summary>
        /// A default-constructed enumerator reports an empty range, yields nothing, and can still
        /// hand out an enumerator without throwing.
        /// </summary>
        [Fact]
        public static void DefaultSpanSplitEnumeratorBehaviour()
        {
            var charSpanEnumerator = new MemoryExtensions.SpanSplitEnumerator<char>();
            Assert.Equal(new Range(0, 0), charSpanEnumerator.Current);
            Assert.False(charSpanEnumerator.MoveNext());

            // Implicit DoesNotThrow assertion
            charSpanEnumerator.GetEnumerator();

            var stringSpanEnumerator = new MemoryExtensions.SpanSplitEnumerator<string>();
            Assert.Equal(new Range(0, 0), stringSpanEnumerator.Current);
            Assert.False(stringSpanEnumerator.MoveNext());
            stringSpanEnumerator.GetEnumerator();
        }

        /// <summary>Rows of (input, single separator element, expected ranges).</summary>
        public static IEnumerable<object[]> SplitSingleElementSeparatorData =>
        [
            // Split on default
            [ (char[])['a', ' ', 'b'], default(char), (Range[])[0..3] ],
            [ (int[]) [1, 2, 3], default(int), (Range[])[0..3] ],
            [ (long[])[1, 2, 3], default(long), (Range[])[0..3] ],
            [ (byte[])[1, 2, 3], default(byte), (Range[])[0..3] ],
            [ (CustomStruct[])[new(1), new(2), new(3)], default(CustomStruct), (Range[])[0..3] ],
            [ (CustomClass[])[new(1), new(2), new(3)], default(CustomClass), (Range[])[0..3] ],

            // Split no matching element
            [ (char[])['a', ' ', 'b'], ',', (Range[])[0..3] ],
            [ (int[]) [1, 2, 3], (int)4, (Range[])[0..3] ],
            [ (long[])[1, 2, 3], (long)4, (Range[])[0..3] ],
            [ (byte[])[1, 2, 3], (byte)4, (Range[])[0..3] ],
            [ (CustomStruct[])[new(1), new(2), new(3)], new CustomStruct(4), (Range[])[0..3] ],
            [ (CustomClass[])[new(1), new(2), new(3)], new CustomClass(4), (Range[])[0..3] ],

            // Split on sequence containing only a separator
            [ (char[])[','], ',', (Range[])[0..0, 1..1] ],
            [ (int[]) [1], (int)1, (Range[])[0..0, 1..1] ],
            [ (long[])[1], (long)1, (Range[])[0..0, 1..1] ],
            [ (byte[])[1], (byte)1, (Range[])[0..0, 1..1] ],
            [ (CustomStruct[])[new(1)], new CustomStruct(1), (Range[])[0..0, 1..1] ],
            [ (CustomClass[]) [new(1)], new CustomClass(1), (Range[])[0..0, 1..1] ],

            // Split on empty sequence with default separator
            [ (char[])[], default(char), (Range[])[0..0] ],
            [ (int[]) [], default(int), (Range[])[0..0] ],
            [ (long[])[], default(long), (Range[])[0..0] ],
            [ (byte[])[], default(byte), (Range[])[0..0] ],
            [ (CustomStruct[])[], default(CustomStruct), (Range[])[0..0] ],
            [ (CustomClass[]) [], default(CustomClass), (Range[])[0..0] ],

            // Split on a single separator in the middle of the input
            [ (char[])['a', ',', 'b'], ',', (Range[]) [ 0..1, 2..3 ] ],
            [ (int[]) [1, 2, 3], (int)2, (Range[]) [ 0..1, 2..3 ] ],
            [ (long[])[1, 2, 3], (long)2, (Range[]) [ 0..1, 2..3 ] ],
            [ (byte[])[1, 2, 3], (byte)2, (Range[]) [ 0..1, 2..3 ] ],
            [ (CustomStruct[])[new(1), new(2), new(3)], new CustomStruct(2), (Range[]) [ 0..1, 2..3 ] ],
            [ (CustomClass[])[new(1), new(2), new(3)], new CustomClass(2), (Range[]) [ 0..1, 2..3 ] ],

            // Split on two consecutive separators at the end of the input
            [ (char[])['a', 'b', ',', ','], ',', (Range[]) [ 0..2, 3..3, 4..4 ] ],
            [ (int[]) [1, 3, 2, 2], (int)2, (Range[]) [ 0..2, 3..3, 4..4 ] ],
            [ (long[])[1, 3, 2, 2], (long)2, (Range[]) [ 0..2, 3..3, 4..4 ] ],
            [ (byte[])[1, 3, 2, 2], (byte)2, (Range[]) [ 0..2, 3..3, 4..4 ] ],
            [ (CustomStruct[])[new(1), new(3), new(2), new(2)], new CustomStruct(2), (Range[]) [ 0..2, 3..3, 4..4 ] ],
            [ (CustomClass[])[new(1), new(3), new(2), new(2)], new CustomClass(2), (Range[]) [ 0..2, 3..3, 4..4 ] ],
        ];

        [Theory]
        [MemberData(nameof(SplitSingleElementSeparatorData))]
        public static void Split_SingleElementSeparator<T>(T[] value, T separator, Range[] result) where T : IEquatable<T>
        {
            AssertEnsureCorrectEnumeration(new ReadOnlySpan<T>(value).Split(separator), result);
        }

        /// <summary>Rows of (input, separator treated as one sequence, expected ranges).</summary>
        public static IEnumerable<object[]> SplitSequenceSeparatorData =>
        [
            // Split no separators
            [ (char[])['a', ' ', 'b'], (char[])[], (Range[])[0..3] ],
            [ (int[]) [1, 2, 3], (int[]) [], (Range[])[0..3] ],
            [ (long[])[1, 2, 3], (long[])[], (Range[])[0..3] ],
            [ (byte[])[1, 2, 3], (byte[])[], (Range[])[0..3] ],
            [ (CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[], (Range[])[0..3] ],
            [ (CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[], (Range[])[0..3] ],

            // Split no matching elements
            [ (char[])['a', ' ', 'b'], (char[])[',', '.' ], (Range[])[0..3] ],
            [ (int[]) [1, 2, 3], (int[]) [4, 3], (Range[])[0..3] ],
            [ (long[])[1, 2, 3], (long[])[4, 3], (Range[])[0..3] ],
            [ (byte[])[1, 2, 3], (byte[])[4, 3], (Range[])[0..3] ],
            [ (CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[new(4), new(3)], (Range[])[0..3] ],
            [ (CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[new(4), new(3)], (Range[])[0..3] ],

            // Split on input span with only a single sequence separator
            [ (char[])[',', '.'], (char[])[',', '.' ], (Range[])[0..0, 2..2] ],
            [ (int[]) [4, 3], (int[]) [4, 3], (Range[])[0..0, 2..2] ],
            [ (long[])[4, 3], (long[])[4, 3], (Range[])[0..0, 2..2] ],
            [ (byte[])[4, 3], (byte[])[4, 3], (Range[])[0..0, 2..2] ],
            [ (CustomStruct[])[new(4), new(3)], (CustomStruct[])[new(4), new(3)], (Range[])[0..0, 2..2] ],
            [ (CustomClass[])[new(4), new(3)], (CustomClass[])[new(4), new(3)], (Range[])[0..0, 2..2] ],

            // Split on empty sequence with default separator
            [ (char[])[], (char[])[default(char)], (Range[])[0..0] ],
            [ (int[]) [], (int[]) [default(int)], (Range[])[0..0] ],
            [ (long[])[], (long[])[default(long)], (Range[])[0..0] ],
            [ (byte[])[], (byte[])[default(byte)], (Range[])[0..0] ],
            [ (CustomStruct[])[], (CustomStruct[])[default], (Range[])[0..0] ],
            [ (CustomClass[]) [], (CustomClass[])[default], (Range[])[0..0] ],

            // Split on one occurrence of the sequence in the middle of the input
            [ (char[])['a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..1, 3..4 ] ],
            [ (int[]) [1, 2, 4, 3], (int[])[2, 4], (Range[]) [ 0..1, 3..4 ] ],
            [ (long[])[1, 2, 4, 3], (long[])[2, 4], (Range[]) [ 0..1, 3..4 ] ],
            [ (byte[])[1, 2, 4, 3], (byte[])[2, 4], (Range[]) [ 0..1, 3..4 ] ],
            [ (CustomStruct[])[new(1), new(2), new(4), new(3)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..1, 3..4 ] ],
            [ (CustomClass[])[new(1), new(2), new(4), new(3)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..1, 3..4 ] ],

            // Split on the sequence occurring at the start and again in the middle
            [ (char[])[',', '-', 'a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..0, 2..3, 5..6 ] ],
            [ (int[]) [2, 4, 3, 2, 4, 5], (int[]) [2, 4], (Range[]) [ 0..0, 2..3, 5..6 ] ],
            [ (long[])[2, 4, 3, 2, 4, 5], (long[])[2, 4], (Range[]) [ 0..0, 2..3, 5..6 ] ],
            [ (byte[])[2, 4, 3, 2, 4, 5], (byte[])[2, 4], (Range[]) [ 0..0, 2..3, 5..6 ] ],
            [ (CustomStruct[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..0, 2..3, 5..6 ] ],
            [ (CustomClass[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..0, 2..3, 5..6 ] ],
        ];

        [Theory]
        [MemberData(nameof(SplitSequenceSeparatorData))]
        public static void Split_SequenceSeparator<T>(T[] value, T[] separator, Range[] result) where T : IEquatable<T>
        {
            AssertEnsureCorrectEnumeration(new ReadOnlySpan<T>(value).Split(separator), result);
        }

        /// <summary>Rows of (input, set of independent separators, expected ranges).</summary>
        public static IEnumerable<object[]> SplitAnySeparatorData =>
        [
            // Split no separators
            [ (char[])['a', ' ', 'b'], (char[])[], (Range[])[0..1, 2..3] ], // an empty span of separators for char is handled as all whitespace being separators
            [ (int[]) [1, 2, 3], (int[]) [], (Range[])[0..3] ],
            [ (long[])[1, 2, 3], (long[])[], (Range[])[0..3] ],
            [ (byte[])[1, 2, 3], (byte[])[], (Range[])[0..3] ],
            [ (CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[], (Range[])[0..3] ],
            [ (CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[], (Range[])[0..3] ],

            // Split non-matching separators
            [ (char[])['a', ' ', 'b'], (char[])[',', '.' ], (Range[])[0..3] ],
            [ (int[]) [1, 2, 3], (int[]) [4, 5], (Range[])[0..3] ],
            [ (long[])[1, 2, 3], (long[])[4, 5], (Range[])[0..3] ],
            [ (byte[])[1, 2, 3], (byte[])[4, 5], (Range[])[0..3] ],
            [ (CustomStruct[])[new(1), new(2), new(3)], (CustomStruct[])[new(4), new(5)], (Range[])[0..3] ],
            [ (CustomClass[])[new(1), new(2), new(3)], (CustomClass[])[new(4), new(5)], (Range[])[0..3] ],

            // Split on sequence containing only a separator
            [ (char[])[','], (char[])[','], (Range[])[0..0, 1..1] ],
            [ (int[]) [1], (int[]) [1], (Range[])[0..0, 1..1] ],
            [ (long[])[1], (long[])[1], (Range[])[0..0, 1..1] ],
            [ (byte[])[1], (byte[])[1], (Range[])[0..0, 1..1] ],
            [ (CustomStruct[])[new(1)], (CustomStruct[])[new(1)], (Range[])[0..0, 1..1] ],
            [ (CustomClass[]) [new(1)], (CustomClass[])[new(1)], (Range[])[0..0, 1..1] ],

            // Split on empty sequence with default separator
            [ (char[])[], (char[])[default(char)], (Range[])[0..0] ],
            [ (int[]) [], (int[]) [default(int)], (Range[])[0..0] ],
            [ (long[])[], (long[])[default(long)], (Range[])[0..0] ],
            [ (byte[])[], (byte[])[default(byte)], (Range[])[0..0] ],
            [ (CustomStruct[])[], (CustomStruct[])[new(default)], (Range[])[0..0] ],
            [ (CustomClass[]) [], (CustomClass[])[new(default)], (Range[])[0..0] ],

            // Split on two adjacent separators in the middle of the input
            [ (char[])['a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..1, 2..2, 3..4 ] ],
            [ (int[]) [1, 2, 4, 3], (int[])[2, 4], (Range[]) [ 0..1, 2..2, 3..4 ] ],
            [ (long[])[1, 2, 4, 3], (long[])[2, 4], (Range[]) [ 0..1, 2..2, 3..4 ] ],
            [ (byte[])[1, 2, 4, 3], (byte[])[2, 4], (Range[]) [ 0..1, 2..2, 3..4 ] ],
            [ (CustomStruct[])[new(1), new(2), new(4), new(3)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..1, 2..2, 3..4 ] ],
            [ (CustomClass[])[new(1), new(2), new(4), new(3)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..1, 2..2, 3..4 ] ],

            // Split on adjacent separators at the start and again in the middle
            [ (char[])[',', '-', 'a', ',', '-', 'b'], (char[])[',', '-'], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ],
            [ (int[]) [2, 4, 3, 2, 4, 5], (int[]) [2, 4], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ],
            [ (long[])[2, 4, 3, 2, 4, 5], (long[])[2, 4], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ],
            [ (byte[])[2, 4, 3, 2, 4, 5], (byte[])[2, 4], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ],
            [ (CustomStruct[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomStruct[]) [new(2), new(4)], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ],
            [ (CustomClass[])[new(2), new(4), new(3), new(2), new(4), new(5)], (CustomClass[])[new(2), new(4)], (Range[]) [ 0..0, 1..1, 2..3, 4..4, 5..6 ] ],
        ];

        [Theory]
        [MemberData(nameof(SplitAnySeparatorData))]
        public static void Split_AnySingleElementSeparator<T>(T[] value, T[] separator, Range[] result) where T : IEquatable<T>
        {
            AssertEnsureCorrectEnumeration(new ReadOnlySpan<T>(value).SplitAny(separator), result);

            if (value is char[] source &&
                separator is char[] separators &&
                separators.Length > 0) // the SearchValues overload does not special-case empty
            {
                var charEnumerator = new ReadOnlySpan<char>(source).SplitAny(SearchValues.Create(separators));
                AssertEnsureCorrectEnumeration(charEnumerator, result);
            }
        }

        /// <summary>
        /// Drives <paramref name="enumerator"/> to completion and checks that it yields exactly
        /// the ranges in <paramref name="result"/>, in order, and then stops.
        /// </summary>
        private static void AssertEnsureCorrectEnumeration<T>(MemoryExtensions.SpanSplitEnumerator<T> enumerator, Range[] result) where T : IEquatable<T>
        {
            // Assert.Throws would not work due to the requirement to capture the ref struct
            try
            {
                _ = enumerator.Current;
                Assert.Fail("enumerator.Current is not valid until the first call to MoveNext()");
            }
            catch (ArgumentOutOfRangeException) { }

            Assert.True(enumerator.MoveNext());

            for (int i = 0; i < result.Length; i++)
            {
                Assert.Equal(result[i], enumerator.Current);

                // Advance only between expected items; the final MoveNext is asserted false below.
                if (i < result.Length - 1)
                {
                    Assert.True(enumerator.MoveNext());
                }
            }

            Assert.False(enumerator.MoveNext());
        }
    }
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Threading;

namespace System
{
    public static partial class MemoryExtensions
    {
        /// <summary>
        /// Returns a type that allows for enumeration of each element within a split span
        /// using the provided separator character.
        /// </summary>
        /// <typeparam name="T">The type of the elements.</typeparam>
        /// <param name="source">The source span to be enumerated.</param>
        /// <param name="separator">The separator character to be used to split the provided span.</param>
        /// <returns>Returns a <see cref="SpanSplitEnumerator{T}"/>.</returns>
        public static SpanSplitEnumerator<T> Split<T>(this ReadOnlySpan<T> source, T separator) where T : IEquatable<T> =>
            new SpanSplitEnumerator<T>(source, separator);

        /// <summary>
        /// Returns a type that allows for enumeration of each element within a split span
        /// using the provided separator span.
        /// </summary>
        /// <typeparam name="T">The type of the elements.</typeparam>
        /// <param name="source">The source span to be enumerated.</param>
        /// <param name="separator">The separator span to be used to split the provided span.</param>
        /// <returns>Returns a <see cref="SpanSplitEnumerator{T}"/>.</returns>
        public static SpanSplitEnumerator<T> Split<T>(this ReadOnlySpan<T> source, ReadOnlySpan<T> separator) where T : IEquatable<T> =>
            new SpanSplitEnumerator<T>(source, separator, treatAsSingleSeparator: true);

        /// <summary>
        /// Returns a type that allows for enumeration of each element within a split span
        /// using any of the provided elements.
        /// </summary>
        /// <typeparam name="T">The type of the elements.</typeparam>
        /// <param name="source">The source span to be enumerated.</param>
        /// <param name="separators">The separators to be used to split the provided span.</param>
        /// <returns>Returns a <see cref="SpanSplitEnumerator{T}"/>.</returns>
        /// <remarks>
        /// If <typeparamref name="T"/> is <see cref="char"/> and if <paramref name="separators"/> is empty,
        /// all Unicode whitespace characters are used as the separators. This matches the behavior of when
        /// <see cref="string.Split(char[])"/> and related overloads are used with an empty separator array,
        /// or when <see cref="SplitAny(ReadOnlySpan{char}, Span{Range}, ReadOnlySpan{char}, StringSplitOptions)"/>
        /// is used with an empty separator span.
        /// </remarks>
        public static SpanSplitEnumerator<T> SplitAny<T>(this ReadOnlySpan<T> source, [UnscopedRef] params ReadOnlySpan<T> separators) where T : IEquatable<T> =>
            new SpanSplitEnumerator<T>(source, separators);

        /// <summary>
        /// Returns a type that allows for enumeration of each element within a split span
        /// using the provided <see cref="SearchValues{T}"/>.
        /// </summary>
        /// <typeparam name="T">The type of the elements.</typeparam>
        /// <param name="source">The source span to be enumerated.</param>
        /// <param name="separators">The <see cref="SearchValues{T}"/> to be used to split the provided span.</param>
        /// <returns>Returns a <see cref="SpanSplitEnumerator{T}"/>.</returns>
        /// <remarks>
        /// Unlike <see cref="SplitAny{T}(ReadOnlySpan{T}, ReadOnlySpan{T})"/>, the <paramref name="separators"/> is not checked for being empty.
        /// An empty <paramref name="separators"/> will result in no separators being found, regardless of the type of <typeparamref name="T"/>,
        /// whereas <see cref="SplitAny{T}(ReadOnlySpan{T}, ReadOnlySpan{T})"/> will use all Unicode whitespace characters as separators if the separators span is
        /// empty and <typeparamref name="T"/> is <see cref="char"/>.
        /// </remarks>
        public static SpanSplitEnumerator<T> SplitAny<T>(this ReadOnlySpan<T> source, SearchValues<T> separators) where T : IEquatable<T> =>
            new SpanSplitEnumerator<T>(source, separators);

        /// <summary>Indicates in which mode <see cref="SpanSplitEnumerator{T}"/> is operating, with regards to how it should interpret its state.</summary>
        private enum SpanSplitEnumeratorMode
        {
            /// <summary>Either a default <see cref="SpanSplitEnumerator{T}"/> was used, or the enumerator has finished enumerating and there's no more work to do.</summary>
            None = 0,

            /// <summary>A single T separator was provided.</summary>
            SingleElement,

            /// <summary>A span of separators was provided, each of which should be treated independently.</summary>
            Any,

            /// <summary>The separator is a span of elements to be treated as a single sequence.</summary>
            Sequence,

            /// <summary>The separator is an empty sequence, such that no splits should be performed.</summary>
            EmptySequence,

            /// <summary>
            /// A <see cref="SearchValues{T}"/> was provided and should behave the same as with <see cref="Any"/> but with the separators in the
            /// <see cref="SearchValues{T}"/> instance instead of in a <see cref="ReadOnlySpan{T}"/>.
            /// </summary>
            SearchValues
        }

        /// <summary>
        /// Enables enumerating each split within a <see cref="ReadOnlySpan{T}"/> that has been divided using one or more separators.
        /// </summary>
        /// <typeparam name="T">The type of the elements.</typeparam>
        public ref struct SpanSplitEnumerator<T> where T : IEquatable<T>
        {
            /// <summary>The input span being split.</summary>
            private readonly ReadOnlySpan<T> _span;

            /// <summary>A single separator to use when <see cref="_splitMode"/> is <see cref="SpanSplitEnumeratorMode.SingleElement"/>.</summary>
            private readonly T _separator = default!;
            /// <summary>
            /// A separator span to use when <see cref="_splitMode"/> is <see cref="SpanSplitEnumeratorMode.Sequence"/> (in which case
            /// it's treated as a single separator) or <see cref="SpanSplitEnumeratorMode.Any"/> (in which case it's treated as a set of separators).
            /// </summary>
            private readonly ReadOnlySpan<T> _separatorBuffer;
            /// <summary>A set of separators to use when <see cref="_splitMode"/> is <see cref="SpanSplitEnumeratorMode.SearchValues"/>.</summary>
            private readonly SearchValues<T> _searchValues = default!;

            /// <summary>Mode that dictates how the instance was configured and how its fields should be used in <see cref="MoveNext"/>.</summary>
            private SpanSplitEnumeratorMode _splitMode;
            /// <summary>The inclusive starting index in <see cref="_span"/> of the current range.</summary>
            private int _startCurrent = 0;
            /// <summary>The exclusive ending index in <see cref="_span"/> of the current range.</summary>
            private int _endCurrent = -1;

            /// <summary>Gets an enumerator that allows for iteration over the split span.</summary>
            /// <returns>Returns a <see cref="SpanSplitEnumerator{T}"/> that can be used to iterate over the split span.</returns>
            public SpanSplitEnumerator<T> GetEnumerator() => this;

            /// <summary>Gets the current element of the enumeration.</summary>
            /// <returns>Returns a <see cref="Range"/> instance that indicates the bounds of the current element within the source span.</returns>
            public readonly Range Current => new Range(_startCurrent, _endCurrent);

            /// <summary>Initializes the enumerator for <see cref="SpanSplitEnumeratorMode.SearchValues"/>.</summary>
            internal SpanSplitEnumerator(ReadOnlySpan<T> span, SearchValues<T> searchValues)
            {
                _span = span;
                _splitMode = SpanSplitEnumeratorMode.SearchValues;
                _searchValues = searchValues;
            }

            /// <summary>Initializes the enumerator for <see cref="SpanSplitEnumeratorMode.Any"/>.</summary>
            /// <remarks>
            /// If <paramref name="separators"/> is empty and <typeparamref name="T"/> is <see cref="char"/>, as an optimization
            /// it will instead use <see cref="SpanSplitEnumeratorMode.SearchValues"/> with a cached <see cref="SearchValues{Char}"/>
            /// for all whitespace characters.
            /// </remarks>
            internal SpanSplitEnumerator(ReadOnlySpan<T> span, ReadOnlySpan<T> separators)
            {
                _span = span;
                if (typeof(T) == typeof(char) && separators.Length == 0)
                {
                    // T is char here, so reinterpreting the cached SearchValues<char> as SearchValues<T> is an identity cast.
                    _searchValues = Unsafe.As<SearchValues<T>>(WhiteSpaceSearchValues);
                    _splitMode = SpanSplitEnumeratorMode.SearchValues;
                }
                else
                {
                    _separatorBuffer = separators;
                    _splitMode = SpanSplitEnumeratorMode.Any;
                }
            }

            /// <summary>Initializes the enumerator for <see cref="SpanSplitEnumeratorMode.Sequence"/> (or <see cref="SpanSplitEnumeratorMode.EmptySequence"/> if the separator is empty).</summary>
            /// <remarks><paramref name="treatAsSingleSeparator"/> must be true.</remarks>
            internal SpanSplitEnumerator(ReadOnlySpan<T> span, ReadOnlySpan<T> separator, bool treatAsSingleSeparator)
            {
                Debug.Assert(treatAsSingleSeparator, "Should only ever be called as true; exists to differentiate from separators overload");

                _span = span;
                _separatorBuffer = separator;
                _splitMode = separator.Length == 0 ?
                    SpanSplitEnumeratorMode.EmptySequence :
                    SpanSplitEnumeratorMode.Sequence;
                // _endCurrent needs to be adjusted such that after first call to MoveNext() _startCurrent is 0
                _endCurrent = Math.Min(-1, -separator.Length);
            }

            /// <summary>Initializes the enumerator for <see cref="SpanSplitEnumeratorMode.SingleElement"/>.</summary>
            internal SpanSplitEnumerator(ReadOnlySpan<T> span, T separator)
            {
                _span = span;
                _separator = separator;
                _splitMode = SpanSplitEnumeratorMode.SingleElement;
            }

            /// <summary>
            /// Advances the enumerator to the next element of the enumeration.
            /// </summary>
            /// <returns><see langword="true"/> if the enumerator was successfully advanced to the next element; <see langword="false"/> if the enumerator has passed the end of the enumeration.</returns>
            public bool MoveNext()
            {
                // Search for the next separator index. Each mode first steps _startCurrent past the
                // previously matched separator (one element, or the whole sequence in Sequence mode).
                int separatorIndex;
                switch (_splitMode)
                {
                    case SpanSplitEnumeratorMode.None:
                        return false;

                    case SpanSplitEnumeratorMode.SingleElement:
                        _startCurrent = _endCurrent + 1;
                        separatorIndex = _span.Slice(_startCurrent).IndexOf(_separator);
                        break;

                    case SpanSplitEnumeratorMode.Any:
                        _startCurrent = _endCurrent + 1;
                        separatorIndex = _span.Slice(_startCurrent).IndexOfAny(_separatorBuffer);
                        break;

                    case SpanSplitEnumeratorMode.Sequence:
                        _startCurrent = _endCurrent + _separatorBuffer.Length;
                        separatorIndex = _span.Slice(_startCurrent).IndexOf(_separatorBuffer);
                        break;

                    case SpanSplitEnumeratorMode.EmptySequence:
                        _startCurrent = _endCurrent + 1;
                        separatorIndex = -1;
                        break;

                    default:
                        Debug.Assert(_splitMode == SpanSplitEnumeratorMode.SearchValues, $"Unknown split mode: {_splitMode}");
                        _startCurrent = _endCurrent + 1;
                        separatorIndex = _span.Slice(_startCurrent).IndexOfAny(_searchValues);
                        break;
                }

                Debug.Assert((_endCurrent >= 0) || (_startCurrent == 0), "On first iteration of MoveNext() _startCurrent should be 0");

                if (separatorIndex >= 0)
                {
                    // separatorIndex is relative to the slice, so rebase it onto the full span.
                    _endCurrent = _startCurrent + separatorIndex;
                }
                else
                {
                    _endCurrent = _span.Length;
                    // Set _splitMode to None so that subsequent MoveNext calls will return false.
                    _splitMode = SpanSplitEnumeratorMode.None;
                }

                return true;
            }
        }

        /// <summary>Gets a <see cref="SearchValues{Char}"/> for all of the Unicode whitespace characters</summary>
        private static SearchValues<char> WhiteSpaceSearchValues
        {
            get
            {
                return s_whiteSpaceSearchValues ?? Initialize();

                [MethodImpl(MethodImplOptions.NoInlining)]
                static SearchValues<char> Initialize()
                {
                    SearchValues<char> sv = SearchValues.Create("\t\n\v\f\r\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000");
#if DEBUG
                    for (int i = 0; i <= char.MaxValue; i++)
                    {
                        Debug.Assert(char.IsWhiteSpace((char)i) == sv.Contains((char)i));
                    }
#endif
                    // Publish only if no other thread got there first; either way return the published instance.
                    return Interlocked.CompareExchange(ref s_whiteSpaceSearchValues, sv, null) ?? sv;
                }
            }
        }

        /// <summary>A lazily-initialized <see cref="SearchValues{Char}"/> for all of the Unicode whitespace characters.</summary>
        private static SearchValues<char>? s_whiteSpaceSearchValues;
    }
}
+using System.Collections.Generic; using System.Linq; using Xunit; @@ -132,6 +133,7 @@ public static void SplitOneCountSingleResult() public static void SplitNoMatchSingleResult() { const string Value = "a b"; + ReadOnlySpan SpanValue = "a b"; const int Count = int.MaxValue; const StringSplitOptions Options = StringSplitOptions.None; @@ -152,17 +154,22 @@ public static void SplitNoMatchSingleResult() Assert.Equal(expected, Value.Split(new[] { "," }, Count, Options)); Range[] ranges = new Range[10]; - Assert.Equal(1, Value.AsSpan().Split(ranges, ',', Options)); + Assert.Equal(1, SpanValue.Split(ranges, ',', Options)); Assert.Equal(0..3, ranges[0]); Array.Clear(ranges); - Assert.Equal(1, Value.AsSpan().Split(ranges, ",", Options)); + Assert.Equal(1, SpanValue.Split(ranges, ",", Options)); Assert.Equal(0..3, ranges[0]); Array.Clear(ranges); - Assert.Equal(1, Value.AsSpan().SplitAny(ranges, ",", Options)); + Assert.Equal(1, SpanValue.SplitAny(ranges, ",", Options)); Assert.Equal(0..3, ranges[0]); Array.Clear(ranges); + + AssertEqual(expected, SpanValue, SpanValue.Split(',')); + AssertEqual(expected, SpanValue, SpanValue.Split(",")); + AssertEqual(expected, SpanValue, SpanValue.SplitAny(',')); + AssertEqual(expected, SpanValue, SpanValue.SplitAny(Buffers.SearchValues.Create([',']))); } private const int M = int.MaxValue; @@ -519,6 +526,10 @@ public static void SplitCharSeparator(string value, char separator, int count, S Assert.Equal(expected, value.Split(new[] { separator })); Assert.Equal(expected, value.Split((ReadOnlySpan)new[] { separator })); Assert.Equal(expected, value.Split(separator.ToString())); + + AssertEqual(expected, value.AsSpan(), value.AsSpan().Split(separator)); + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny([separator])); + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create([separator]))); } Range[] ranges = new Range[count == int.MaxValue ? 
value.Length + 1 : count]; @@ -576,6 +587,7 @@ public static void SplitStringSeparator(string value, string separator, int coun if (count == int.MaxValue && options == StringSplitOptions.None) { Assert.Equal(expected, value.Split(separator)); + AssertEqual(expected, value.AsSpan(), value.AsSpan().Split(separator)); } Range[] ranges = new Range[count == int.MaxValue ? value.Length + 1 : count]; @@ -634,6 +646,15 @@ public static void SplitCharArraySeparator(string value, char[] separators, int Range[] ranges = new Range[count == int.MaxValue ? value.Length + 1 : count]; Assert.Equal(expected.Length, value.AsSpan().SplitAny(ranges, separators, options)); Assert.Equal(expected, ranges.Take(expected.Length).Select(r => value[r]).ToArray()); + + if (count == int.MaxValue && options is StringSplitOptions.None) + { + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(separators)); + if (separators is { Length: > 0 }) // the SearchValues overload doesn't special-case empty to mean whitespace + { + AssertEqual(expected, value.AsSpan(), value.AsSpan().SplitAny(Buffers.SearchValues.Create(separators))); + } + } } [Theory] @@ -683,6 +704,11 @@ public static void SplitStringArraySeparator(string value, string[] separators, Range[] ranges = new Range[count == int.MaxValue ? 
value.Length + 1 : count]; Assert.Equal(expected.Length, value.AsSpan().SplitAny(ranges, separators, options)); Assert.Equal(expected, ranges.Take(expected.Length).Select(r => value[r]).ToArray()); + + if (separators is { Length: 1 } && count == int.MaxValue && options == StringSplitOptions.None) + { + AssertEqual(expected, value, value.AsSpan().Split(separators[0])); + } } private static string[] ToStringArray(char[] source) @@ -697,5 +723,16 @@ private static string[] ToStringArray(char[] source) } return result; } + + private static void AssertEqual(string[] items, ReadOnlySpan source, MemoryExtensions.SpanSplitEnumerator enumerator) + { + foreach (string item in items) + { + Assert.True(enumerator.MoveNext()); + Assert.Equal(item, source[enumerator.Current].ToString()); + } + + Assert.False(enumerator.MoveNext()); + } } }