From bdc11930b55f9ffd22d43b286779c0f882666e40 Mon Sep 17 00:00:00 2001 From: Dmitry Khalanskiy Date: Tue, 2 Dec 2025 12:00:28 +0100 Subject: [PATCH 1/2] Document the parser concatenation function --- .../src/internal/format/parser/Parser.kt | 49 +++++++++++++++++-- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/core/common/src/internal/format/parser/Parser.kt b/core/common/src/internal/format/parser/Parser.kt index 28025975..fd7e0b5f 100644 --- a/core/common/src/internal/format/parser/Parser.kt +++ b/core/common/src/internal/format/parser/Parser.kt @@ -42,10 +42,22 @@ internal class ParserStructure( } /** - * Concatenates a list of parser structures into a single structure, processing them in reverse order. - * Simplifies the result by merging number spans and handling unconditional modifications. + * Concatenates a list of parser structures into a single *valid* structure. + * + * A *valid* parser is one where, if numeric values are parsed consecutively without a separator + * (or with zero-width [UnconditionalModification] separators) between them, + * they are represented as a single [NumberSpanParserOperation]. */ internal fun List>.concat(): ParserStructure { + /** + * Returns a *valid* parser obtained by prepending [baseOperations] followed by [numberSpan] + * to [simplifiedParserStructure], + * while ensuring that [unconditionalModifications] are preserved in the result. + * + * Requirements: + * - [simplifiedParserStructure] must have non-empty [ParserStructure.operations]. + * - [simplifiedParserStructure] is a *valid* parser. + */ fun mergeOperations( baseOperations: List>, numberSpan: List>?, @@ -76,13 +88,27 @@ internal fun List>.concat(): ParserStructure { return ParserStructure(mergedOperations, simplifiedParserStructure.followedBy) } - // Simplifies this parser and appends [other] to all execution paths. - // Merges number spans, collects unconditional modifications, and flattens alternatives. 
+ /** + * Returns a *valid* parser obtained by prepending *any* parser `this` to a *valid* parser [other]. + */ fun ParserStructure.simplifyAndAppend(other: ParserStructure): ParserStructure { val newOperations = mutableListOf>() var currentNumberSpan: MutableList>? = null val unconditionalModifications = mutableListOf>() + // Loop invariant: + // + // |- zero-width parsers interspersing the number span + // | + // unconditionalModifications + // \-------------------------/ + // operation, ..., operation, number, number, UnconditionalModification, number, operation, operation + // \_______________________/ \______________ . . . . . . . . . . . . . ______/ \_______/ + // newOperations currentNumberSpan op + // | | |- next operation + // |- operations where spans of |- the continued span of + // number parsers are merged into number parsers + // `NumberSpanParserOperation` for (op in operations) { when (op) { is NumberSpanParserOperation -> { @@ -105,6 +131,7 @@ internal fun List>.concat(): ParserStructure { } } + // *Valid* parsers resulting from appending [other] to every parser in `this.followedBy`. val mergedTails = followedBy.flatMap { val simplified = it.simplifyAndAppend(other) // Parser `ParserStructure(emptyList(), p)` is equivalent to `p`, @@ -116,7 +143,12 @@ internal fun List>.concat(): ParserStructure { else listOf(simplified) }.ifEmpty { + // We only enter this branch if [followedBy] is empty. + // In that case, [mergedTails] is exactly `listOf(other)`. + // We optimize this common case here as a fast-path and to reduce indirection in the resulting parser. if (other.operations.isNotEmpty()) { + // Directly append `other` to the simplified `this`. 
+ // The call is valid: `other.operations` is non-empty return mergeOperations(newOperations, currentNumberSpan, unconditionalModifications, other) } // [other] has no operations, just alternatives; use them as our tails @@ -156,6 +188,15 @@ internal fun List>.concat(): ParserStructure { } } + // Loop invariant: + // + // this = Parser, ..., Parser, operations, operations, operations, Parser, Parser, ... + // \____/ \________________________________/ \_________________/ + // parser accumulatedOperations.reversed() result + // | | |- simplified parser + // | |- span of parsers without branching + // | + // |- next parser to be processed for (parser in this.asReversed()) { if (parser.followedBy.isEmpty()) { accumulatedOperations.add(parser.operations) From 45ff436c518762d5a26268164e16ff6b64433e18 Mon Sep 17 00:00:00 2001 From: Dmitry Khalanskiy Date: Mon, 8 Dec 2025 12:49:54 +0100 Subject: [PATCH 2/2] WIP: document more validity requirements --- .../src/internal/format/parser/Parser.kt | 46 ++++++++++++++++--- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/core/common/src/internal/format/parser/Parser.kt b/core/common/src/internal/format/parser/Parser.kt index fd7e0b5f..1729060f 100644 --- a/core/common/src/internal/format/parser/Parser.kt +++ b/core/common/src/internal/format/parser/Parser.kt @@ -42,21 +42,30 @@ internal class ParserStructure( } /** - * Concatenates a list of parser structures into a single *valid* structure. + * Concatenates a list of (potentially non-*valid*) parser structures into a single *valid* structure. * - * A *valid* parser is one where, if numeric values are parsed consecutively without a separator - * (or with zero-width [UnconditionalModification] separators) between them, - * they are represented as a single [NumberSpanParserOperation]. + * A *valid* parser is one where: + * + * - Consecutive number parsers on any parsing path are represented as a single + * [NumberSpanParserOperation]. 
+ * - A span of [UnconditionalModification] cannot precede a [NumberSpanParserOperation], + * unless the span itself is preceded by a non-numeric non-zero-width parser. + * + * Together, these two rules ensure that whenever numeric values are parsed consecutively, + * even with zero-width parser operations between them (at the moment, these are only + * [UnconditionalModification]), they will be treated as a single number that's then + * split into components. */ internal fun List>.concat(): ParserStructure { /** * Returns a *valid* parser obtained by prepending [baseOperations] followed by [numberSpan] * to [simplifiedParserStructure], - * while ensuring that [unconditionalModifications] are preserved in the result. + * while ensuring that [unconditionalModifications] are present in the result. * * Requirements: * - [simplifiedParserStructure] must have non-empty [ParserStructure.operations]. * - [simplifiedParserStructure] is a *valid* parser. + * - [baseOperations] cannot end with either an [UnconditionalModification] or a [NumberSpanParserOperation]. */ fun mergeOperations( baseOperations: List>, @@ -68,6 +77,7 @@ internal fun List>.concat(): ParserStructure { val firstOperation = operationsToMerge.firstOrNull() val mergedOperations = buildList { addAll(baseOperations) + // Currently, `this` is either empty or ends with a non-numeric non-zero-width parser. when { numberSpan == null -> { addAll(operationsToMerge) @@ -83,6 +93,19 @@ internal fun List>.concat(): ParserStructure { addAll(operationsToMerge) } } + // Currently, `this` ends with the operations from `operationsToMerge`. + // `operationsToMerge` was not empty, by the input requirements, so its `lastOrNull()` is non-null. + // - If it's a `NumberSpanParserOperation`, + // this means its `followedBy` do not start with a `NumberSpanParserOperation`, + // since `simplifiedParserStructure` is *valid*. + // This means it's valid to append `unconditionalModifications`. 
+ // - If it's an `UnconditionalModification`, + // this means either that its `followedBy` do not start with a `NumberSpanParserOperation`, + // or that some non-zero-width non-numeric parsers precede it in `operationsToMerge`. + // Adding new `unconditionalModifications` to the existing span does not break correctness. + // - If it's some other parser, + // then `unconditionalModifications` is preceded by a non-zero-width non-numeric parser, + // which is valid. addAll(unconditionalModifications) } return ParserStructure(mergedOperations, simplifiedParserStructure.followedBy) @@ -163,9 +186,20 @@ internal fun List>.concat(): ParserStructure { newOperations.add(NumberSpanParserOperation(currentNumberSpan)) } newOperations.addAll(unconditionalModifications) + // Either the merged tails do not start with a `NumberSpanParserOperation`, + // or the last non-zero-width parser in `newOperations` exists and is not a number parser. + // + // In the first case, the resulting parser is *valid*: + // `unconditionalModifications` does not precede a number parser, and in `newOperations`, + // consecutive number parsers are merged into one. + // + // In the second case, the resulting parser is also *valid*: + // `unconditionalModifications` may precede a number parser, but it also has + // a non-zero-width non-number parser before it. ParserStructure(newOperations, mergedTails) } else { - // Distribute number span across alternatives that start with number spans + // Some `mergedTails` begin with a number parser, and also, either + // the current number span isn't empty, or there are no non-zero-width non-number parsers preceding it. val newTails = mergedTails.map { structure -> mergeOperations(emptyList(), currentNumberSpan, unconditionalModifications, structure) }