From cc1540872ecd111634af21057f1792485db2ca23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nacho=20Cord=C3=B3n?= Date: Tue, 18 Nov 2025 10:20:58 +0100 Subject: [PATCH 1/7] Fixes esql class cast bug in STATS at planning level (#137511) --- docs/changelog/137511.yaml | 5 + .../src/main/resources/inlinestats.csv-spec | 17 + .../src/main/resources/stats.csv-spec | 71 ++ .../xpack/esql/action/EsqlCapabilities.java | 7 + .../esql/optimizer/LogicalPlanOptimizer.java | 9 + .../rules/logical/DeduplicateAggs.java | 21 + ...ReplaceAggregateAggExpressionWithEval.java | 28 +- .../optimizer/LogicalPlanOptimizerTests.java | 31 +- .../rules/logical/DeduplicateAggsTests.java | 616 ++++++++++++++++++ 9 files changed, 786 insertions(+), 19 deletions(-) create mode 100644 docs/changelog/137511.yaml create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java diff --git a/docs/changelog/137511.yaml b/docs/changelog/137511.yaml new file mode 100644 index 0000000000000..14340bda606e9 --- /dev/null +++ b/docs/changelog/137511.yaml @@ -0,0 +1,5 @@ +pr: 137511 +summary: Fixes esql class cast bug in STATS at planning level +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec index 30ed34d9ec611..5eadaf5b43f49 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec @@ -1001,3 +1001,20 @@ FROM hosts METADATA _index description:text | host:keyword | ip0:ip | _index:keyword | x:ip | ip1:long | host_group:text | card:keyword alpha db server |alpha |127.0.0.1 |hosts |127.0.0.1|1 |DB servers |eth0 ; + +fixClassCastBugWithSeveralCounts +required_capability: inline_stats +required_capability: fix_stats_classcast_exception + +FROM sample_data, sample_data_str +| EVAL one_ip = client_ip::ip +| INLINE STATS count1=count(client_ip::ip), count2=count(one_ip) +| KEEP count1, count2 +| LIMIT 3 +; + +count1:long |count2:long +14 |14 +14 |14 +14 |14 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 22f1e7199fc54..49faf97ac6685 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -3344,3 +3344,74 @@ VALUES(color):keyword | color:keyword yellow | yellow // end::mv-group-values-expand-result[] ; + +fixClassCastBugWithCountDistinct +required_capability: fix_stats_classcast_exception + +from airports +| rename scalerank AS x +| stats a = count(x), b = count(x) + count(x), c = count_distinct(x) +; + +a:long | b:long | c:long +891 | 1782 | 8 +; + +fixClassCastBugWithValuesFn +required_capability: fix_stats_classcast_exception + +ROW x = [1,2,3] +| STATS a = MV_COUNT(VALUES(x)), b = VALUES(x), c = SUM(x) +; + +a:integer | b:integer | c:long +3 | [1, 2, 3] | 6 +; + +fixClassCastBugWithSeveralCountDistincts +required_capability: fix_stats_classcast_exception + +ROW x = 1 +| STATS a = 2*COUNT_DISTINCT(x), b = COUNT_DISTINCT(x), c = MAX(x) +; + +a:long | b:long | c:integer +2 | 1 | 1 +; + +fixClassCastBugWithMedianPlusCountDistinct +required_capability: fix_stats_classcast_exception + +FROM sample_data_ts_long +| EVAL sym1 = 0, sym5 = 1 +| STATS sym2 = median(sym5) + 0, sym3 = median(sym5), sym4 = count_distinct(sym1) +; + +sym2:double |sym3:double | sym4:long +1.0 | 1.0 | 1 +; + +fixClassCastBugWithFoldableLiterals +required_capability: fix_stats_classcast_exception + +from airports +| rename scalerank AS x +| stats a = count(x), b = count(x) + count(x), c = count_distinct(x, 10), d = count_distinct(x, 10 + 1 - 1) +; + +a:long | b:long | c:long | d:long +891 | 1782 | 8 | 8 +; + +fixClassCastBugWithSurrogateExpressions +required_capability: fix_stats_classcast_exception + +from airports +| rename scalerank AS x +| stats a = median(x), b = percentile(x, 50), c = count_distinct(x) +; + +a:double | b:double | c:long +6.0 | 6.0 | 8 +; + diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index e2c3b7cd1beda..4c0bc0ff6af40 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1282,6 +1282,13 @@ public enum Cap { */ STATS_WITH_FILTERED_SURROGATE_FIXED, + /** + * Fix for ClassCastException in STATS + * https://github.com/elastic/elasticsearch/issues/133992 + * https://github.com/elastic/elasticsearch/issues/136598 + */ + FIX_STATS_CLASSCAST_EXCEPTION, + /** * {@link org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAliasingEvalWithProject} did not fully account for shadowing. * https://github.com/elastic/elasticsearch/issues/137019. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index 14a858f85fd2a..cfee578df349e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.CombineEvals; import org.elasticsearch.xpack.esql.optimizer.rules.logical.CombineProjections; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ConstantFolding; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggs; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ExtractAggregateCommonFilter; import org.elasticsearch.xpack.esql.optimizer.rules.logical.FoldNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.LiteralsOnTheRight; @@ -171,6 +172,14 @@ protected static Batch operators(boolean local) { new SplitInWithFoldableValue(), new PropagateEvalFoldables(), new ConstantFolding(), + /* Then deduplicate aggregations + We need this after the constant folding + because we could have expressions like + count_distinct(_, 9 + 1) + count_distinct(_, 10) + which are semantically identical + */ + new DeduplicateAggs(), new PartiallyFoldCase(), // boolean new BooleanSimplification(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java new file mode 100644 index 0000000000000..170f87f86f8ff --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java @@ -0,0 +1,21 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.logical; + +/** + * This rule handles duplicate aggregate functions to avoid duplicate compute + * stats a = min(x), b = min(x), c = count(*), d = count() by g + * becomes + * stats a = min(x), c = count(*) by g | eval b = a, d = c | keep a, b, c, d, g + */ +public final class DeduplicateAggs extends ReplaceAggregateAggExpressionWithEval { + + public DeduplicateAggs() { + super(false); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java index 075d8676abde4..52e99fd0bf89c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java @@ -43,9 +43,17 @@ * becomes * stats a = min(x), c = count(*) by g | eval b = a, d = c | keep a, b, c, d, g */ -public final class ReplaceAggregateAggExpressionWithEval extends OptimizerRules.OptimizerRule { +public class ReplaceAggregateAggExpressionWithEval extends OptimizerRules.OptimizerRule { + private final boolean replaceNestedExpressions; + + public ReplaceAggregateAggExpressionWithEval(boolean replaceNestedExpressions) { + super(OptimizerRules.TransformDirection.UP); + this.replaceNestedExpressions = replaceNestedExpressions; + } + public ReplaceAggregateAggExpressionWithEval() { super(OptimizerRules.TransformDirection.UP); + this.replaceNestedExpressions = true; } @Override @@ -88,7 +96,7 @@ protected LogicalPlan rule(Aggregate aggregate) { // common case - handle duplicates if (child instanceof AggregateFunction af) { // canonical representation, with resolved aliases - AggregateFunction canonical = (AggregateFunction) af.canonical().transformUp(e -> aliases.resolve(e, e)); + AggregateFunction canonical = getCannonical(af, aliases); Alias found = rootAggs.get(canonical); // aggregate is new @@ -106,14 +114,15 @@ protected LogicalPlan rule(Aggregate aggregate) { } // nested expression over aggregate function or groups // replace them with reference and move the expression into a follow-up eval - else { + else if (replaceNestedExpressions) { changed.set(true); Expression aggExpression = child.transformUp(AggregateFunction.class, af -> { - AggregateFunction canonical = (AggregateFunction) af.canonical(); + // canonical representation, with resolved aliases + AggregateFunction canonical = getCannonical(af, aliases); Alias alias = rootAggs.get(canonical); if (alias == null) { - // create synthetic alias ove the found agg function - alias = new Alias(af.source(), syntheticName(canonical, child, counter[0]++), canonical, null, true); + // create synthetic alias over the found agg function + alias = new Alias(af.source(), syntheticName(canonical, child, counter[0]++), af.canonical(), null, true); // and remember it to remove duplicates rootAggs.put(canonical, alias); // add it to the list of aggregates and continue @@ -132,6 +141,9 @@ protected LogicalPlan rule(Aggregate aggregate) { Alias alias = as.replaceChild(aggExpression); newEvals.add(alias); newProjections.add(alias.toAttribute()); + } else { + newAggs.add(agg); + newProjections.add(agg.toAttribute()); } } // not an alias (e.g. grouping field) @@ -155,6 +167,10 @@ protected LogicalPlan rule(Aggregate aggregate) { return plan; } + private static AggregateFunction getCannonical(AggregateFunction af, AttributeMap aliases) { + return (AggregateFunction) af.canonical().transformUp(e -> aliases.resolve(e, e)); + } + private static String syntheticName(Expression expression, Expression af, int counter) { return TemporaryNameUtils.temporaryName(expression, af, counter); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 2cd28b817a184..3ff2c3f8be2e8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -178,6 +178,13 @@ import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.GTE; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.LT; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.LTE; +<<<<<<< HEAD +======= +import static org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggsTests.aggFieldName; +import static org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggsTests.aliased; +import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.DOWN; +import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.UP; +>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511)) import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; @@ -473,6 +480,7 @@ public void testAggsWithOverridingInputAndGrouping() throws Exception { } /** +<<<<<<< HEAD * Project[[s{r}#4 AS d, s{r}#4, last_name{f}#21, first_name{f}#18]] * \_Limit[1000[INTEGER]] * \_Aggregate[[last_name{f}#21, first_name{f}#18],[SUM(salary{f}#22) AS s, last_name{f}#21, first_name{f}#18]] @@ -495,6 +503,9 @@ public void testCombineProjectionWithDuplicateAggregation() { } /** +======= + *
{@code
+>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Limit[1000[INTEGER]]
      * \_Aggregate[STANDARD,[],[SUM(salary{f}#12,true[BOOLEAN]) AS sum(salary), SUM(salary{f}#12,last_name{f}#11 == [44 6f 65][KEYW
      * ORD]) AS sum(salary) WheRe last_name ==   "Doe"]]
@@ -3809,6 +3820,7 @@ public void testPruneRenameOnAggBy() {
     }
 
     /**
+<<<<<<< HEAD
      * Expects
      * Project[[c1{r}#2, c2{r}#4, cs{r}#6, cm{r}#8, cexp{r}#10]]
      * \_Eval[[c1{r}#2 AS c2, c1{r}#2 AS cs, c1{r}#2 AS cm, c1{r}#2 AS cexp]]
@@ -3971,6 +3983,8 @@ public void testEliminateDuplicateRenamedGroupings() {
     }
 
     /**
+=======
+>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Expected
      * Limit[2[INTEGER]]
      * \_Filter[a{r}#6 > 2[INTEGER]]
@@ -4061,19 +4075,6 @@ public void testLimitZeroUsesLocalRelation() {
         assertThat(plan, instanceOf(LocalRelation.class));
     }
 
-    private  T aliased(Expression exp, Class clazz) {
-        var alias = as(exp, Alias.class);
-        return as(alias.child(), clazz);
-    }
-
-    private  void aggFieldName(Expression exp, Class aggType, String fieldName) {
-        var alias = as(exp, Alias.class);
-        var af = as(alias.child(), aggType);
-        var field = af.field();
-        var name = field.foldable() ? BytesRefs.toString(field.fold(FoldContext.small())) : Expressions.name(field);
-        assertThat(name, is(fieldName));
-    }
-
     /**
      * Expects
      * Limit[1000[INTEGER]]
@@ -4831,6 +4832,7 @@ public void testStatsExpOverAggsWithScalars() {
 
     /**
      * Expects
+<<<<<<< HEAD
      * Project[[a{r}#5, b{r}#9, $$max(salary)_+_3>$COUNT$2{r}#46 AS d, $$count(salary)_->$MIN$3{r}#47 AS e, $$avg(salary)_+_m
      * >$MAX$1{r}#45 AS g]]
      * \_Eval[[$$$$avg(salary)_+_m>$AVG$0$SUM$0{r}#48 / $$max(salary)_+_3>$COUNT$2{r}#46 AS $$avg(salary)_+_m>$AVG$0, $$avg(
@@ -4902,6 +4904,9 @@ public void testStatsExpOverAggsWithScalarAndDuplicateAggs() {
 
     /**
      * Expects
+=======
+     * 
{@code
+>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Project[[a{r}#5, a{r}#5 AS b, w{r}#12]]
      * \_Limit[1000[INTEGER]]
      *   \_Aggregate[[w{r}#12],[SUM($$salary_/_2_+_la>$SUM$0{r}#26) AS a, w{r}#12]]
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
new file mode 100644
index 0000000000000..e656cc364691a
--- /dev/null
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
@@ -0,0 +1,616 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.optimizer.rules.logical;
+
+import org.elasticsearch.common.lucene.BytesRefs;
+import org.elasticsearch.xpack.esql.core.expression.Alias;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.Expressions;
+import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
+import org.elasticsearch.xpack.esql.core.expression.FoldContext;
+import org.elasticsearch.xpack.esql.core.expression.Literal;
+import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.Count;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.CountDistinct;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.Max;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.Min;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.Sum;
+import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
+import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div;
+import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
+import org.elasticsearch.xpack.esql.optimizer.AbstractLogicalPlanOptimizerTests;
+import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
+import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
+import org.elasticsearch.xpack.esql.plan.logical.Eval;
+import org.elasticsearch.xpack.esql.plan.logical.Limit;
+import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
+import org.elasticsearch.xpack.esql.plan.logical.Project;
+import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
+
+import static org.elasticsearch.xpack.esql.EsqlTestUtils.as;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.is;
+
+//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE", reason = "debug")
+public class DeduplicateAggsTests extends AbstractLogicalPlanOptimizerTests {
+    public static  void aggFieldName(Expression exp, Class aggType, String fieldName) {
+        var alias = as(exp, Alias.class);
+        var af = as(alias.child(), aggType);
+        var field = af.field();
+        var name = field.foldable() ? BytesRefs.toString(field.fold(FoldContext.small())) : Expressions.name(field);
+        assertThat(name, is(fieldName));
+    }
+
+    public static  T aliased(Expression exp, Class clazz) {
+        var alias = as(exp, Alias.class);
+        return as(alias.child(), clazz);
+    }
+
+    /**
+     * 
{@code
+     * Project[[s{r}#4 AS d, s{r}#4, last_name{f}#21, first_name{f}#18]]
+     * \_Limit[1000[INTEGER]]
+     *   \_Aggregate[[last_name{f}#21, first_name{f}#18],[SUM(salary{f}#22) AS s, last_name{f}#21, first_name{f}#18]]
+     *     \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..]
+     * }
+ */ + public void testCombineProjectionWithDuplicateAggregation() { + var plan = plan(""" + from test + | stats s = sum(salary), d = sum(salary), c = sum(salary) by last_name, first_name + | keep d, s, last_name, first_name + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("d", "s", "last_name", "first_name")); + var limit = as(project.child(), Limit.class); + var agg = as(limit.child(), Aggregate.class); + assertThat(Expressions.names(agg.aggregates()), contains("s", "last_name", "first_name")); + assertThat(Alias.unwrap(agg.aggregates().get(0)), instanceOf(Sum.class)); + assertThat(Expressions.names(agg.groupings()), contains("last_name", "first_name")); + } + + /** + * Expects + *
{@code
+     * Project[[c1{r}#2, c2{r}#4, cs{r}#6, cm{r}#8, cexp{r}#10]]
+     * \_Eval[[c1{r}#2 AS c2, c1{r}#2 AS cs, c1{r}#2 AS cm, c1{r}#2 AS cexp]]
+     *   \_Limit[1000[INTEGER]]
+     *     \_Aggregate[[],[COUNT([2a][KEYWORD]) AS c1]]
+     *       \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..]
+     * }
+ */ + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100634") + public void testEliminateDuplicateAggsCountAll() { + var plan = plan(""" + from test + | stats c1 = count(1), c2 = count(2), cs = count(*), cm = count(), cexp = count("123") + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("c1", "c2", "cs", "cm", "cexp")); + var eval = as(project.child(), Eval.class); + var fields = eval.fields(); + assertThat(Expressions.names(fields), contains("c2", "cs", "cm", "cexp")); + for (Alias field : fields) { + assertThat(Expressions.name(field.child()), is("c1")); + } + var limit = as(eval.child(), Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggs = agg.aggregates(); + assertThat(Expressions.names(aggs), contains("c1")); + aggFieldName(aggs.get(0), Count.class, "*"); + var source = as(agg.child(), EsRelation.class); + } + + /** + * Expects + *
{@code
+     * Project[[a{r}#5, b{r}#9, $$max(salary)_+_3>$COUNT$2{r}#46 AS d, $$count(salary)_->$MIN$3{r}#47 AS e, $$avg(salary)_+_m
+     * >$MAX$1{r}#45 AS g]]
+     * \_Eval[[$$$$avg(salary)_+_m>$AVG$0$SUM$0{r}#48 / $$max(salary)_+_3>$COUNT$2{r}#46 AS $$avg(salary)_+_m>$AVG$0, $$avg(
+     * salary)_+_m>$AVG$0{r}#44 + $$avg(salary)_+_m>$MAX$1{r}#45 AS a, $$avg(salary)_+_m>$MAX$1{r}#45 + 3[INTEGER] +
+     * 3.141592653589793[DOUBLE] + $$max(salary)_+_3>$COUNT$2{r}#46 AS b]]
+     *   \_Limit[1000[INTEGER]]
+     *     \_Aggregate[[w{r}#28],[SUM(salary{f}#39) AS $$$$avg(salary)_+_m>$AVG$0$SUM$0, MAX(salary{f}#39) AS $$avg(salary)_+_m>$MAX$1
+     * , COUNT(salary{f}#39) AS $$max(salary)_+_3>$COUNT$2, MIN(salary{f}#39) AS $$count(salary)_->$MIN$3]]
+     *       \_Eval[[languages{f}#37 % 2[INTEGER] AS w]]
+     *         \_EsRelation[test][_meta_field{f}#40, emp_no{f}#34, first_name{f}#35, ..]
+     * }
+ */ + public void testStatsExpOverAggsWithScalarAndDuplicateAggs() { + var plan = optimizedPlan(""" + from test + | stats a = avg(salary) + max(salary), + b = max(salary) + 3 + PI() + count(salary), + c = count(salary) - min(salary), + d = count(salary), + e = min(salary), + f = max(salary), + g = max(salary) + by w = languages % 2 + | keep a, b, d, e, g + """); + + var project = as(plan, Project.class); + var projections = project.projections(); + assertThat(Expressions.names(projections), contains("a", "b", "d", "e", "g")); + var refA = Alias.unwrap(projections.get(0)); + var refB = Alias.unwrap(projections.get(1)); + var refD = Alias.unwrap(projections.get(2)); + var refE = Alias.unwrap(projections.get(3)); + var refG = Alias.unwrap(projections.get(4)); + + var eval = as(project.child(), Eval.class); + var fields = eval.fields(); + // avg = Sum/Count + assertThat(Expressions.name(fields.get(0)), containsString("AVG")); + assertThat(Alias.unwrap(fields.get(0)), instanceOf(Div.class)); + // avg + max + assertThat(Expressions.name(fields.get(1)), is("a")); + var add = as(Alias.unwrap(fields.get(1)), Add.class); + var max_salary = add.right(); + assertThat(Expressions.attribute(fields.get(1)), is(Expressions.attribute(refA))); + + assertThat(Expressions.name(fields.get(2)), is("b")); + assertThat(Expressions.attribute(fields.get(2)), is(Expressions.attribute(refB))); + + add = as(Alias.unwrap(fields.get(2)), Add.class); + add = as(add.left(), Add.class); + add = as(add.left(), Add.class); + assertThat(Expressions.attribute(max_salary), is(Expressions.attribute(add.left()))); + + var limit = as(eval.child(), Limit.class); + + var agg = as(limit.child(), Aggregate.class); + var aggs = agg.aggregates(); + var sum = as(Alias.unwrap(aggs.get(0)), Sum.class); + + assertThat(Expressions.attribute(aggs.get(1)), is(Expressions.attribute(max_salary))); + var max = as(Alias.unwrap(aggs.get(1)), Max.class); + var count = as(Alias.unwrap(aggs.get(2)), Count.class); + var min = as(Alias.unwrap(aggs.get(3)), Min.class); + + eval = as(agg.child(), Eval.class); + fields = eval.fields(); + assertThat(Expressions.name(fields.get(0)), is("w")); + } + + /** + * Expects + *
{@code
+     * Project[[c1{r}#7, cx{r}#10, cs{r}#12, cy{r}#15]]
+     * \_Eval[[c1{r}#7 AS cx, c1{r}#7 AS cs, c1{r}#7 AS cy]]
+     *   \_Limit[1000[INTEGER]]
+     *     \_Aggregate[[],[COUNT([2a][KEYWORD]) AS c1]]
+     *       \_EsRelation[test][_meta_field{f}#22, emp_no{f}#16, first_name{f}#17, ..]
+     * }
+ */ + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100634") + public void testEliminateDuplicateAggsWithAliasedFields() { + var plan = plan(""" + from test + | eval x = 1 + | eval y = x + | stats c1 = count(1), cx = count(x), cs = count(*), cy = count(y) + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("c1", "cx", "cs", "cy")); + var eval = as(project.child(), Eval.class); + var fields = eval.fields(); + assertThat(Expressions.names(fields), contains("cx", "cs", "cy")); + for (Alias field : fields) { + assertThat(Expressions.name(field.child()), is("c1")); + } + var limit = as(eval.child(), Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggs = agg.aggregates(); + assertThat(Expressions.names(aggs), contains("c1")); + aggFieldName(aggs.get(0), Count.class, "*"); + var source = as(agg.child(), EsRelation.class); + } + + /** + * Expects + *
{@code
+     * Project[[min{r}#1385, max{r}#1388, min{r}#1385 AS min2, max{r}#1388 AS max2, gender{f}#1398]]
+     * \_Limit[1000[INTEGER]]
+     *   \_Aggregate[[gender{f}#1398],[MIN(salary{f}#1401) AS min, MAX(salary{f}#1401) AS max, gender{f}#1398]]
+     *     \_EsRelation[test][_meta_field{f}#1402, emp_no{f}#1396, first_name{f}#..]
+     * }
+ */ + public void testEliminateDuplicateAggsMixed() { + var plan = plan(""" + from test + | stats min = min(salary), max = max(salary), min2 = min(salary), max2 = max(salary) by gender + """); + + var project = as(plan, Project.class); + var projections = project.projections(); + assertThat(Expressions.names(projections), contains("min", "max", "min2", "max2", "gender")); + as(projections.get(0), ReferenceAttribute.class); + as(projections.get(1), ReferenceAttribute.class); + assertThat(Expressions.name(aliased(projections.get(2), ReferenceAttribute.class)), is("min")); + assertThat(Expressions.name(aliased(projections.get(3), ReferenceAttribute.class)), is("max")); + + var limit = as(project.child(), Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggs = agg.aggregates(); + assertThat(Expressions.names(aggs), contains("min", "max", "gender")); + aggFieldName(aggs.get(0), Min.class, "salary"); + aggFieldName(aggs.get(1), Max.class, "salary"); + var source = as(agg.child(), EsRelation.class); + } + + /** + * Expects + *
{@code
+     * Project[[max(x){r}#11, max(x){r}#11 AS max(y), max(x){r}#11 AS max(z)]]
+     * \_Limit[1000[INTEGER]]
+     *   \_Aggregate[[],[MAX(salary{f}#21) AS max(x)]]
+     *     \_EsRelation[test][_meta_field{f}#22, emp_no{f}#16, first_name{f}#17, ..]
+     * }
+ */ + public void testEliminateDuplicateAggsNonCount() { + var plan = plan(""" + from test + | eval x = salary + | eval y = x + | eval z = y + | stats max(x), max(y), max(z) + """); + + var project = as(plan, Project.class); + var projections = project.projections(); + assertThat(Expressions.names(projections), contains("max(x)", "max(y)", "max(z)")); + as(projections.get(0), ReferenceAttribute.class); + assertThat(Expressions.name(aliased(projections.get(1), ReferenceAttribute.class)), is("max(x)")); + assertThat(Expressions.name(aliased(projections.get(2), ReferenceAttribute.class)), is("max(x)")); + + var limit = as(project.child(), Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggs = agg.aggregates(); + assertThat(Expressions.names(aggs), contains("max(x)")); + aggFieldName(aggs.get(0), Max.class, "salary"); + var source = as(agg.child(), EsRelation.class); + } + + /** + * Expects + *
{@code
+     * Limit[1000[INTEGER]]
+     * \_Aggregate[[salary{f}#12],[salary{f}#12, salary{f}#12 AS x]]
+     *   \_EsRelation[test][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..]
+     * }
+ */ + public void testEliminateDuplicateRenamedGroupings() { + var plan = plan(""" + from test + | eval x = salary + | stats by salary, x + """); + + var limit = as(plan, Limit.class); + var agg = as(limit.child(), Aggregate.class); + var relation = as(agg.child(), EsRelation.class); + + assertThat(Expressions.names(agg.groupings()), contains("salary")); + assertThat(Expressions.names(agg.aggregates()), contains("salary", "x")); + } + + /** + * Expects + *
{@code
+     * EsqlProject[[a{r}#5, c{r}#8]]
+     * \_Eval[[null[INTEGER] AS x]]
+     *   \_EsRelation[test][_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, g..]
+     * }
+ */ + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100634") + public void testEliminateDuplicateAggWithNull() { + var plan = plan(""" + from test + | eval x = null + 1 + | stats a = avg(x), c = count(x) + """); + fail("Awaits fix"); + } + + /** + *
{@code
+     * Project[[a{r}#8, b{r}#12, c{r}#15, c{r}#15 AS d#18]]
+     * \_Eval[[a{r}#8 + a{r}#8 AS b#12]]
+     *   \_Limit[1000[INTEGER],false,false]
+     *     \_Aggregate[[],[COUNT(scalerank{f}#21,true[BOOLEAN],PT0S[TIME_DURATION]) AS a#8,
+     *     COUNTDISTINCT(scalerank{f}#21,true[BOOLEAN],PT0S[TIME_DURATION],10[INTEGER]) AS c#15]]
+     *       \_EsRelation[airports][abbrev{f}#19, city{f}#25, city_location{f}#26, coun..]
+     * }
+ */ + public void testAggsDeduplication() { + String query = """ + FROM airports + | rename scalerank AS x + | stats a = count(x), b = count(x) + count(x), c = count_distinct(x, 10), d = count_distinct(x, 10 + 1 - 1) + """; + + LogicalPlan plan = planAirports(query); + Project project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(4)); + var a = as(projections.get(0), ReferenceAttribute.class); + var b = as(projections.get(1), ReferenceAttribute.class); + var c = as(projections.get(2), ReferenceAttribute.class); + var d = as(projections.get(3), Alias.class); + + assertEquals("a", a.name()); + assertEquals("b", b.name()); + assertEquals("c", c.name()); + assertEquals("d", d.name()); + assertEquals("c", as(d.child(), ReferenceAttribute.class).name()); + + Eval eval = as(project.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var bEval = as(eval.fields().getFirst(), Alias.class); + var bAdd = as(bEval.child(), Add.class); + + assertEquals("a", as(bAdd.left(), ReferenceAttribute.class).name()); + assertEquals("a", as(bAdd.right(), ReferenceAttribute.class).name()); + assertEquals("b", bEval.name()); + + Limit limit = as(eval.child(), Limit.class); + Aggregate agg = as(limit.child(), Aggregate.class); + + assertThat(agg.aggregates(), hasSize(2)); + var countAlias = as(agg.aggregates().get(0), Alias.class); + var countDistinctAliasFirst = as(agg.aggregates().get(1), Alias.class); + + var count = as(countAlias.child(), Count.class); + var countDistinct = as(countDistinctAliasFirst.child(), CountDistinct.class); + + assertEquals("a", countAlias.name()); + assertEquals("c", countDistinctAliasFirst.name()); + assertEquals("scalerank", as(count.field(), FieldAttribute.class).name()); + assertEquals("scalerank", as(countDistinct.field(), FieldAttribute.class).name()); + } + + /** + *
{@code
+     * Project[[MAX(y){r}#16, 2* MAX(a){r}#18]]
+     * \_Eval[[MAX(y){r}#16 * 2[INTEGER] AS 2* MAX(a)#18]]
+     *   \_Limit[1000[INTEGER],false,false]
+     *     \_Aggregate[[],[MAX(scalerank{f}#21,true[BOOLEAN],PT0S[TIME_DURATION]) AS MAX(y)#16]]
+     *       \_EsRelation[airports][abbrev{f}#19, city{f}#25, city_location{f}#26, coun..]
+     * }
+ */ + public void testAggsDeduplicationWithComplicatedAliasChains() { + String query = """ + FROM airports + | RENAME scalerank AS x + | EVAL y = x, z = x + | RENAME z AS a + | STATS MAX(y), 2* MAX(a) + """; + + LogicalPlan plan = planAirports(query); + Project project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(2)); + + var firstResult = as(projections.get(0), ReferenceAttribute.class); + var secondResult = as(projections.get(1), ReferenceAttribute.class); + assertEquals("MAX(y)", firstResult.name()); + assertEquals("2* MAX(a)", secondResult.name()); + + var eval = as(project.child(), Eval.class); + var outputs = eval.output(); + assertThat(outputs, hasSize(2)); + + assertEquals("MAX(y)", as(outputs.get(0), ReferenceAttribute.class).name()); + assertEquals("2* MAX(a)", as(outputs.get(1), ReferenceAttribute.class).name()); + assertThat(eval.fields(), hasSize(1)); + var twoMax = as(eval.fields().getFirst(), Alias.class); + var mul = as(twoMax.child(), Mul.class); + assertEquals("MAX(y)", as(mul.left(), ReferenceAttribute.class).name()); + assertEquals(2, as(mul.right(), Literal.class).value()); + assertEquals("2* MAX(a)", twoMax.name()); + + var aggregates = as(as(eval.child(), Limit.class).child(), Aggregate.class).aggregates(); + assertThat(aggregates, hasSize(1)); + var agg = as(aggregates.getFirst(), Alias.class); + assertEquals("MAX(y)", agg.name()); + var maxY = as(agg.child(), Max.class); + assertEquals("scalerank", as(maxY.field(), FieldAttribute.class).name()); + } + + /** + *
{@code
+     * Project[[COUNT(y){r}#7, 2 * COUNT(scalerank){r}#9, y{r}#5]]
+     * \_Eval[[COUNT(y){r}#7 * 2[INTEGER] AS 2 * COUNT(scalerank)#9]]
+     *   \_Limit[1000[INTEGER],false,false]
+     *     \_Aggregate[[scalerank{f}#13],[COUNT(scalerank{f}#13,true[BOOLEAN],PT0S[TIME_DURATION]) AS COUNT(y)#7, scalerank{f}#13 AS y
+     * #5]]
+     *       \_EsRelation[airports][abbrev{f}#11, city{f}#17, city_location{f}#18, coun..]
+     * }
+ */ + public void testAggsDeduplicationInByClauses() { + String query = """ + FROM airports + | STATS COUNT(y), 2 * COUNT(scalerank) BY y = scalerank + """; + + LogicalPlan plan = planAirports(query); + Project project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(3)); + + assertEquals("COUNT(y)", as(projections.get(0), ReferenceAttribute.class).name()); + assertEquals("2 * COUNT(scalerank)", as(projections.get(1), ReferenceAttribute.class).name()); + assertEquals("y", as(projections.get(2), ReferenceAttribute.class).name()); + + var eval = as(project.child(), Eval.class); + + assertThat(eval.fields(), hasSize(1)); + var evalAlias = as(eval.fields().getFirst(), Alias.class); + var mul = as(evalAlias.child(), Mul.class); + assertEquals("COUNT(y)", as(mul.left(), ReferenceAttribute.class).name()); + assertEquals(2, as(mul.right(), Literal.class).value()); + assertEquals("2 * COUNT(scalerank)", evalAlias.name()); + + var aggregates = as(as(eval.child(), Limit.class).child(), Aggregate.class).aggregates(); + assertThat(aggregates, hasSize(2)); + + var countAlias = as(aggregates.getFirst(), Alias.class); + assertEquals("COUNT(y)", countAlias.name()); + var countField = as(countAlias.child(), Count.class).field(); + assertEquals("scalerank", as(countField, FieldAttribute.class).name()); + assertEquals("scalerank", as(as(aggregates.get(1), Alias.class).child(), FieldAttribute.class).name()); + } + + /** + *
{@code
+     * Project[[a{r}#5, b{r}#9, c{r}#13]]
+     * \_Eval[[a{r}#5 * 2[INTEGER] AS b#9]]
+     *   \_Limit[1000[INTEGER],false,false]
+     *     \_Aggregate[[],[COUNT(scalerank{f}#16,scalerank{f}#16 > 7[INTEGER],PT0S[TIME_DURATION]) AS a#5, COUNTDISTINCT(scalerank{f}#
+     * 16,true[BOOLEAN],PT0S[TIME_DURATION]) AS c#13]]
+     *       \_EsRelation[airports][abbrev{f}#14, city{f}#20, city_location{f}#21, coun..]
+     * }
+ */ + public void testDuplicatedAggsWithSameCannonicalizationInWhereCondition() { + String query = """ + FROM airports + | STATS a = COUNT(scalerank) WHERE scalerank > 7, + b = 2*COUNT(scalerank) WHERE 7 < scalerank, + c = COUNT_DISTINCT(scalerank) + """; + + LogicalPlan plan = planAirports(query); + Project project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(3)); + + assertEquals("a", as(projections.get(0), ReferenceAttribute.class).name()); + assertEquals("b", as(projections.get(1), ReferenceAttribute.class).name()); + assertEquals("c", as(projections.get(2), ReferenceAttribute.class).name()); + + var eval = as(project.child(), Eval.class); + + assertThat(eval.fields(), hasSize(1)); + var evalAlias = as(eval.fields().getFirst(), Alias.class); + var mul = as(evalAlias.child(), Mul.class); + assertEquals("a", as(mul.left(), ReferenceAttribute.class).name()); + assertEquals(2, as(mul.right(), Literal.class).value()); + assertEquals("b", evalAlias.name()); + + var aggregates = as(as(eval.child(), Limit.class).child(), Aggregate.class).aggregates(); + assertThat(aggregates, hasSize(2)); + + var a = as(aggregates.get(0), Alias.class); + var c = as(aggregates.get(1), Alias.class); + + assertEquals("a", a.name()); + var aField = as(a.child(), Count.class).field(); + assertEquals("scalerank", as(aField, FieldAttribute.class).name()); + + assertEquals("c", c.name()); + var cField = as(c.child(), CountDistinct.class).field(); + assertEquals("scalerank", as(cField, FieldAttribute.class).name()); + } + + /** + * Project[[a{r}#5, b{r}#8, c{r}#11]] + * \_Eval[[$$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[INTEGER] AS a#5, $$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[ + * INTEGER] AS b#8, $$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[INTEGER] AS c#11]] + * \_Limit[1000[INTEGER],false,false] + * \_Aggregate[[],[COUNTDISTINCT(scalerank{f}#14,true[BOOLEAN],PT0S[TIME_DURATION],100[INTEGER]) AS $$COUNTDISTINCT$2*COUNT_DI + * STINC>$0#20]] + * \_EsRelation[airports][abbrev{f}#12, city{f}#18, city_location{f}#19, coun..] + */ + public void testDuplicatedAggWithFoldableIdenticalExpressions() { + String query = """ + FROM airports + | STATS a = 2*COUNT_DISTINCT(scalerank, 100), + b = 2*COUNT_DISTINCT(scalerank, 220 - 150 + 30), + c = 2*COUNT_DISTINCT(scalerank, 1 + 200 - 80 - 20 - 1) + """; + + LogicalPlan plan = planAirports(query); + Project project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(3)); + + assertEquals("a", as(projections.get(0), ReferenceAttribute.class).name()); + assertEquals("b", as(projections.get(1), ReferenceAttribute.class).name()); + assertEquals("c", as(projections.get(2), ReferenceAttribute.class).name()); + + var eval = as(project.child(), Eval.class); + var aggregates = as(as(eval.child(), Limit.class).child(), Aggregate.class).aggregates(); + + assertThat(eval.fields(), hasSize(3)); + var firstEvalField = as(as(eval.fields().get(0), Alias.class).child(), Mul.class); + var secondEvalField = as(as(eval.fields().get(1), Alias.class).child(), Mul.class); + var thirdEvalField = as(as(eval.fields().get(2), Alias.class).child(), Mul.class); + assertEquals(firstEvalField, secondEvalField); + assertEquals(secondEvalField, thirdEvalField); + + assertThat(aggregates, hasSize(1)); + var countDistinct = as(as(aggregates.get(0), Alias.class).child(), CountDistinct.class); + assertEquals("scalerank", as(countDistinct.field(), FieldAttribute.class).name()); + } + + /** + * Limit[1000[INTEGER],false,false] + * \_InlineJoin[LEFT,[],[]] + * |_EsRelation[airports][abbrev{f}#12, city{f}#18, city_location{f}#19, coun..] + * \_Project[[a{r}#5, b{r}#8, c{r}#11]] + * \_Eval[[$$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[INTEGER] AS a#5, $$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[ + * INTEGER] AS b#8, $$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[INTEGER] AS c#11]] + * \_Aggregate[[],[COUNTDISTINCT(scalerank{f}#14,true[BOOLEAN],PT0S[TIME_DURATION],100[INTEGER]) AS $$COUNTDISTINCT$2*COUNT_DI + * STINC>$0#20]] + * \_StubRelation[[abbrev{f}#12, city{f}#18, city_location{f}#19, country{f}#17, location{f}#16, name{f}#13, scalerank{f}#14, ty + * pe{f}#15]] + */ + public void testDuplicatedInlineAggWithFoldableIdenticalExpressions() { + String query = """ + FROM airports + | INLINE STATS a = 2*COUNT_DISTINCT(scalerank, 100), + b = 2*COUNT_DISTINCT(scalerank, 220 - 150 + 30), + c = 2*COUNT_DISTINCT(scalerank, 1 + 200 - 80 - 20 - 1) + """; + + LogicalPlan plan = planAirports(query); + var limit = as(plan, Limit.class); + var inlineJoin = as(limit.child(), InlineJoin.class); + + var project = as(inlineJoin.right(), Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(3)); + + assertEquals("a", as(projections.get(0), ReferenceAttribute.class).name()); + assertEquals("b", as(projections.get(1), ReferenceAttribute.class).name()); + assertEquals("c", as(projections.get(2), ReferenceAttribute.class).name()); + + var eval = as(project.child(), Eval.class); + var aggregates = as(eval.child(), Aggregate.class).aggregates(); + assertThat(eval.fields(), hasSize(3)); + var firstEvalField = as(as(eval.fields().get(0), Alias.class).child(), Mul.class); + var secondEvalField = as(as(eval.fields().get(1), Alias.class).child(), Mul.class); + var thirdEvalField = as(as(eval.fields().get(2), Alias.class).child(), Mul.class); + assertEquals(firstEvalField, secondEvalField); + assertEquals(secondEvalField, thirdEvalField); + + assertThat(aggregates, hasSize(1)); + var countDistinct = as(as(aggregates.get(0), Alias.class).child(), CountDistinct.class); + assertEquals("scalerank", as(countDistinct.field(), FieldAttribute.class).name()); + } +} From d58d058fb3fdbd02a338564208d7cf863b1c8fb1 Mon Sep 17 00:00:00 2001 From: ncordon Date: Fri, 5 Dec 2025 16:29:14 +0100 Subject: [PATCH 2/7] Fixes changelog file --- docs/changelog/137511.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/changelog/137511.yaml b/docs/changelog/137511.yaml index 14340bda606e9..c31f05da0eb2b 100644 --- a/docs/changelog/137511.yaml +++ b/docs/changelog/137511.yaml @@ -2,4 +2,6 @@ pr: 137511 summary: Fixes esql class cast bug in STATS at planning level area: ES|QL type: bug -issues: [] +issues: + - 133992 + - 136598 From 6486aa5fc72e3be62fe51ccf4cbdafef711b3cc2 Mon Sep 17 00:00:00 2001 From: ncordon Date: Fri, 5 Dec 2025 16:32:32 +0100 Subject: [PATCH 3/7] Reverts file --- .../optimizer/LogicalPlanOptimizerTests.java | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 3ff2c3f8be2e8..2cd28b817a184 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -178,13 +178,6 @@ import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.GTE; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.LT; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.LTE; -<<<<<<< HEAD -======= -import static org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggsTests.aggFieldName; -import static org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggsTests.aliased; -import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.DOWN; -import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.UP; ->>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511)) import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; @@ -480,7 +473,6 @@ public void testAggsWithOverridingInputAndGrouping() throws Exception { } /** -<<<<<<< HEAD * Project[[s{r}#4 AS d, s{r}#4, last_name{f}#21, first_name{f}#18]] * \_Limit[1000[INTEGER]] * \_Aggregate[[last_name{f}#21, first_name{f}#18],[SUM(salary{f}#22) AS s, last_name{f}#21, first_name{f}#18]] @@ -503,9 +495,6 @@ public void testCombineProjectionWithDuplicateAggregation() { } /** -======= - *
{@code
->>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Limit[1000[INTEGER]]
      * \_Aggregate[STANDARD,[],[SUM(salary{f}#12,true[BOOLEAN]) AS sum(salary), SUM(salary{f}#12,last_name{f}#11 == [44 6f 65][KEYW
      * ORD]) AS sum(salary) WheRe last_name ==   "Doe"]]
@@ -3820,7 +3809,6 @@ public void testPruneRenameOnAggBy() {
     }
 
     /**
-<<<<<<< HEAD
      * Expects
      * Project[[c1{r}#2, c2{r}#4, cs{r}#6, cm{r}#8, cexp{r}#10]]
      * \_Eval[[c1{r}#2 AS c2, c1{r}#2 AS cs, c1{r}#2 AS cm, c1{r}#2 AS cexp]]
@@ -3983,8 +3971,6 @@ public void testEliminateDuplicateRenamedGroupings() {
     }
 
     /**
-=======
->>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Expected
      * Limit[2[INTEGER]]
      * \_Filter[a{r}#6 > 2[INTEGER]]
@@ -4075,6 +4061,19 @@ public void testLimitZeroUsesLocalRelation() {
         assertThat(plan, instanceOf(LocalRelation.class));
     }
 
+    private  T aliased(Expression exp, Class clazz) {
+        var alias = as(exp, Alias.class);
+        return as(alias.child(), clazz);
+    }
+
+    private  void aggFieldName(Expression exp, Class aggType, String fieldName) {
+        var alias = as(exp, Alias.class);
+        var af = as(alias.child(), aggType);
+        var field = af.field();
+        var name = field.foldable() ? BytesRefs.toString(field.fold(FoldContext.small())) : Expressions.name(field);
+        assertThat(name, is(fieldName));
+    }
+
     /**
      * Expects
      * Limit[1000[INTEGER]]
@@ -4832,7 +4831,6 @@ public void testStatsExpOverAggsWithScalars() {
 
     /**
      * Expects
-<<<<<<< HEAD
      * Project[[a{r}#5, b{r}#9, $$max(salary)_+_3>$COUNT$2{r}#46 AS d, $$count(salary)_->$MIN$3{r}#47 AS e, $$avg(salary)_+_m
      * >$MAX$1{r}#45 AS g]]
      * \_Eval[[$$$$avg(salary)_+_m>$AVG$0$SUM$0{r}#48 / $$max(salary)_+_3>$COUNT$2{r}#46 AS $$avg(salary)_+_m>$AVG$0, $$avg(
@@ -4904,9 +4902,6 @@ public void testStatsExpOverAggsWithScalarAndDuplicateAggs() {
 
     /**
      * Expects
-=======
-     * 
{@code
->>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Project[[a{r}#5, a{r}#5 AS b, w{r}#12]]
      * \_Limit[1000[INTEGER]]
      *   \_Aggregate[[w{r}#12],[SUM($$salary_/_2_+_la>$SUM$0{r}#26) AS a, w{r}#12]]

From f1ed57c01af4432120fbf93156ac07487681c327 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine 
Date: Fri, 5 Dec 2025 15:47:47 +0000
Subject: [PATCH 4/7] [CI] Auto commit changes from spotless

---
 .../xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java | 1 -
 1 file changed, 1 deletion(-)

diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
index e656cc364691a..2babfed5b0fad 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
@@ -31,7 +31,6 @@
 import org.elasticsearch.xpack.esql.plan.logical.Limit;
 import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
 import org.elasticsearch.xpack.esql.plan.logical.Project;
-import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
 
 import static org.elasticsearch.xpack.esql.EsqlTestUtils.as;
 import static org.hamcrest.Matchers.contains;

From 80ac2067d6c9cbb01fb1c9a0d1811611972b886a Mon Sep 17 00:00:00 2001
From: ncordon 
Date: Mon, 8 Dec 2025 22:42:17 +0100
Subject: [PATCH 5/7] Small nit

---
 .../qa/testFixtures/src/main/resources/inlinestats.csv-spec   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec
index 5eadaf5b43f49..44f678b2c7e5d 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec
@@ -1003,12 +1003,12 @@ alpha db server    |alpha          |127.0.0.1      |hosts           |127.0.0.1|1
 ;
 
 fixClassCastBugWithSeveralCounts
-required_capability: inline_stats
+required_capability: inlinestats_v7
 required_capability: fix_stats_classcast_exception
 
 FROM sample_data, sample_data_str
 | EVAL one_ip = client_ip::ip
-| INLINE STATS count1=count(client_ip::ip), count2=count(one_ip)
+| INLINESTATS count1=count(client_ip::ip), count2=count(one_ip)
 | KEEP count1, count2
 | LIMIT 3
 ;

From 510740ee6eadad578205d858de18a48d9e103a1d Mon Sep 17 00:00:00 2001
From: ncordon 
Date: Mon, 8 Dec 2025 22:56:31 +0100
Subject: [PATCH 6/7] Adds CoordinatorOnly interface

---
 .../optimizer/LocalLogicalPlanOptimizer.java  |  7 +++++++
 .../rules/logical/DeduplicateAggs.java        |  2 +-
 .../rules/logical/OptimizerRules.java         | 19 +++++++++++++++++++
 .../rules/logical/DeduplicateAggsTests.java   |  3 ++-
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java
index dab2b35025aa6..af161a473b83f 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java
@@ -7,6 +7,7 @@
 
 package org.elasticsearch.xpack.esql.optimizer;
 
+import org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateEmptyRelation;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStatsFilteredAggWithEval;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStringCasingWithInsensitiveRegexMatch;
@@ -70,6 +71,12 @@ private static Batch localOperators() {
                 // skip it: once a fragment contains an Agg, this can no longer be pruned, which the rule can do
                 case ReplaceStatsFilteredAggWithEval ignoredReplace -> {
                 }
+                case OptimizerRules.LocalAware localAware -> {
+                    Rule local = localAware.local();
+                    if (local != null) {
+                        newRules.add(local);
+                    }
+                }
                 default -> newRules.add(r);
             }
         }
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java
index 170f87f86f8ff..ff01b5255a493 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java
@@ -13,7 +13,7 @@
  * becomes
  * stats a = min(x), c = count(*) by g | eval b = a, d = c | keep a, b, c, d, g
  */
-public final class DeduplicateAggs extends ReplaceAggregateAggExpressionWithEval {
+public final class DeduplicateAggs extends ReplaceAggregateAggExpressionWithEval implements OptimizerRules.CoordinatorOnly {
 
     public DeduplicateAggs() {
         super(false);
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java
index a32bf3a720088..a73340fa49b5c 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java
@@ -109,4 +109,23 @@ public final LogicalPlan apply(LogicalPlan plan, P context) {
 
         protected abstract LogicalPlan rule(SubPlan plan, P context);
     }
+
+    /**
+     * Rule that has a different implementation when applied to a local plan.
+     */
+    public interface LocalAware {
+        /**
+         * the local version of the rule. {@code null} if the rule should not be applied locally.
+         */
+        Rule local();
+    }
+
+    /**
+     * This rule should only be applied on the coordinator plan, not for a local plan.
+     */
+    public interface CoordinatorOnly extends LocalAware {
+        default Rule local() {
+            return null;
+        }
+    }
 }
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
index 2babfed5b0fad..4f3672548e45f 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
@@ -31,6 +31,7 @@
 import org.elasticsearch.xpack.esql.plan.logical.Limit;
 import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
 import org.elasticsearch.xpack.esql.plan.logical.Project;
+import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
 
 import static org.elasticsearch.xpack.esql.EsqlTestUtils.as;
 import static org.hamcrest.Matchers.contains;
@@ -582,7 +583,7 @@ public void testDuplicatedAggWithFoldableIdenticalExpressions() {
     public void testDuplicatedInlineAggWithFoldableIdenticalExpressions() {
         String query = """
                 FROM airports
-                | INLINE STATS a = 2*COUNT_DISTINCT(scalerank, 100),
+                | INLINESTATS a = 2*COUNT_DISTINCT(scalerank, 100),
                 b = 2*COUNT_DISTINCT(scalerank, 220 - 150 + 30),
                 c = 2*COUNT_DISTINCT(scalerank, 1 + 200 - 80 - 20 - 1)
             """;

From 29d6f88907f7cb4ef924e9f03362bb3366ff81a1 Mon Sep 17 00:00:00 2001
From: ncordon 
Date: Tue, 9 Dec 2025 10:48:08 +0100
Subject: [PATCH 7/7] Fixes test

---
 .../src/main/resources/inlinestats.csv-spec      | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec
index 44f678b2c7e5d..bec21f4b65a1c 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec
@@ -1010,11 +1010,21 @@ FROM sample_data, sample_data_str
 | EVAL one_ip = client_ip::ip
 | INLINESTATS count1=count(client_ip::ip), count2=count(one_ip)
 | KEEP count1, count2
-| LIMIT 3
 ;
 
 count1:long    |count2:long
-14             |14 
-14             |14 
+14             |14
+14             |14
+14             |14
+14             |14
+14             |14
+14             |14
+14             |14
+14             |14
+14             |14
+14             |14
+14             |14
+14             |14
+14             |14
 14             |14
 ;