From c99acc9d41803998e84601be7cf3cc5e2e2b5a61 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 26 Nov 2025 00:44:04 +0000 Subject: [PATCH 1/2] refactor: fix ops.StrftimeOp, ops.ToDatetimeOp, ops.ToTimestampOp in sqlglot compiler --- .../sqlglot/expressions/datetime_ops.py | 69 ++++++++++++++++--- .../sqlglot/expressions/timedelta_ops.py | 8 +++ bigframes/core/compile/sqlglot/sqlglot_ir.py | 2 + .../test_datetime_ops/test_strftime/out.sql | 13 +++- .../test_to_datetime/out.sql | 12 +++- .../test_to_timestamp/out.sql | 15 +++- .../test_to_timedelta/out.sql | 53 +++++++++----- .../sqlglot/expressions/test_datetime_ops.py | 37 ++++++---- .../sqlglot/expressions/test_timedelta_ops.py | 7 +- 9 files changed, 165 insertions(+), 51 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py index 949b122a1d..0f1e9dadf3 100644 --- a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py @@ -16,7 +16,9 @@ import sqlglot.expressions as sge +from bigframes import dtypes from bigframes import operations as ops +from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler @@ -81,7 +83,17 @@ def _(expr: TypedExpr) -> sge.Expression: @register_unary_op(ops.StrftimeOp, pass_op=True) def _(expr: TypedExpr, op: ops.StrftimeOp) -> sge.Expression: - return sge.func("FORMAT_TIMESTAMP", sge.convert(op.date_format), expr.expr) + func_name = "" + if expr.dtype == dtypes.DATE_DTYPE: + func_name = "FORMAT_DATE" + elif expr.dtype == dtypes.DATETIME_DTYPE: + func_name = "FORMAT_DATETIME" + elif expr.dtype == dtypes.TIME_DTYPE: + func_name = "FORMAT_TIME" + elif expr.dtype == dtypes.TIMESTAMP_DTYPE: + func_name = "FORMAT_TIMESTAMP" + + return sge.func(func_name, sge.convert(op.date_format), expr.expr) @register_unary_op(ops.time_op) @@ -89,14 +101,55 @@ def _(expr: TypedExpr) -> sge.Expression: return sge.func("TIME", expr.expr) -@register_unary_op(ops.ToDatetimeOp) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Cast(this=sge.func("TIMESTAMP_SECONDS", expr.expr), to="DATETIME") - +@register_unary_op(ops.ToDatetimeOp, pass_op=True) +def _(expr: TypedExpr, op: ops.ToDatetimeOp) -> sge.Expression: + if op.format: + result = expr.expr + if expr.dtype != dtypes.STRING_DTYPE: + result = sge.Cast(this=result, to="STRING") + result = sge.func( + "PARSE_TIMESTAMP", sge.convert(op.format), result, sge.convert("UTC") + ) + return sge.Cast(this=result, to="DATETIME") + + if expr.dtype == dtypes.STRING_DTYPE: + return sge.TryCast(this=expr.expr, to="DATETIME") + + value = expr.expr + unit = op.unit or "ns" + factor = UNIT_TO_US_CONVERSION_FACTORS[unit] + if factor != 1: + value = sge.Mul(this=value, expression=sge.convert(factor)) + value = sge.func("TRUNC", value) + return sge.Cast( + this=sge.func("TIMESTAMP_MICROS", sge.Cast(this=value, to="INT64")), + to="DATETIME", + ) + + +@register_unary_op(ops.ToTimestampOp, pass_op=True) +def _(expr: TypedExpr, op: ops.ToTimestampOp) -> sge.Expression: + if op.format: + result = expr.expr + if expr.dtype != dtypes.STRING_DTYPE: + result = sge.Cast(this=result, to="STRING") + return sge.func( + "PARSE_TIMESTAMP", sge.convert(op.format), expr.expr, sge.convert("UTC") + ) -@register_unary_op(ops.ToTimestampOp) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("TIMESTAMP_SECONDS", expr.expr) + if expr.dtype == dtypes.STRING_DTYPE: + return sge.func("TIMESTAMP", expr.expr) + + value = expr.expr + unit = op.unit or "ns" + factor = UNIT_TO_US_CONVERSION_FACTORS[unit] + if factor != 1: + value = sge.Mul(this=value, expression=sge.convert(factor)) + value = sge.func("TRUNC", value) + return sge.Cast( + this=sge.func("TIMESTAMP_MICROS", sge.Cast(this=value, to="INT64")), + to="TIMESTAMP", + ) @register_unary_op(ops.UnixMicros) diff --git a/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py b/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py index 667c828b13..b8fdfd5a09 100644 --- a/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py @@ -16,6 +16,7 @@ import sqlglot.expressions as sge +from bigframes import dtypes from bigframes import operations as ops from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr @@ -32,7 +33,14 @@ def _(expr: TypedExpr) -> sge.Expression: @register_unary_op(ops.ToTimedeltaOp, pass_op=True) def _(expr: TypedExpr, op: ops.ToTimedeltaOp) -> sge.Expression: value = expr.expr + if expr.dtype == dtypes.TIMEDELTA_DTYPE: + return value + factor = UNIT_TO_US_CONVERSION_FACTORS[op.unit] if factor != 1: value = sge.Mul(this=value, expression=sge.convert(factor)) + if expr.dtype == dtypes.FLOAT_DTYPE: + value = sge.Floor(this=value) + if expr.dtype != dtypes.INT_DTYPE: + value = sge.Cast(this=value, to=sge.DataType(this="INT64")) return value diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py index fd3bdd532f..0d568b098b 100644 --- a/bigframes/core/compile/sqlglot/sqlglot_ir.py +++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py @@ -648,6 +648,8 @@ def _literal(value: typing.Any, dtype: dtypes.Dtype) -> sge.Expression: elif dtype == dtypes.BYTES_DTYPE: return _cast(str(value), sqlglot_type) elif dtypes.is_time_like(dtype): + if isinstance(value, str): + return _cast(sge.convert(value), sqlglot_type) if isinstance(value, np.generic): value = value.item() return _cast(sge.convert(value.isoformat()), sqlglot_type) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_strftime/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_strftime/out.sql index 190cd7895b..1d8f62f948 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_strftime/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_strftime/out.sql @@ -1,13 +1,22 @@ WITH `bfcte_0` AS ( SELECT + `date_col`, + `datetime_col`, + `time_col`, `timestamp_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT *, - FORMAT_TIMESTAMP('%Y-%m-%d', `timestamp_col`) AS `bfcol_1` + FORMAT_DATE('%Y-%m-%d', `date_col`) AS `bfcol_8`, + FORMAT_DATETIME('%Y-%m-%d', `datetime_col`) AS `bfcol_9`, + FORMAT_TIME('%Y-%m-%d', `time_col`) AS `bfcol_10`, + FORMAT_TIMESTAMP('%Y-%m-%d', `timestamp_col`) AS `bfcol_11` FROM `bfcte_0` ) SELECT - `bfcol_1` AS `timestamp_col` + `bfcol_8` AS `date_col`, + `bfcol_9` AS `datetime_col`, + `bfcol_10` AS `time_col`, + `bfcol_11` AS `timestamp_col` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql index bbba3b1533..a8d40a8486 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql @@ -1,13 +1,19 @@ WITH `bfcte_0` AS ( SELECT - `int64_col` + `float64_col`, + `int64_col`, + `string_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT *, - CAST(TIMESTAMP_SECONDS(`int64_col`) AS DATETIME) AS `bfcol_1` + CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS DATETIME) AS `bfcol_6`, + SAFE_CAST(`string_col` AS DATETIME) AS `bfcol_7`, + CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `bfcol_8` FROM `bfcte_0` ) SELECT - `bfcol_1` AS `int64_col` + `bfcol_6` AS `int64_col`, + `bfcol_7` AS `string_col`, + `bfcol_8` AS `float64_col` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql index df01fb3269..a5f9ee1112 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql @@ -1,13 +1,24 @@ WITH `bfcte_0` AS ( SELECT + `float64_col`, `int64_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT *, - TIMESTAMP_SECONDS(`int64_col`) AS `bfcol_1` + CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `bfcol_2`, + CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `bfcol_3`, + CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000000) AS INT64)) AS TIMESTAMP) AS `bfcol_4`, + CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000) AS INT64)) AS TIMESTAMP) AS `bfcol_5`, + CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col`) AS INT64)) AS TIMESTAMP) AS `bfcol_6`, + CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `bfcol_7` FROM `bfcte_0` ) SELECT - `bfcol_1` AS `int64_col` + `bfcol_2` AS `int64_col`, + `bfcol_3` AS `float64_col`, + `bfcol_4` AS `int64_col_s`, + `bfcol_5` AS `int64_col_ms`, + `bfcol_6` AS `int64_col_us`, + `bfcol_7` AS `int64_col_ns` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_timedelta_ops/test_to_timedelta/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_timedelta_ops/test_to_timedelta/out.sql index 3c75cc3e89..ed7dbc7c8a 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_timedelta_ops/test_to_timedelta/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_timedelta_ops/test_to_timedelta/out.sql @@ -1,37 +1,54 @@ WITH `bfcte_0` AS ( SELECT + `float64_col`, `int64_col`, `rowindex` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT *, - `rowindex` AS `bfcol_4`, - `int64_col` AS `bfcol_5`, - `int64_col` AS `bfcol_6` + `rowindex` AS `bfcol_6`, + `int64_col` AS `bfcol_7`, + `float64_col` AS `bfcol_8`, + `int64_col` AS `bfcol_9` FROM `bfcte_0` ), `bfcte_2` AS ( SELECT *, - `bfcol_4` AS `bfcol_10`, - `bfcol_5` AS `bfcol_11`, - `bfcol_6` AS `bfcol_12`, - `bfcol_5` * 1000000 AS `bfcol_13` + `bfcol_6` AS `bfcol_14`, + `bfcol_7` AS `bfcol_15`, + `bfcol_8` AS `bfcol_16`, + `bfcol_9` AS `bfcol_17`, + CAST(FLOOR(`bfcol_8` * 1000000) AS INT64) AS `bfcol_18` FROM `bfcte_1` ), `bfcte_3` AS ( SELECT *, - `bfcol_10` AS `bfcol_18`, - `bfcol_11` AS `bfcol_19`, - `bfcol_12` AS `bfcol_20`, - `bfcol_13` AS `bfcol_21`, - `bfcol_11` * 604800000000 AS `bfcol_22` + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + `bfcol_15` * 3600000000 AS `bfcol_29` FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_24` AS `bfcol_36`, + `bfcol_25` AS `bfcol_37`, + `bfcol_26` AS `bfcol_38`, + `bfcol_27` AS `bfcol_39`, + `bfcol_28` AS `bfcol_40`, + `bfcol_29` AS `bfcol_41`, + `bfcol_27` AS `bfcol_42` + FROM `bfcte_3` ) SELECT - `bfcol_18` AS `rowindex`, - `bfcol_19` AS `int64_col`, - `bfcol_20` AS `duration_us`, - `bfcol_21` AS `duration_s`, - `bfcol_22` AS `duration_w` -FROM `bfcte_3` \ No newline at end of file + `bfcol_36` AS `rowindex`, + `bfcol_37` AS `int64_col`, + `bfcol_38` AS `float64_col`, + `bfcol_39` AS `duration_us`, + `bfcol_40` AS `duration_s`, + `bfcol_41` AS `duration_w`, + `bfcol_42` AS `duration_on_duration` +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py index 6384dc79a9..9d93b9019f 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py @@ -143,12 +143,15 @@ def test_second(scalar_types_df: bpd.DataFrame, snapshot): def test_strftime(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, [ops.StrftimeOp("%Y-%m-%d").as_expr(col_name)], [col_name] - ) + bf_df = scalar_types_df[["timestamp_col", "datetime_col", "date_col", "time_col"]] + ops_map = { + "date_col": ops.StrftimeOp("%Y-%m-%d").as_expr("date_col"), + "datetime_col": ops.StrftimeOp("%Y-%m-%d").as_expr("datetime_col"), + "time_col": ops.StrftimeOp("%Y-%m-%d").as_expr("time_col"), + "timestamp_col": ops.StrftimeOp("%Y-%m-%d").as_expr("timestamp_col"), + } + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") @@ -161,22 +164,26 @@ def test_time(scalar_types_df: bpd.DataFrame, snapshot): def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "int64_col" - bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name] - ) + col_names = ["int64_col", "string_col", "float64_col"] + bf_df = scalar_types_df[col_names] + ops_map = {col_name: ops.ToDatetimeOp().as_expr(col_name) for col_name in col_names} + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "int64_col" - bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, [ops.ToTimestampOp().as_expr(col_name)], [col_name] - ) + bf_df = scalar_types_df[["int64_col", "string_col", "float64_col"]] + ops_map = { + "int64_col": ops.ToTimestampOp().as_expr("int64_col"), + "float64_col": ops.ToTimestampOp().as_expr("float64_col"), + "int64_col_s": ops.ToTimestampOp(unit="s").as_expr("int64_col"), + "int64_col_ms": ops.ToTimestampOp(unit="ms").as_expr("int64_col"), + "int64_col_us": ops.ToTimestampOp(unit="us").as_expr("int64_col"), + "int64_col_ns": ops.ToTimestampOp(unit="ns").as_expr("int64_col"), + } + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py index 8675b42bec..164c11aab5 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py @@ -22,10 +22,11 @@ def test_to_timedelta(scalar_types_df: bpd.DataFrame, snapshot): - bf_df = scalar_types_df[["int64_col"]] + bf_df = scalar_types_df[["int64_col", "float64_col"]] bf_df["duration_us"] = bpd.to_timedelta(bf_df["int64_col"], "us") - bf_df["duration_s"] = bpd.to_timedelta(bf_df["int64_col"], "s") - bf_df["duration_w"] = bpd.to_timedelta(bf_df["int64_col"], "W") + bf_df["duration_s"] = bpd.to_timedelta(bf_df["float64_col"], "s") + bf_df["duration_w"] = bpd.to_timedelta(bf_df["int64_col"], "h") + bf_df["duration_on_duration"] = bpd.to_timedelta(bf_df["duration_us"], "ms") snapshot.assert_match(bf_df.sql, "out.sql") From c7e55e27d2d4b7171cce6e4f718b07686cacb1d3 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 26 Nov 2025 18:37:12 +0000 Subject: [PATCH 2/2] address comments --- bigframes/core/compile/sqlglot/expressions/timedelta_ops.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py b/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py index b8fdfd5a09..f5b9f891c1 100644 --- a/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py @@ -40,7 +40,5 @@ def _(expr: TypedExpr, op: ops.ToTimedeltaOp) -> sge.Expression: if factor != 1: value = sge.Mul(this=value, expression=sge.convert(factor)) if expr.dtype == dtypes.FLOAT_DTYPE: - value = sge.Floor(this=value) - if expr.dtype != dtypes.INT_DTYPE: - value = sge.Cast(this=value, to=sge.DataType(this="INT64")) + value = sge.Cast(this=sge.Floor(this=value), to=sge.DataType(this="INT64")) return value