diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py index 501243fe8e..870e7064b8 100644 --- a/bigframes/core/compile/sqlglot/compiler.py +++ b/bigframes/core/compile/sqlglot/compiler.py @@ -180,6 +180,7 @@ def compile_readtable(node: nodes.ReadTableNode, child: ir.SQLGlotIR): col_names=[col.source_id for col in node.scan_list.items], alias_names=[col.id.sql for col in node.scan_list.items], uid_gen=child.uid_gen, + sql_predicate=node.source.sql_predicate, system_time=node.source.at_time, ) diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py index cbc601ea63..176564fe23 100644 --- a/bigframes/core/compile/sqlglot/sqlglot_ir.py +++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py @@ -120,6 +120,7 @@ def from_table( col_names: typing.Sequence[str], alias_names: typing.Sequence[str], uid_gen: guid.SequentialUIDGenerator, + sql_predicate: typing.Optional[str] = None, system_time: typing.Optional[datetime.datetime] = None, ) -> SQLGlotIR: """Builds a SQLGlotIR expression from a BigQuery table. @@ -131,6 +132,7 @@ def from_table( col_names (typing.Sequence[str]): The names of the columns to select. alias_names (typing.Sequence[str]): The aliases for the selected columns. uid_gen (guid.SequentialUIDGenerator): A generator for unique identifiers. + sql_predicate (typing.Optional[str]): An optional SQL predicate for filtering. system_time (typing.Optional[str]): An optional system time for time-travel queries. """ selections = [ @@ -158,6 +160,10 @@ def from_table( version=version, ) select_expr = sge.Select().select(*selections).from_(table_expr) + if sql_predicate: + select_expr = select_expr.where( + sg.parse_one(sql_predicate, dialect="bigquery"), append=False + ) return cls(expr=select_expr, uid_gen=uid_gen) @classmethod diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql new file mode 100644 index 0000000000..0d8a10c956 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql @@ -0,0 +1,14 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col`, + `rowindex`, + `string_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` + WHERE + `rowindex` > 0 AND `string_col` IN ('Hello, World!') +) +SELECT + `rowindex`, + `int64_col`, + `string_col` +FROM `bfcte_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/test_compile_readtable.py b/tests/unit/core/compile/sqlglot/test_compile_readtable.py index 37d87510ee..dd776d9a8f 100644 --- a/tests/unit/core/compile/sqlglot/test_compile_readtable.py +++ b/tests/unit/core/compile/sqlglot/test_compile_readtable.py @@ -67,3 +67,15 @@ def test_compile_readtable_w_system_time( ) bf_df = compiler_session.read_gbq_table(str(table_ref)) snapshot.assert_match(bf_df.sql, "out.sql") + + +def test_compile_readtable_w_columns_filters(compiler_session, snapshot): + columns = ["rowindex", "int64_col", "string_col"] + filters = [("rowindex", ">", 0), ("string_col", "in", ["Hello, World!"])] + bf_df = compiler_session._loader.read_gbq_table( + "bigframes-dev.sqlglot_test.scalar_types", + enable_snapshot=False, + columns=columns, + filters=filters, + ) + snapshot.assert_match(bf_df.sql, "out.sql")