From 0bd920be0ac5184a64e18697a3ed7083ed892e9f Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 19 Dec 2025 00:06:39 +0000 Subject: [PATCH 1/2] feat: implement bigframes.bigquery.search function Implements the `bigframes.bigquery.search` function, which maps to the BigQuery `SEARCH` function. This includes: - Defining `SearchOp` in `bigframes/operations/search_ops.py`. - Implementing the user-facing `search` function in `bigframes/bigquery/_operations/search.py`. - Registering the operation in the Ibis compiler with custom SQL generation to handle BigQuery's named argument syntax (`=>`). - Exposing the function in `bigframes/bigquery/__init__.py`. - Adding unit tests in `tests/unit/bigquery/test_search.py` to verify the expression tree construction. --- bigframes/bigquery/__init__.py | 8 +- bigframes/bigquery/_operations/search.py | 86 ++++++++- .../ibis_compiler/scalar_op_registry.py | 37 ++++ bigframes/operations/__init__.py | 3 + bigframes/operations/search_ops.py | 31 ++++ tests/unit/bigquery/test_search.py | 171 ++++++++++++++++++ 6 files changed, 334 insertions(+), 2 deletions(-) create mode 100644 bigframes/operations/search_ops.py create mode 100644 tests/unit/bigquery/test_search.py diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index f835285a21..9b73ebd5c0 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -57,7 +57,11 @@ to_json, to_json_string, ) -from bigframes.bigquery._operations.search import create_vector_index, vector_search +from bigframes.bigquery._operations.search import ( + create_vector_index, + search, + vector_search, +) from bigframes.bigquery._operations.sql import sql_scalar from bigframes.bigquery._operations.struct import struct from bigframes.core import log_adapter @@ -99,6 +103,7 @@ to_json_string, # search ops create_vector_index, + search, vector_search, # sql ops sql_scalar, @@ -150,6 +155,7 @@ "to_json_string", # search ops "create_vector_index", + "search", "vector_search", # sql ops "sql_scalar", diff --git a/bigframes/bigquery/_operations/search.py b/bigframes/bigquery/_operations/search.py index b65eed2475..2c60655c30 100644 --- a/bigframes/bigquery/_operations/search.py +++ b/bigframes/bigquery/_operations/search.py @@ -91,7 +91,7 @@ def create_vector_index( def vector_search( base_table: str, column_to_search: str, - query: Union[dataframe.DataFrame, series.Series], + query: Union["dataframe.DataFrame", "series.Series"], *, query_column_to_search: Optional[str] = None, top_k: Optional[int] = None, @@ -247,3 +247,87 @@ def vector_search( df = query._session.read_gbq_query(sql, allow_large_results=allow_large_results) return df + + +def search( + data_to_search: Union["dataframe.DataFrame", "series.Series"], + search_query: str, + *, + json_scope: Optional[str] = None, + analyzer: Optional[str] = None, + analyzer_options: Optional[str] = None, +) -> series.Series: + """ + The SEARCH function checks to see whether a BigQuery table or other search + data contains a set of search terms (tokens). It returns TRUE if all search + terms appear in the data, based on the rules for search_query and text + analysis described in the text analyzer. Otherwise, this function returns + FALSE. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + + >>> data = bpd.read_gbq("SELECT 'Please use foobar@example.com as your email.' AS email") + >>> bbq.search(data['email'], 'exam') + 0 False + Name: email, dtype: boolean + + >>> bbq.search(data['email'], 'foobar') + 0 True + Name: email, dtype: boolean + + >>> bbq.search(data['email'], 'example.com') + 0 True + Name: email, dtype: boolean + + Args: + data_to_search (bigframes.dataframe.DataFrame | bigframes.series.Series): + The data to search over. + search_query (str): + A STRING literal, or a STRING constant expression that represents + the terms of the search query. + json_scope (str, optional): + A named argument with a STRING value. Takes one of the following + values to indicate the scope of JSON data to be searched. It has no + effect if data_to_search isn't a JSON value or doesn't contain a + JSON field. + analyzer (str, optional): + A named argument with a STRING value. Takes one of the following + values to indicate the text analyzer to use: 'LOG_ANALYZER', + 'NO_OP_ANALYZER', 'PATTERN_ANALYZER'. + analyzer_options (str, optional): + A named argument with a JSON-formatted STRING value. Takes a list + of text analysis rules. + + Returns: + bigframes.series.Series: A new Series with the boolean result. + """ + import bigframes.operations.search_ops as search_ops + import bigframes.series + + if not isinstance(data_to_search, (bigframes.series.Series, bigframes.dataframe.DataFrame)): + raise ValueError("data_to_search must be a Series or DataFrame") + + if isinstance(data_to_search, bigframes.dataframe.DataFrame): + # SEARCH on a table (or dataframe) treats it as a STRUCT + # We need to apply the op on the dataframe, which should handle it as a struct or row + # However, unary ops are usually applied on Series. + # But DataFrame can be passed if we convert it to a struct first? + # Or does DataFrame support _apply_unary_op? + # bigframes.dataframe.DataFrame does not have _apply_unary_op. + # We can convert DataFrame to a Series of Structs. + # But SEARCH in BigQuery can take a table reference which is evaluated as a STRUCT. + # So creating a struct from all columns seems correct. + import bigframes.bigquery._operations.struct as struct_ops + data_to_search = struct_ops.struct(data_to_search) + + return data_to_search._apply_unary_op( + search_ops.SearchOp( + search_query=search_query, + json_scope=json_scope, + analyzer=analyzer, + analyzer_options=analyzer_options, + ) + ) diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 91bbfbfbcf..698d4cae45 100644 --- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -2153,6 +2153,43 @@ def str_lstrip_op( # type: ignore[empty-body] """Remove leading and trailing characters.""" +@scalar_op_compiler.register_unary_op(ops.SearchOp, pass_op=True) +def search_op_impl(x: ibis_types.Value, op: ops.SearchOp): + values = [ + typing.cast(ibis_generic.Value, x.op()), + typing.cast(ibis_generic.Value, ibis_types.literal(op.search_query).op()), + ] + sql_template = "SEARCH({0}, {1}" + arg_index = 2 + if op.json_scope is not None: + values.append( + typing.cast(ibis_generic.Value, ibis_types.literal(op.json_scope).op()) + ) + sql_template += f", json_scope=>{{{arg_index}}}" + arg_index += 1 + if op.analyzer is not None: + values.append( + typing.cast(ibis_generic.Value, ibis_types.literal(op.analyzer).op()) + ) + sql_template += f", analyzer=>{{{arg_index}}}" + arg_index += 1 + if op.analyzer_options is not None: + values.append( + typing.cast( + ibis_generic.Value, ibis_types.literal(op.analyzer_options).op() + ) + ) + sql_template += f", analyzer_options=>{{{arg_index}}}" + arg_index += 1 + sql_template += ")" + + return ibis_generic.SqlScalar( + ibis_generic.Literal(sql_template, dtype=ibis_dtypes.string), + values=tuple(values), + output_type=ibis_dtypes.boolean, + ).to_expr() + + @ibis_udf.scalar.builtin(name="rtrim") def str_rstrip_op( # type: ignore[empty-body] x: ibis_dtypes.String, to_strip: ibis_dtypes.String diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 5da8efaa3b..2c15c24106 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -184,6 +184,7 @@ NaryRemoteFunctionOp, RemoteFunctionOp, ) +from bigframes.operations.search_ops import SearchOp from bigframes.operations.string_ops import ( capitalize_op, EndsWithOp, @@ -374,6 +375,8 @@ "BinaryRemoteFunctionOp", "NaryRemoteFunctionOp", "RemoteFunctionOp", + # Search ops + "SearchOp", # Frequency ops "DatetimeToIntegerLabelOp", "FloorDtOp", diff --git a/bigframes/operations/search_ops.py b/bigframes/operations/search_ops.py new file mode 100644 index 0000000000..1eda0dde8b --- /dev/null +++ b/bigframes/operations/search_ops.py @@ -0,0 +1,31 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import dataclasses +import typing + +from bigframes import dtypes +from bigframes.operations import base_ops + + +@dataclasses.dataclass(frozen=True) +class SearchOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "search" + search_query: str + json_scope: typing.Optional[str] = None + analyzer: typing.Optional[str] = None + analyzer_options: typing.Optional[str] = None + + def output_type(self, *input_types): + return dtypes.BOOL_DTYPE diff --git a/tests/unit/bigquery/test_search.py b/tests/unit/bigquery/test_search.py new file mode 100644 index 0000000000..1b1524aa62 --- /dev/null +++ b/tests/unit/bigquery/test_search.py @@ -0,0 +1,171 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas as pd +import pytest + +import bigframes.bigquery as bbq +import bigframes.operations.search_ops as search_ops +import bigframes.series +import bigframes.session +import bigframes.testing.mocks + + +@pytest.fixture +def mock_session(): + return bigframes.testing.mocks.create_bigquery_session() + + +def test_search_series(mock_session): + # Use real Series backed by mock session (via read_pandas/ReadLocalNode) + s = bigframes.series.Series(["foo bar", "baz"], session=mock_session) + search_query = "foo" + result = bbq.search(s, search_query) + + # Verify the operation in the expression tree + import bigframes.core.nodes as nodes + import bigframes.core.expression as ex + + # Get the underlying node + node = result._block.expr.node + + # Traverse down to find the ProjectionNode + while isinstance(node, nodes.SelectionNode): + node = node.child + + # It should be a ProjectionNode (since search is a unary op applied to existing data) + assert isinstance(node, nodes.ProjectionNode) + + # Find the assignment corresponding to the result column + # result._value_column corresponds to one of the output columns of the SelectionNode chain + # But checking the ProjectionNode assignments directly is easier if we iterate through them. + # The SearchOp should be one of the assignments. + + # Locate the assignment with SearchOp + assignments = [expr for expr, id in node.assignments if isinstance(expr, ex.OpExpression) and isinstance(expr.op, search_ops.SearchOp)] + assert len(assignments) == 1 + assignment = assignments[0] + + # The expression should be an OpExpression with SearchOp + assert isinstance(assignment, ex.OpExpression) + assert isinstance(assignment.op, search_ops.SearchOp) + + assert assignment.op.search_query == search_query + assert assignment.op.json_scope is None + assert assignment.op.analyzer is None + assert assignment.op.analyzer_options is None + + +def test_search_series_with_options(mock_session): + s = bigframes.series.Series(["foo bar", "baz"], session=mock_session) + search_query = "foo" + result = bbq.search( + s, + search_query, + json_scope="JSON_VALUES", + analyzer="LOG_ANALYZER", + analyzer_options='{"delimiters": [" "]}', + ) + + # Verify the operation in the expression tree + import bigframes.core.nodes as nodes + import bigframes.core.expression as ex + + # Get the underlying node + node = result._block.expr.node + + # Traverse down to find the ProjectionNode + while isinstance(node, nodes.SelectionNode): + node = node.child + + # It should be a ProjectionNode + assert isinstance(node, nodes.ProjectionNode) + + # Locate the assignment with SearchOp + assignments = [expr for expr, id in node.assignments if isinstance(expr, ex.OpExpression) and isinstance(expr.op, search_ops.SearchOp)] + assert len(assignments) == 1 + assignment = assignments[0] + + assert isinstance(assignment, ex.OpExpression) + assert isinstance(assignment.op, search_ops.SearchOp) + + assert assignment.op.search_query == search_query + assert assignment.op.json_scope == "JSON_VALUES" + assert assignment.op.analyzer == "LOG_ANALYZER" + assert assignment.op.analyzer_options == '{"delimiters": [" "]}' + + +def test_search_dataframe(mock_session): + # Mock dataframe with 2 columns + df = pd.DataFrame({"col1": ["foo", "bar"], "col2": ["baz", "qux"]}) + bf = bigframes.dataframe.DataFrame(df, session=mock_session) + + search_query = "foo" + result = bbq.search(bf, search_query) + + import bigframes.core.nodes as nodes + import bigframes.core.expression as ex + from bigframes.operations import struct_ops + + # Get the underlying node + node = result._block.expr.node + + # Traverse down to find the ProjectionNode + while isinstance(node, nodes.SelectionNode): + node = node.child + + # Should be a ProjectionNode + assert isinstance(node, nodes.ProjectionNode) + + assignments = [expr for expr, id in node.assignments if isinstance(expr, ex.OpExpression) and isinstance(expr.op, search_ops.SearchOp)] + assert len(assignments) == 1 + assignment = assignments[0] + + assert isinstance(assignment, ex.OpExpression) + assert isinstance(assignment.op, search_ops.SearchOp) + assert assignment.op.search_query == search_query + + # Verify that the input to SearchOp is a StructOp + # The input expression to SearchOp + search_input = assignment.inputs[0] + + # Since struct() op and search op might be in the same ProjectionNode or different ones. + # If they are in the same ProjectionNode, `search_input` would be a DerefOp to a column not in assignments? + # No, ProjectionNode assignments are parallel. So struct op must be in a child node. + + # Check if struct op is in the same node (unlikely for parallel projection unless merged somehow, but typical flow puts them sequential) + + # If search_input is DerefOp, we look in the child node. + assert isinstance(search_input, ex.DerefOp) + + child_node = node.child + # Traverse SelectionNodes if any + while isinstance(child_node, nodes.SelectionNode): + child_node = child_node.child + + # It should be a ProjectionNode (from struct()) + assert isinstance(child_node, nodes.ProjectionNode) + + # Find the struct assignment + struct_col_id = search_input.id + struct_assignment = next(expr for expr, id in child_node.assignments if id == struct_col_id) + + assert isinstance(struct_assignment, ex.OpExpression) + assert isinstance(struct_assignment.op, struct_ops.StructOp) + assert struct_assignment.op.column_names == ("col1", "col2") + + +def test_search_invalid_input(mock_session): + with pytest.raises(ValueError, match="data_to_search must be a Series or DataFrame"): + bbq.search("invalid", "foo") From 68ff3dd9e85a6b901b5f876020ed2a3f74e66844 Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Mon, 22 Dec 2025 23:44:17 +0000 Subject: [PATCH 2/2] fix: simplify search op --- bigframes/bigquery/_operations/search.py | 46 +++++-------------- .../core/compile/ibis_compiler/__init__.py | 1 + .../ibis_compiler/operations/search_ops.py | 40 ++++++++++++++++ .../ibis_compiler/scalar_op_registry.py | 37 --------------- bigframes/core/compile/sqlglot/__init__.py | 1 + .../compile/sqlglot/expressions/search_ops.py | 29 ++++++++++++ bigframes/operations/search_ops.py | 3 -- 7 files changed, 83 insertions(+), 74 deletions(-) create mode 100644 bigframes/core/compile/ibis_compiler/operations/search_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/search_ops.py diff --git a/bigframes/bigquery/_operations/search.py b/bigframes/bigquery/_operations/search.py index 2c60655c30..10af14b8a0 100644 --- a/bigframes/bigquery/_operations/search.py +++ b/bigframes/bigquery/_operations/search.py @@ -20,10 +20,11 @@ import google.cloud.bigquery as bigquery +import bigframes.core.sql +import bigframes.dataframe import bigframes.ml.utils as utils if typing.TYPE_CHECKING: - import bigframes.dataframe as dataframe import bigframes.series as series import bigframes.session @@ -91,7 +92,7 @@ def create_vector_index( def vector_search( base_table: str, column_to_search: str, - query: Union["dataframe.DataFrame", "series.Series"], + query: Union[bigframes.dataframe.DataFrame, series.Series], *, query_column_to_search: Optional[str] = None, top_k: Optional[int] = None, @@ -99,7 +100,7 @@ def vector_search( fraction_lists_to_search: Optional[float] = None, use_brute_force: Optional[bool] = None, allow_large_results: Optional[bool] = None, -) -> dataframe.DataFrame: +) -> bigframes.dataframe.DataFrame: """ Conduct vector search which searches embeddings to find semantically similar entities. @@ -108,7 +109,6 @@ def vector_search( **Examples:** - >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq @@ -250,12 +250,8 @@ def vector_search( def search( - data_to_search: Union["dataframe.DataFrame", "series.Series"], + data_to_search: Union[bigframes.dataframe.DataFrame, series.Series], search_query: str, - *, - json_scope: Optional[str] = None, - analyzer: Optional[str] = None, - analyzer_options: Optional[str] = None, ) -> series.Series: """ The SEARCH function checks to see whether a BigQuery table or other search @@ -288,18 +284,6 @@ def search( search_query (str): A STRING literal, or a STRING constant expression that represents the terms of the search query. - json_scope (str, optional): - A named argument with a STRING value. Takes one of the following - values to indicate the scope of JSON data to be searched. It has no - effect if data_to_search isn't a JSON value or doesn't contain a - JSON field. - analyzer (str, optional): - A named argument with a STRING value. Takes one of the following - values to indicate the text analyzer to use: 'LOG_ANALYZER', - 'NO_OP_ANALYZER', 'PATTERN_ANALYZER'. - analyzer_options (str, optional): - A named argument with a JSON-formatted STRING value. Takes a list - of text analysis rules. Returns: bigframes.series.Series: A new Series with the boolean result. @@ -307,27 +291,21 @@ def search( import bigframes.operations.search_ops as search_ops import bigframes.series - if not isinstance(data_to_search, (bigframes.series.Series, bigframes.dataframe.DataFrame)): + if not isinstance( + data_to_search, (bigframes.series.Series, bigframes.dataframe.DataFrame) + ): raise ValueError("data_to_search must be a Series or DataFrame") if isinstance(data_to_search, bigframes.dataframe.DataFrame): - # SEARCH on a table (or dataframe) treats it as a STRUCT - # We need to apply the op on the dataframe, which should handle it as a struct or row - # However, unary ops are usually applied on Series. - # But DataFrame can be passed if we convert it to a struct first? - # Or does DataFrame support _apply_unary_op? - # bigframes.dataframe.DataFrame does not have _apply_unary_op. - # We can convert DataFrame to a Series of Structs. - # But SEARCH in BigQuery can take a table reference which is evaluated as a STRUCT. - # So creating a struct from all columns seems correct. + # SEARCH on a table (or dataframe) treats it as a STRUCT. For easier + # application of a scalar unary op, we convert to a struct proactively + # in the expression. import bigframes.bigquery._operations.struct as struct_ops + data_to_search = struct_ops.struct(data_to_search) return data_to_search._apply_unary_op( search_ops.SearchOp( search_query=search_query, - json_scope=json_scope, - analyzer=analyzer, - analyzer_options=analyzer_options, ) ) diff --git a/bigframes/core/compile/ibis_compiler/__init__.py b/bigframes/core/compile/ibis_compiler/__init__.py index 6b9d284c53..2af8061bd7 100644 --- a/bigframes/core/compile/ibis_compiler/__init__.py +++ b/bigframes/core/compile/ibis_compiler/__init__.py @@ -22,4 +22,5 @@ import bigframes.core.compile.ibis_compiler.operations.generic_ops # noqa: F401 import bigframes.core.compile.ibis_compiler.operations.geo_ops # noqa: F401 +import bigframes.core.compile.ibis_compiler.operations.search_ops # noqa: F401 import bigframes.core.compile.ibis_compiler.scalar_op_registry # noqa: F401 diff --git a/bigframes/core/compile/ibis_compiler/operations/search_ops.py b/bigframes/core/compile/ibis_compiler/operations/search_ops.py new file mode 100644 index 0000000000..516b49036d --- /dev/null +++ b/bigframes/core/compile/ibis_compiler/operations/search_ops.py @@ -0,0 +1,40 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +BigFrames -> Ibis compilation for the operations in bigframes.operations.search_ops. + +Please keep implementations in sequential order by op name. +""" + +from __future__ import annotations + +from bigframes_vendored.ibis.expr import types as ibis_types +import bigframes_vendored.ibis.expr.operations.udf as ibis_udf + +from bigframes.core.compile.ibis_compiler import scalar_op_compiler +from bigframes.operations import search_ops + +register_unary_op = scalar_op_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(search_ops.SearchOp, pass_op=True) +def search_op_impl(x: ibis_types.Value, op: search_ops.SearchOp): + return search(x, op.search_query) + + +@ibis_udf.scalar.builtin(name="search") +def search(data_to_search, search_query) -> bool: + """Checks to see whether a table or other search data contains a set of search terms.""" + return False # pragma: NO COVER diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 698d4cae45..91bbfbfbcf 100644 --- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -2153,43 +2153,6 @@ def str_lstrip_op( # type: ignore[empty-body] """Remove leading and trailing characters.""" -@scalar_op_compiler.register_unary_op(ops.SearchOp, pass_op=True) -def search_op_impl(x: ibis_types.Value, op: ops.SearchOp): - values = [ - typing.cast(ibis_generic.Value, x.op()), - typing.cast(ibis_generic.Value, ibis_types.literal(op.search_query).op()), - ] - sql_template = "SEARCH({0}, {1}" - arg_index = 2 - if op.json_scope is not None: - values.append( - typing.cast(ibis_generic.Value, ibis_types.literal(op.json_scope).op()) - ) - sql_template += f", json_scope=>{{{arg_index}}}" - arg_index += 1 - if op.analyzer is not None: - values.append( - typing.cast(ibis_generic.Value, ibis_types.literal(op.analyzer).op()) - ) - sql_template += f", analyzer=>{{{arg_index}}}" - arg_index += 1 - if op.analyzer_options is not None: - values.append( - typing.cast( - ibis_generic.Value, ibis_types.literal(op.analyzer_options).op() - ) - ) - sql_template += f", analyzer_options=>{{{arg_index}}}" - arg_index += 1 - sql_template += ")" - - return ibis_generic.SqlScalar( - ibis_generic.Literal(sql_template, dtype=ibis_dtypes.string), - values=tuple(values), - output_type=ibis_dtypes.boolean, - ).to_expr() - - @ibis_udf.scalar.builtin(name="rtrim") def str_rstrip_op( # type: ignore[empty-body] x: ibis_dtypes.String, to_strip: ibis_dtypes.String diff --git a/bigframes/core/compile/sqlglot/__init__.py b/bigframes/core/compile/sqlglot/__init__.py index 9e3f123807..61ba4398c6 100644 --- a/bigframes/core/compile/sqlglot/__init__.py +++ b/bigframes/core/compile/sqlglot/__init__.py @@ -25,6 +25,7 @@ import bigframes.core.compile.sqlglot.expressions.geo_ops # noqa: F401 import bigframes.core.compile.sqlglot.expressions.json_ops # noqa: F401 import bigframes.core.compile.sqlglot.expressions.numeric_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.search_ops # noqa: F401 import bigframes.core.compile.sqlglot.expressions.string_ops # noqa: F401 import bigframes.core.compile.sqlglot.expressions.struct_ops # noqa: F401 import bigframes.core.compile.sqlglot.expressions.timedelta_ops # noqa: F401 diff --git a/bigframes/core/compile/sqlglot/expressions/search_ops.py b/bigframes/core/compile/sqlglot/expressions/search_ops.py new file mode 100644 index 0000000000..1fff4282e6 --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/search_ops.py @@ -0,0 +1,29 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_nary_op = scalar_compiler.scalar_op_compiler.register_nary_op +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.SearchOp, pass_op=True) +def _(expr: TypedExpr, op: ops.SearchOp) -> sge.Expression: + return sge.func("SEARCH", expr.expr, sge.convert(op.search_query)) diff --git a/bigframes/operations/search_ops.py b/bigframes/operations/search_ops.py index 1eda0dde8b..f63696f15f 100644 --- a/bigframes/operations/search_ops.py +++ b/bigframes/operations/search_ops.py @@ -23,9 +23,6 @@ class SearchOp(base_ops.UnaryOp): name: typing.ClassVar[str] = "search" search_query: str - json_scope: typing.Optional[str] = None - analyzer: typing.Optional[str] = None - analyzer_options: typing.Optional[str] = None def output_type(self, *input_types): return dtypes.BOOL_DTYPE