From 4de6184b93b7d60de367cf6f68ca73082292e9d2 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 5 Nov 2025 10:29:16 +0000 Subject: [PATCH 01/31] feat: add support for Python 3.14 --- .github/workflows/lint.yml | 2 +- .github/workflows/mypy.yml | 2 +- .github/workflows/unittest.yml | 4 +-- CONTRIBUTING.rst | 10 ++++--- GEMINI.md | 4 +-- .../getting_started_bq_dataframes.ipynb | 6 ++-- notebooks/location/regionalized.ipynb | 4 +-- .../remote_functions/remote_function.ipynb | 4 +-- .../remote_function_usecases.ipynb | 6 ++-- .../remote_function_vertex_claude_model.ipynb | 4 +-- noxfile.py | 30 +++++++++++-------- samples/polars/noxfile.py | 2 +- samples/snippets/noxfile.py | 2 +- scripts/test_publish_api_coverage.py | 8 ----- setup.py | 1 + 15 files changed, 45 insertions(+), 44 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7914b72651..b848262c3a 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -15,7 +15,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.14" - name: Install nox run: | python -m pip install --upgrade setuptools pip wheel diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index fc9e970946..a12138dc2a 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -15,7 +15,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.14" - name: Install nox run: | python -m pip install --upgrade setuptools pip wheel diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 518cec6312..f334f4eb43 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - python: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] steps: - name: Checkout uses: actions/checkout@v4 @@ -48,7 +48,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.14" - name: Install coverage run: | python -m pip install --upgrade setuptools pip wheel diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 5374e7e377..a4ca07b0c1 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. + 3.9, 3.10, 3.11, 3.12, 3.13 and 3.14 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.13 -- -k + $ nox -s unit-3.14 -- -k .. note:: @@ -143,12 +143,12 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.13 -- -k + $ nox -s system-3.14 -- -k .. note:: - System tests are only configured to run under Python 3.9, 3.11, 3.12 and 3.13. + System tests are only configured to run under Python 3.9, 3.12 and 3.14. For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local @@ -263,12 +263,14 @@ We support: - `Python 3.11`_ - `Python 3.12`_ - `Python 3.13`_ +- `Python 3.14`_ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ .. 
_Python 3.12: https://docs.python.org/3.12/ .. _Python 3.13: https://docs.python.org/3.13/ +.. _Python 3.14: https://docs.python.org/3.14/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/GEMINI.md b/GEMINI.md index 0d447f17a4..1c8cff3387 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -13,7 +13,7 @@ We use `nox` to instrument our tests. - To run a single unit test: ```bash - nox -r -s unit-3.13 -- -k + nox -r -s unit-3.14 -- -k ``` - Ignore this step if you lack access to Google Cloud resources. To run system @@ -23,7 +23,7 @@ We use `nox` to instrument our tests. $ nox -r -s system # Run a single system test - $ nox -r -s system-3.13 -- -k + $ nox -r -s system-3.14 -- -k - The codebase must have better coverage than it had previously after each change. You can test coverage via `nox -s unit system cover` (takes a long diff --git a/notebooks/getting_started/getting_started_bq_dataframes.ipynb b/notebooks/getting_started/getting_started_bq_dataframes.ipynb index fa88cf65bb..8f7d046840 100644 --- a/notebooks/getting_started/getting_started_bq_dataframes.ipynb +++ b/notebooks/getting_started/getting_started_bq_dataframes.ipynb @@ -1331,15 +1331,15 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import sys\n", "\n", - "# Python 3.13 is not yet a supported runtime for remote functions.\n", + "# Python 3.14 is not yet a supported runtime for remote functions.\n", "# See: https://cloud.google.com/functions/docs/runtime-support#python for the supported runtimes.\n", - "if sys.version_info >= (3, 13, 0):\n", + "if sys.version_info >= (3, 14, 0):\n", " sys.exit(0)" ] }, diff --git a/notebooks/location/regionalized.ipynb b/notebooks/location/regionalized.ipynb index 066cd18136..8acfd9a6b2 100644 --- a/notebooks/location/regionalized.ipynb +++ b/notebooks/location/regionalized.ipynb @@ -1347,9 +1347,9 @@ "source": [ "import sys\n", "\n", - "# Python 3.13 is not yet a supported runtime for remote functions.\n", + "# Python 3.14 is not yet a supported runtime for remote functions.\n", "# See: https://cloud.google.com/functions/docs/runtime-support#python for the supported runtimes.\n", - "if sys.version_info >= (3, 13, 0):\n", + "if sys.version_info >= (3, 14, 0):\n", " sys.exit(0)" ] }, diff --git a/notebooks/remote_functions/remote_function.ipynb b/notebooks/remote_functions/remote_function.ipynb index e2bc88ecae..e0cfe7026f 100644 --- a/notebooks/remote_functions/remote_function.ipynb +++ b/notebooks/remote_functions/remote_function.ipynb @@ -9,9 +9,9 @@ "source": [ "import sys\n", "\n", - "# Python 3.13 is not yet a supported runtime for remote functions.\n", + "# Python 3.14 is not yet a supported runtime for remote functions.\n", "# See: https://cloud.google.com/functions/docs/runtime-support#python for the supported runtimes.\n", - "if sys.version_info >= (3, 13, 0):\n", + "if sys.version_info >= (3, 14, 0):\n", " sys.exit(0)" ] }, diff --git a/notebooks/remote_functions/remote_function_usecases.ipynb b/notebooks/remote_functions/remote_function_usecases.ipynb index 03ae652095..bdc7e4b853 100644 --- a/notebooks/remote_functions/remote_function_usecases.ipynb +++ b/notebooks/remote_functions/remote_function_usecases.ipynb @@ -23,15 +23,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import sys\n", "\n", - "# Python 3.13 is not yet a supported runtime for remote functions.\n", + "# Python 3.14 is not yet a supported 
runtime for remote functions.\n", "# See: https://cloud.google.com/functions/docs/runtime-support#python for the supported runtimes.\n", - "if sys.version_info >= (3, 13, 0):\n", + "if sys.version_info >= (3, 14, 0):\n", " sys.exit(0)" ] }, diff --git a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb index 9792c90205..33d6d35615 100644 --- a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb +++ b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb @@ -36,9 +36,9 @@ "source": [ "import sys\n", "\n", - "# Python 3.13 is not yet a supported runtime for remote functions.\n", + "# Python 3.14 is not yet a supported runtime for remote functions.\n", "# See: https://cloud.google.com/functions/docs/runtime-support#python for the supported runtimes.\n", - "if sys.version_info >= (3, 13, 0):\n", + "if sys.version_info >= (3, 14, 0):\n", " sys.exit(0)" ] }, diff --git a/noxfile.py b/noxfile.py index 44fc5adede..9786609fe4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -33,8 +33,10 @@ ISORT_VERSION = "isort==5.12.0" MYPY_VERSION = "mypy==1.15.0" -# TODO: switch to 3.13 once remote functions / cloud run adds a runtime for it (internal issue 333742751) -LATEST_FULLY_SUPPORTED_PYTHON = "3.12" +# TODO: switch to 3.14 once remote functions adds a runtime for it +# https://cloud.google.com/run/docs/runtimes/python +# https://cloud.google.com/functions/docs/runtime-support#python +LATEST_FULLY_SUPPORTED_PYTHON = "3.13" # Notebook tests should match colab and BQ Studio. # Check with import sys; sys.version_info @@ -58,13 +60,14 @@ "setup.py", ] -DEFAULT_PYTHON_VERSION = "3.10" +DEFAULT_PYTHON_VERSION = "3.14" -# Cloud Run Functions supports Python versions up to 3.12 +# Cloud Run Functions supports Python versions up to 3.13 # https://cloud.google.com/run/docs/runtimes/python -E2E_TEST_PYTHON_VERSION = "3.12" +# https://cloud.google.com/functions/docs/runtime-support#python +E2E_TEST_PYTHON_VERSION = "3.13" -UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] UNIT_TEST_STANDARD_DEPENDENCIES = [ "mock", "asyncmock", @@ -83,13 +86,14 @@ # Make sure we leave some versions without "extras" so we know those # dependencies are actually optional. "3.13": ["tests", "polars", "scikit-learn", "anywidget"], + "3.14": ["tests", "polars", "scikit-learn", "anywidget"], } # 3.11 is used by colab. # 3.10 is needed for Windows tests as it is the only version installed in the # bigframes-windows container image. For more information, search # bigframes/windows-docker, internally. -SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] SYSTEM_TEST_STANDARD_DEPENDENCIES = [ "jinja2", "mock", @@ -113,8 +117,9 @@ # Make sure we leave some versions without "extras" so we know those # dependencies are actually optional. 
"3.10": ["tests", "scikit-learn", "anywidget"], - LATEST_FULLY_SUPPORTED_PYTHON: ["tests", "scikit-learn", "polars", "anywidget"], + "3.12": ["tests", "scikit-learn", "polars", "anywidget"], "3.13": ["tests", "polars", "anywidget"], + "3.14": ["tests", "polars", "anywidget"], } LOGGING_NAME_ENV_VAR = "BIGFRAMES_PERFORMANCE_LOG_NAME" @@ -807,11 +812,12 @@ def notebook(session: nox.Session): "notebooks/dataframes/anywidget_mode.ipynb", ] - # TODO: remove exception for Python 3.13 cloud run adds a runtime for it (internal issue 333742751) - # TODO: remove exception for Python 3.13 if nbmake adds support for + # TODO: remove exception for Python 3.14 once remote functions adds a runtime for it + # https://cloud.google.com/run/docs/runtimes/python + # https://cloud.google.com/functions/docs/runtime-support#python # sys.exit(0) or pytest.skip(...). # See: https://github.com/treebeardtech/nbmake/issues/134 - if session.python == "3.13": + if session.python == "3.14": denylist.extend( [ "notebooks/getting_started/getting_started_bq_dataframes.ipynb", @@ -995,7 +1001,7 @@ def benchmark(session: nox.Session): ) -@nox.session(python="3.10") +@nox.session(python=DEFAULT_PYTHON_VERSION) def release_dry_run(session): env = {} diff --git a/samples/polars/noxfile.py b/samples/polars/noxfile.py index 494639d2fa..782da04329 100644 --- a/samples/polars/noxfile.py +++ b/samples/polars/noxfile.py @@ -88,7 +88,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 494639d2fa..782da04329 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -88,7 +88,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/scripts/test_publish_api_coverage.py b/scripts/test_publish_api_coverage.py index 6e366b6854..6caae68cd6 100644 --- a/scripts/test_publish_api_coverage.py +++ b/scripts/test_publish_api_coverage.py @@ -26,10 +26,6 @@ def api_coverage_df(): return build_api_coverage_table("my_bf_ver", "my_release_ver") -@pytest.mark.skipif( - sys.version_info >= (3, 13), - reason="Issues with installing sklearn for this test in python 3.13", -) def test_api_coverage_produces_expected_schema(api_coverage_df): if sys.version.split(".")[:2] == ["3", "9"]: pytest.skip( @@ -59,10 +55,6 @@ def test_api_coverage_produces_expected_schema(api_coverage_df): ) -@pytest.mark.skipif( - sys.version_info >= (3, 13), - reason="Issues with installing sklearn for this test in python 3.13", -) def test_api_coverage_produces_missing_parameters(api_coverage_df): """Make sure at least some functions have reported missing parameters.""" assert (api_coverage_df["missing_parameters"].str.len() > 0).any() diff --git a/setup.py b/setup.py index fa663f66d5..63e8881155 100644 --- a/setup.py +++ b/setup.py @@ -141,6 +141,7 @@ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", "Topic :: Internet", ], From 2fbd11d7f5099d8a1ad26187bf7bf69ef5eff2f3 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 5 Nov 2025 10:37:12 +0000 Subject: [PATCH 02/31] update python version for docs --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 9786609fe4..8c95e9a78e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -520,7 +520,7 @@ def cover(session): session.run("coverage", "erase") -@nox.session(python="3.13") +@nox.session(python="3.10") def docs(session): """Build the docs for this library.""" session.install("-e", ".[scikit-learn]") @@ -551,7 +551,7 @@ def docs(session): ) -@nox.session(python=DEFAULT_PYTHON_VERSION) +@nox.session(python="3.10") def docfx(session): """Build the docfx yaml files for this library.""" From 71ef8f346ab1629262c83f590aa095ae5dfd09f7 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 12 Dec 2025 19:52:24 +0000 Subject: [PATCH 03/31] revert docs --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 8c95e9a78e..1e6bc0b652 100644 --- a/noxfile.py +++ b/noxfile.py @@ -520,7 +520,7 @@ def cover(session): session.run("coverage", "erase") -@nox.session(python="3.10") +@nox.session(python="3.13") def docs(session): """Build the docs for this library.""" session.install("-e", ".[scikit-learn]") From 9257c5c573bd04434c13824c8b070d8b08102977 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 12 Dec 2025 19:59:16 +0000 Subject: [PATCH 04/31] update black to 23.7.0 --- bigframes/_config/auth.py | 6 +++--- bigframes/core/block_transforms.py | 1 - bigframes/core/blocks.py | 4 ++-- .../compile/ibis_compiler/scalar_op_registry.py | 7 ------- bigframes/core/expression_factoring.py | 6 +++--- bigframes/core/indexes/base.py | 1 - bigframes/core/window/rolling.py | 1 - bigframes/ml/cluster.py | 1 - bigframes/ml/decomposition.py | 1 - bigframes/ml/ensemble.py | 3 --- bigframes/ml/imported.py | 9 ++++++--- bigframes/ml/impute.py | 1 - bigframes/ml/model_selection.py | 1 - bigframes/ml/preprocessing.py | 1 - bigframes/ml/sql.py | 15 ++++++++++----- bigframes/operations/blob.py | 3 ++- 
bigframes/session/__init__.py | 4 ++-- bigframes/session/bq_caching_executor.py | 1 - noxfile.py | 2 +- .../large/functions/test_managed_function.py | 10 ---------- .../large/functions/test_remote_function.py | 6 ------ tests/system/small/ml/test_metrics.py | 1 - tests/system/small/ml/test_model_selection.py | 8 +++++++- tests/system/small/test_anywidget.py | 2 -- tests/system/small/test_dataframe.py | 3 --- tests/system/small/test_dataframe_io.py | 2 -- tests/system/small/test_index_io.py | 2 -- tests/system/small/test_null_index.py | 1 - tests/system/small/test_pandas.py | 8 ++++---- tests/system/small/test_pandas_options.py | 1 - tests/system/small/test_series.py | 1 - tests/system/small/test_series_io.py | 1 - tests/system/small/test_session.py | 2 -- .../sqlglot/expressions/test_string_ops.py | 1 - .../core/compile/sqlglot/test_compile_concat.py | 1 - .../unit/functions/test_remote_function_utils.py | 2 -- tests/unit/test_dataframe_polars.py | 2 -- tests/unit/test_series_polars.py | 1 - .../sklearn/preprocessing/_encoder.py | 3 ++- 39 files changed, 42 insertions(+), 84 deletions(-) diff --git a/bigframes/_config/auth.py b/bigframes/_config/auth.py index 1574fc4883..ccb5fcbedb 100644 --- a/bigframes/_config/auth.py +++ b/bigframes/_config/auth.py @@ -30,9 +30,9 @@ _cached_project_default: Optional[str] = None -def get_default_credentials_with_project() -> tuple[ - google.auth.credentials.Credentials, Optional[str] -]: +def get_default_credentials_with_project() -> ( + tuple[google.auth.credentials.Credentials, Optional[str]] +): global _AUTH_LOCK, _cached_credentials, _cached_project_default with _AUTH_LOCK: diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index 16be560b05..867890298a 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -618,7 +618,6 @@ def skew( skew_column_ids: typing.Sequence[str], grouping_column_ids: typing.Sequence[str] = (), ) -> blocks.Block: - original_columns = skew_column_ids column_labels = block.select_columns(original_columns).column_labels diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index df7c6dee43..6da97a7468 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -650,7 +650,6 @@ def _get_sampling_option( sampling_method: Optional[str] = None, random_state: Optional[int] = None, ) -> sampling_options.SamplingOptions: - if (sampling_method is not None) and (sampling_method not in _SAMPLING_METHODS): raise NotImplementedError( f"The downsampling method {sampling_method} is not implemented, " @@ -693,7 +692,8 @@ def to_pandas_batches( """Download results one message at a time. 
page_size and max_results determine the size and number of batches, - see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result""" + see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result + """ under_10gb = ( (not allow_large_results) diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 91bbfbfbcf..d25bdf85ee 100644 --- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -1888,7 +1888,6 @@ def struct_op_impl( def ai_generate( *values: ibis_types.Value, op: ops.AIGenerate ) -> ibis_types.StructValue: - return ai_ops.AIGenerate( _construct_prompt(values, op.prompt_context), # type: ignore op.connection_id, # type: ignore @@ -1903,7 +1902,6 @@ def ai_generate( def ai_generate_bool( *values: ibis_types.Value, op: ops.AIGenerateBool ) -> ibis_types.StructValue: - return ai_ops.AIGenerateBool( _construct_prompt(values, op.prompt_context), # type: ignore op.connection_id, # type: ignore @@ -1917,7 +1915,6 @@ def ai_generate_bool( def ai_generate_int( *values: ibis_types.Value, op: ops.AIGenerateInt ) -> ibis_types.StructValue: - return ai_ops.AIGenerateInt( _construct_prompt(values, op.prompt_context), # type: ignore op.connection_id, # type: ignore @@ -1931,7 +1928,6 @@ def ai_generate_int( def ai_generate_double( *values: ibis_types.Value, op: ops.AIGenerateDouble ) -> ibis_types.StructValue: - return ai_ops.AIGenerateDouble( _construct_prompt(values, op.prompt_context), # type: ignore op.connection_id, # type: ignore @@ -1943,7 +1939,6 @@ def ai_generate_double( @scalar_op_compiler.register_nary_op(ops.AIIf, pass_op=True) def ai_if(*values: ibis_types.Value, op: ops.AIIf) -> ibis_types.StructValue: - return ai_ops.AIIf( _construct_prompt(values, op.prompt_context), # type: ignore op.connection_id, # type: ignore @@ -1954,7 +1949,6 @@ def ai_if(*values: ibis_types.Value, op: ops.AIIf) -> ibis_types.StructValue: def ai_classify( *values: ibis_types.Value, op: ops.AIClassify ) -> ibis_types.StructValue: - return ai_ops.AIClassify( _construct_prompt(values, op.prompt_context), # type: ignore op.categories, # type: ignore @@ -1964,7 +1958,6 @@ def ai_classify( @scalar_op_compiler.register_nary_op(ops.AIScore, pass_op=True) def ai_score(*values: ibis_types.Value, op: ops.AIScore) -> ibis_types.StructValue: - return ai_ops.AIScore( _construct_prompt(values, op.prompt_context), # type: ignore op.connection_id, # type: ignore diff --git a/bigframes/core/expression_factoring.py b/bigframes/core/expression_factoring.py index d7ac49b585..4de205949d 100644 --- a/bigframes/core/expression_factoring.py +++ b/bigframes/core/expression_factoring.py @@ -167,9 +167,9 @@ def graph_extract_scalar_exprs() -> Sequence[nodes.ColumnDef]: # TODO: We can prune expressions that won't be reused here, return tuple(nodes.ColumnDef(expr, id) for id, expr in results.items()) - def graph_extract_window_expr() -> Optional[ - Tuple[Sequence[nodes.ColumnDef], window_spec.WindowSpec] - ]: + def graph_extract_window_expr() -> ( + Optional[Tuple[Sequence[nodes.ColumnDef], window_spec.WindowSpec]] + ): for id in graph.sinks: next_def = by_id[id] if isinstance(next_def.expression, agg_expressions.WindowExpression): diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 
383534fa4d..3a2b64f44d 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -210,7 +210,6 @@ def is_monotonic_increasing(self) -> bool: @property @validations.requires_ordering() def is_monotonic_decreasing(self) -> bool: - return typing.cast( bool, self._block.is_monotonic_decreasing(self._block.index_columns), diff --git a/bigframes/core/window/rolling.py b/bigframes/core/window/rolling.py index d6c77bf0a7..c5082603c1 100644 --- a/bigframes/core/window/rolling.py +++ b/bigframes/core/window/rolling.py @@ -217,7 +217,6 @@ def create_range_window( grouping_keys: Sequence[str] = tuple(), drop_null_groups: bool = True, ) -> Window: - if on is None: # Rolling on index index_dtypes = block.index.dtypes diff --git a/bigframes/ml/cluster.py b/bigframes/ml/cluster.py index 9ce4649c5e..f12f96ce5b 100644 --- a/bigframes/ml/cluster.py +++ b/bigframes/ml/cluster.py @@ -44,7 +44,6 @@ class KMeans( base.UnsupervisedTrainablePredictor, bigframes_vendored.sklearn.cluster._kmeans.KMeans, ): - __doc__ = bigframes_vendored.sklearn.cluster._kmeans.KMeans.__doc__ def __init__( diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index 3ff32d2433..ef8a8d569b 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -226,7 +226,6 @@ def __init__( # TODO: Add support for hyperparameter tuning. l2_reg: float = 1.0, ): - feedback_type = feedback_type.lower() # type: ignore if feedback_type not in ("explicit", "implicit"): raise ValueError("Expected feedback_type to be `explicit` or `implicit`.") diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index 2633f13411..1ccc20dec1 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -213,7 +213,6 @@ class XGBClassifier( base.SupervisedTrainableWithEvaluationPredictor, bigframes_vendored.xgboost.sklearn.XGBClassifier, ): - __doc__ = bigframes_vendored.xgboost.sklearn.XGBClassifier.__doc__ def __init__( @@ -370,7 +369,6 @@ class RandomForestRegressor( base.SupervisedTrainableWithEvaluationPredictor, bigframes_vendored.sklearn.ensemble._forest.RandomForestRegressor, ): - __doc__ = bigframes_vendored.sklearn.ensemble._forest.RandomForestRegressor.__doc__ def __init__( @@ -536,7 +534,6 @@ class RandomForestClassifier( base.SupervisedTrainableWithEvaluationPredictor, bigframes_vendored.sklearn.ensemble._forest.RandomForestClassifier, ): - __doc__ = bigframes_vendored.sklearn.ensemble._forest.RandomForestClassifier.__doc__ def __init__( diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index a73ee352d0..b5bba1960a 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -72,7 +72,8 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: Input DataFrame. Schema is defined by the model. Returns: - bigframes.dataframe.DataFrame: Output DataFrame. Schema is defined by the model.""" + bigframes.dataframe.DataFrame: Output DataFrame. Schema is defined by the model. + """ if not self._bqml_model: if self.model_path is None: @@ -151,7 +152,8 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: Input DataFrame or Series. Schema is defined by the model. Returns: - bigframes.dataframe.DataFrame: Output DataFrame, schema is defined by the model.""" + bigframes.dataframe.DataFrame: Output DataFrame, schema is defined by the model. + """ if not self._bqml_model: if self.model_path is None: @@ -270,7 +272,8 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: Input DataFrame or Series. Schema is defined by the model. 
Returns: - bigframes.dataframe.DataFrame: Output DataFrame. Schema is defined by the model.""" + bigframes.dataframe.DataFrame: Output DataFrame. Schema is defined by the model. + """ if not self._bqml_model: if self.model_path is None: diff --git a/bigframes/ml/impute.py b/bigframes/ml/impute.py index 818151a4f9..f9fd3496cc 100644 --- a/bigframes/ml/impute.py +++ b/bigframes/ml/impute.py @@ -33,7 +33,6 @@ class SimpleImputer( base.Transformer, bigframes_vendored.sklearn.impute._base.SimpleImputer, ): - __doc__ = bigframes_vendored.sklearn.impute._base.SimpleImputer.__doc__ def __init__( diff --git a/bigframes/ml/model_selection.py b/bigframes/ml/model_selection.py index 6eba4f81c2..305c898884 100644 --- a/bigframes/ml/model_selection.py +++ b/bigframes/ml/model_selection.py @@ -39,7 +39,6 @@ def train_test_split( stratify: Union[bpd.Series, None] = None, shuffle: bool = True, ) -> List[Union[bpd.DataFrame, bpd.Series]]: - if test_size is None: if train_size is None: test_size = 0.25 diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index 94c61674f6..6782f43696 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -328,7 +328,6 @@ def _compile_to_sql( ] elif self.strategy == "quantile": - return [ self._base_sql_generator.ml_quantile_bucketize( column, self.n_bins, f"kbinsdiscretizer_{column}" diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index 2937368c92..09a46b235d 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -160,7 +160,8 @@ def ml_one_hot_encoder( name: str, ) -> str: """Encode ML.ONE_HOT_ENCODER for BQML. - https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-one-hot-encoder for params.""" + https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-one-hot-encoder for params. + """ return f"""ML.ONE_HOT_ENCODER({sql_utils.identifier(numeric_expr_sql)}, '{drop}', {top_k}, {frequency_threshold}) OVER() AS {sql_utils.identifier(name)}""" def ml_label_encoder( @@ -171,14 +172,16 @@ def ml_label_encoder( name: str, ) -> str: """Encode ML.LABEL_ENCODER for BQML. - https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-label-encoder for params.""" + https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-label-encoder for params. + """ return f"""ML.LABEL_ENCODER({sql_utils.identifier(numeric_expr_sql)}, {top_k}, {frequency_threshold}) OVER() AS {sql_utils.identifier(name)}""" def ml_polynomial_expand( self, columns: Iterable[str], degree: int, name: str ) -> str: """Encode ML.POLYNOMIAL_EXPAND. - https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-polynomial-expand""" + https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-polynomial-expand + """ return f"""ML.POLYNOMIAL_EXPAND({self.struct_columns(columns)}, {degree}) AS {sql_utils.identifier(name)}""" def ml_distance( @@ -190,7 +193,8 @@ def ml_distance( name: str, ) -> str: """Encode ML.DISTANCE for BQML. - https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-distance""" + https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-distance + """ return f"""SELECT *, ML.DISTANCE({sql_utils.identifier(col_x)}, {sql_utils.identifier(col_y)}, '{type}') AS {sql_utils.identifier(name)} FROM ({source_sql})""" def ai_forecast( @@ -199,7 +203,8 @@ def ai_forecast( options: Mapping[str, Union[int, float, bool, Iterable[str]]], ): """Encode AI.FORECAST. 
- https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-forecast""" + https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-forecast + """ named_parameters_sql = self.build_named_parameters(**options) return f"""SELECT * FROM AI.FORECAST(({source_sql}),{named_parameters_sql})""" diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index 577de458f4..cd88cd7fca 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -80,7 +80,8 @@ def metadata(self) -> bigframes.series.Series: """Retrieve the metadata of the Blob. Returns: - bigframes.series.Series: JSON metadata of the Blob. Contains fields: content_type, md5_hash, size and updated(time).""" + bigframes.series.Series: JSON metadata of the Blob. Contains fields: content_type, md5_hash, size and updated(time). + """ series_to_check = bigframes.series.Series(self._data._block) # Check if it's a struct series from a verbose operation if dtypes.is_struct_like(series_to_check.dtype): diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 3cb9d2bb68..5b21f1bb38 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1368,7 +1368,6 @@ def read_json( write_engine=write_engine, ) if engine == "bigquery": - if dtype is not None: raise NotImplementedError( "BigQuery engine does not support the dtype arguments." @@ -2245,7 +2244,8 @@ def _create_bq_connection( iam_role: Optional[str] = None, ) -> str: """Create the connection with the session settings and try to attach iam role to the connection SA. - If any of project, location or connection isn't specified, use the session defaults. Returns fully-qualified connection name.""" + If any of project, location or connection isn't specified, use the session defaults. Returns fully-qualified connection name. + """ connection = self._bq_connection if not connection else connection connection = bigframes.clients.get_canonical_bq_connection_id( connection_id=connection, diff --git a/bigframes/session/bq_caching_executor.py b/bigframes/session/bq_caching_executor.py index 736dbf7be1..3de0e86993 100644 --- a/bigframes/session/bq_caching_executor.py +++ b/bigframes/session/bq_caching_executor.py @@ -623,7 +623,6 @@ def _execute_plan_gbq( create_table = True if not cache_spec.cluster_cols: - offsets_id = bigframes.core.identifiers.ColumnId( bigframes.core.guid.generate_guid() ) diff --git a/noxfile.py b/noxfile.py index 1e6bc0b652..054c4bbae6 100644 --- a/noxfile.py +++ b/noxfile.py @@ -28,7 +28,7 @@ import nox import nox.sessions -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" FLAKE8_VERSION = "flake8==7.1.2" ISORT_VERSION = "isort==5.12.0" MYPY_VERSION = "mypy==1.15.0" diff --git a/tests/system/large/functions/test_managed_function.py b/tests/system/large/functions/test_managed_function.py index 732123ec84..a74ff29273 100644 --- a/tests/system/large/functions/test_managed_function.py +++ b/tests/system/large/functions/test_managed_function.py @@ -32,7 +32,6 @@ def test_managed_function_array_output(session, scalars_dfs, dataset_id): try: - with warnings.catch_warnings(record=True) as record: @session.udf( @@ -85,7 +84,6 @@ def featurize(x: int) -> list[float]: def test_managed_function_series_apply(session, dataset_id, scalars_dfs): try: - # An explicit name with "def" in it is used to test the robustness of # the user code extraction logic, which depends on that term. 
bq_name = f"{prefixer.create_prefix()}_def_to_test_code_extraction" @@ -145,7 +143,6 @@ def test_managed_function_series_apply_array_output( scalars_dfs, ): try: - with pytest.warns(bfe.PreviewWarning, match="udf is in preview."): @session.udf(dataset=dataset_id, name=prefixer.create_prefix()) @@ -233,7 +230,6 @@ def add(x: int, y: int) -> int: def test_managed_function_series_combine_array_output(session, dataset_id, scalars_dfs): try: - # The type hints in this function's signature has conflicts. The # `input_types` and `output_type` arguments from udf decorator take # precedence and will be used instead. @@ -451,7 +447,6 @@ def foo(x, y, z): return [str(x), str(y), z] try: - assert getattr(foo, "is_row_processor") is False assert getattr(foo, "input_dtypes") == expected_dtypes assert getattr(foo, "output_dtype") == pandas.ArrowDtype( @@ -771,7 +766,6 @@ def analyze(row): "\nenvironment may not precisely match your local environment." ), ): - analyze_mf = session.udf( input_types=pandas.Series, output_type=str, @@ -1087,7 +1081,6 @@ def analyze(s: pandas.Series, x: bool, y: float) -> str: def test_managed_function_df_where_mask(session, dataset_id, scalars_dfs): try: - # The return type has to be bool type for callable where condition. def is_sum_positive(a, b): return a + b > 0 @@ -1154,7 +1147,6 @@ def is_sum_positive(a, b): def test_managed_function_df_where_mask_series(session, dataset_id, scalars_dfs): try: - # The return type has to be bool type for callable where condition. def is_sum_positive_series(s): return s["int64_col"] + s["int64_too"] > 0 @@ -1254,7 +1246,6 @@ def the_sum(s: pandas.Series) -> int: def test_managed_function_series_where_mask_map(session, dataset_id, scalars_dfs): try: - # The return type has to be bool type for callable where condition. def _is_positive(s): return s + 1000 > 0 @@ -1307,7 +1298,6 @@ def _is_positive(s): def test_managed_function_series_apply_args(session, dataset_id, scalars_dfs): try: - with pytest.warns(bfe.PreviewWarning, match="udf is in preview."): @session.udf(dataset=dataset_id, name=prefixer.create_prefix()) diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 253bc7b617..317e459fed 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -842,7 +842,6 @@ def test_remote_function_with_external_package_dependencies( session, scalars_dfs, dataset_id, bq_cf_connection ): try: - # The return type hint in this function's signature has conflict. The # `output_type` argument from remote_function decorator takes precedence # and will be used instead. 
@@ -897,7 +896,6 @@ def test_remote_function_with_explicit_name_reuse( session, scalars_dfs, dataset_id, bq_cf_connection ): try: - dirs_to_cleanup = [] # Define a user code @@ -1251,7 +1249,6 @@ def test_remote_function_via_session_custom_sa(scalars_dfs): rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project)) try: - # TODO(shobs): Figure out why the default ingress setting # (internal-only) does not work here @rf_session.remote_function( @@ -1324,7 +1321,6 @@ def test_remote_function_via_session_custom_build_sa( rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project)) try: - # TODO(shobs): Figure out why the default ingress setting # (internal-only) does not work here @rf_session.remote_function( @@ -3006,7 +3002,6 @@ def foo(x: int) -> int: @pytest.mark.flaky(retries=2, delay=120) def test_remote_function_df_where_mask(session, dataset_id, scalars_dfs): try: - # The return type has to be bool type for callable where condition. def is_sum_positive(a, b): return a + b > 0 @@ -3085,7 +3080,6 @@ def the_sum(a, b): @pytest.mark.flaky(retries=2, delay=120) def test_remote_function_df_where_mask_series(session, dataset_id, scalars_dfs): try: - # The return type has to be bool type for callable where condition. def is_sum_positive_series(s: pandas.Series) -> bool: return s["int64_col"] + s["int64_too"] > 0 diff --git a/tests/system/small/ml/test_metrics.py b/tests/system/small/ml/test_metrics.py index 040d4d97f6..848acb714d 100644 --- a/tests/system/small/ml/test_metrics.py +++ b/tests/system/small/ml/test_metrics.py @@ -798,7 +798,6 @@ def test_precision_score_binary_default_arguments(session): def test_precision_score_binary_invalid_input_raise_error( session, y_true, y_pred, pos_label ): - bf_y_true = session.read_pandas(y_true) bf_y_pred = session.read_pandas(y_pred) diff --git a/tests/system/small/ml/test_model_selection.py b/tests/system/small/ml/test_model_selection.py index ebce6e405a..cbea1e20a0 100644 --- a/tests/system/small/ml/test_model_selection.py +++ b/tests/system/small/ml/test_model_selection.py @@ -323,7 +323,13 @@ def test_train_test_split_value_error(penguins_df_default_index, train_size, tes ) def test_train_test_split_stratify(df_fixture, request): df = request.getfixturevalue(df_fixture) - X = df[["species", "island", "culmen_length_mm",]].rename( + X = df[ + [ + "species", + "island", + "culmen_length_mm", + ] + ].rename( columns={"species": "x_species"} ) # Keep "species" col just for easy checking. Rename to avoid conflicts. 
y = df[["species"]] diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index b0eeb4a3c2..729dcb3af9 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -407,7 +407,6 @@ def test_widget_with_empty_dataframe_should_have_zero_row_count( def test_widget_with_empty_dataframe_should_render_table_headers( empty_bf_df: bf.dataframe.DataFrame, ): - """ @@ -423,7 +422,6 @@ def test_widget_with_empty_dataframe_should_render_table_headers( """ with bigframes.option_context("display.repr_mode", "anywidget"): - from bigframes.display import TableWidget widget = TableWidget(empty_bf_df) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index d2a157b131..0022bb0fd2 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -946,7 +946,6 @@ def test_repr_w_display_options(scalars_dfs, session): with bigframes.option_context( "display.max_rows", 10, "display.max_columns", 5, "display.max_colwidth", 10 ): - # When there are 10 or fewer rows, the outputs should be identical except for the extra note. actual = scalars_df.head(10).__repr__() executions_post = metrics.execution_count @@ -2684,7 +2683,6 @@ def test_df_idxmax(): ], ) def test_df_align(join, axis): - index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64") index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64") @@ -5073,7 +5071,6 @@ def test_iloc_list_multiindex(scalars_dfs): def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index): - index_list: List[int] = [] bf_result = scalars_df_index.iloc[index_list] diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index 02acb8d8f2..fe2e78385b 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -257,7 +257,6 @@ def test_to_pandas_override_global_option(scalars_df_index): # Direct call to_pandas uses global default setting (allow_large_results=True), # table has 'bqdf' prefix. 
with bigframes.option_context("compute.allow_large_results", True): - scalars_df_index.to_pandas() table_id = scalars_df_index._query_job.destination.table_id assert table_id is not None @@ -328,7 +327,6 @@ def test_to_pandas_dry_run(session, scalars_pandas_df_multi_index): def test_to_arrow_override_global_option(scalars_df_index): # Direct call to_arrow uses global default setting (allow_large_results=True), with bigframes.option_context("compute.allow_large_results", True): - scalars_df_index.to_arrow() table_id = scalars_df_index._query_job.destination.table_id assert table_id is not None diff --git a/tests/system/small/test_index_io.py b/tests/system/small/test_index_io.py index 306b15e67a..b4d7c06da5 100644 --- a/tests/system/small/test_index_io.py +++ b/tests/system/small/test_index_io.py @@ -18,7 +18,6 @@ def test_to_pandas_override_global_option(scalars_df_index): with bigframes.option_context("compute.allow_large_results", True): - bf_index = scalars_df_index.index # Direct call to_pandas uses global default setting (allow_large_results=True), @@ -43,7 +42,6 @@ def test_to_pandas_dry_run(scalars_df_index): def test_to_numpy_override_global_option(scalars_df_index): with bigframes.option_context("compute.allow_large_results", True): - bf_index = scalars_df_index.index # Direct call to_numpy uses global default setting (allow_large_results=True), diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py index 4aa7ba8c77..eb9dc114dd 100644 --- a/tests/system/small/test_null_index.py +++ b/tests/system/small/test_null_index.py @@ -381,7 +381,6 @@ def test_null_index_df_concat(scalars_df_null_index, scalars_pandas_df_default_i def test_null_index_map_dict_input( scalars_df_null_index, scalars_pandas_df_default_index ): - local_map = dict() # construct a local map, incomplete to cover behavior for s in scalars_pandas_df_default_index.string_col[:-3]: diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index a1c0dc9851..e83ba05837 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -101,7 +101,7 @@ def test_get_dummies_dataframe(scalars_dfs, kwargs): # dtype argument above is needed for pandas v1 only # adjust for expected dtype differences - for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes): + for column_name, type_name in zip(pd_result.columns, pd_result.dtypes): if type_name == "bool": pd_result[column_name] = pd_result[column_name].astype("boolean") @@ -130,7 +130,7 @@ def test_get_dummies_dataframe_duplicate_labels(scalars_dfs): # dtype argument above is needed for pandas v1 only # adjust for expected dtype differences - for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes): + for column_name, type_name in zip(pd_result.columns, pd_result.dtypes): if type_name == "bool": pd_result[column_name] = pd_result[column_name].astype("boolean") @@ -147,7 +147,7 @@ def test_get_dummies_series(scalars_dfs): # dtype argument above is needed for pandas v1 only # adjust for expected dtype differences - for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes): + for column_name, type_name in zip(pd_result.columns, pd_result.dtypes): if type_name == "bool": # pragma: NO COVER pd_result[column_name] = pd_result[column_name].astype("boolean") pd_result.columns = pd_result.columns.astype(object) @@ -168,7 +168,7 @@ def test_get_dummies_series_nameless(scalars_dfs): # dtype argument above is needed for pandas v1 only # adjust for expected dtype 
differences - for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes): + for column_name, type_name in zip(pd_result.columns, pd_result.dtypes): if type_name == "bool": # pragma: NO COVER pd_result[column_name] = pd_result[column_name].astype("boolean") pd_result.columns = pd_result.columns.astype(object) diff --git a/tests/system/small/test_pandas_options.py b/tests/system/small/test_pandas_options.py index 7a750ddfd3..a2a90f3fe5 100644 --- a/tests/system/small/test_pandas_options.py +++ b/tests/system/small/test_pandas_options.py @@ -50,7 +50,6 @@ def test_read_gbq_start_sets_session_location( query_prefix, reset_default_session_and_location, ): - # Form query as a table name or a SQL depending on the test scenario query_tokyo = test_data_tables_tokyo["scalars"] query = test_data_tables["scalars"] diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index a95c9623e5..ca98377c8b 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -3002,7 +3002,6 @@ def test_value_counts_w_cut(scalars_dfs): def test_iloc_nested(scalars_df_index, scalars_pandas_df_index): - bf_result = scalars_df_index["string_col"].iloc[1:].iloc[1:].to_pandas() pd_result = scalars_pandas_df_index["string_col"].iloc[1:].iloc[1:] diff --git a/tests/system/small/test_series_io.py b/tests/system/small/test_series_io.py index 426679d37d..2f1780812a 100644 --- a/tests/system/small/test_series_io.py +++ b/tests/system/small/test_series_io.py @@ -22,7 +22,6 @@ def test_to_pandas_override_global_option(scalars_df_index): with bigframes.option_context("compute.allow_large_results", True): - bf_series = scalars_df_index["int64_col"] # Direct call to_pandas uses global default setting (allow_large_results=True) diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 698f531d57..153deedcd9 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -871,7 +871,6 @@ def test_read_pandas(session, scalars_dfs): def test_read_pandas_series(session): - idx: pd.Index = pd.Index([2, 7, 1, 2, 8], dtype=pd.Int64Dtype()) pd_series = pd.Series([3, 1, 4, 1, 5], dtype=pd.Int64Dtype(), index=idx) bf_series = session.read_pandas(pd_series) @@ -880,7 +879,6 @@ def test_read_pandas_series(session): def test_read_pandas_index(session): - pd_idx: pd.Index = pd.Index([2, 7, 1, 2, 8], dtype=pd.Int64Dtype()) bf_idx = session.read_pandas(pd_idx) diff --git a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py index d1856b259d..29346486a3 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py @@ -183,7 +183,6 @@ def test_rstrip(scalar_types_df: bpd.DataFrame, snapshot): def test_startswith(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" bf_df = scalar_types_df[[col_name]] ops_map = { diff --git a/tests/unit/core/compile/sqlglot/test_compile_concat.py b/tests/unit/core/compile/sqlglot/test_compile_concat.py index c176b2e116..80cf16558b 100644 --- a/tests/unit/core/compile/sqlglot/test_compile_concat.py +++ b/tests/unit/core/compile/sqlglot/test_compile_concat.py @@ -28,7 +28,6 @@ def test_compile_concat(scalar_types_df: bpd.DataFrame, snapshot): def test_compile_concat_filter_sorted(scalar_types_df: bpd.DataFrame, snapshot): - scalars_array_value = scalar_types_df._block.expr input_1 = 
scalars_array_value.select_columns(["float64_col", "int64_col"]).order_by( [ordering.ascending_over("int64_col")] diff --git a/tests/unit/functions/test_remote_function_utils.py b/tests/unit/functions/test_remote_function_utils.py index 812d65bbad..e200e7c12a 100644 --- a/tests/unit/functions/test_remote_function_utils.py +++ b/tests/unit/functions/test_remote_function_utils.py @@ -441,7 +441,6 @@ def test_has_conflict_output_type_no_annotation(): ), ) def test_get_bigframes_metadata(metadata_options, metadata_string): - assert _utils.get_bigframes_metadata(**metadata_options) == metadata_string @@ -514,7 +513,6 @@ def test_get_bigframes_metadata_array_type_not_serializable(output_type): def test_get_python_output_type_from_bigframes_metadata( metadata_string, python_output_type ): - assert ( _utils.get_python_output_type_from_bigframes_metadata(metadata_string) == python_output_type diff --git a/tests/unit/test_dataframe_polars.py b/tests/unit/test_dataframe_polars.py index 1c73d9dc6b..1125b13f95 100644 --- a/tests/unit/test_dataframe_polars.py +++ b/tests/unit/test_dataframe_polars.py @@ -1979,7 +1979,6 @@ def test_df_idxmax(): ], ) def test_df_align(join, axis): - index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64") index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64") @@ -3906,7 +3905,6 @@ def test_iloc_list_multiindex(scalars_dfs): def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index): - index_list: List[int] = [] bf_result = scalars_df_index.iloc[index_list] diff --git a/tests/unit/test_series_polars.py b/tests/unit/test_series_polars.py index 516a46d4dd..494e2499db 100644 --- a/tests/unit/test_series_polars.py +++ b/tests/unit/test_series_polars.py @@ -3025,7 +3025,6 @@ def test_value_counts_w_cut(scalars_dfs): def test_iloc_nested(scalars_df_index, scalars_pandas_df_index): - bf_result = scalars_df_index["string_col"].iloc[1:].iloc[1:].to_pandas() pd_result = scalars_pandas_df_index["string_col"].iloc[1:].iloc[1:] diff --git a/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py b/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py index 64a5786f17..1301ef329a 100644 --- a/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py +++ b/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py @@ -84,5 +84,6 @@ def transform(self, X): Returns: bigframes.dataframe.DataFrame: The result is categorized as index: number, value: number, - where index is the position of the dict seeing the category, and value is 0 or 1.""" + where index is the position of the dict seeing the category, and value is 0 or 1. 
+ """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From f42ff08ff7c7216bb3ed7e2b69a328b3b47cf5e4 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 12 Dec 2025 20:00:48 +0000 Subject: [PATCH 05/31] revert docs --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 054c4bbae6..475c165229 100644 --- a/noxfile.py +++ b/noxfile.py @@ -430,7 +430,7 @@ def system_noextras(session: nox.sessions.Session): ) -@nox.session(python=LATEST_FULLY_SUPPORTED_PYTHON) +@nox.session(python="3.10") def doctest(session: nox.sessions.Session): """Run the system test suite.""" run_system( From ff03a4a94ef059d31ed8708f4c1b595d0816c7dc Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 12 Dec 2025 20:03:19 +0000 Subject: [PATCH 06/31] clean up --- noxfile.py | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/noxfile.py b/noxfile.py index 475c165229..729a215106 100644 --- a/noxfile.py +++ b/noxfile.py @@ -33,11 +33,6 @@ ISORT_VERSION = "isort==5.12.0" MYPY_VERSION = "mypy==1.15.0" -# TODO: switch to 3.14 once remote functions adds a runtime for it -# https://cloud.google.com/run/docs/runtimes/python -# https://cloud.google.com/functions/docs/runtime-support#python -LATEST_FULLY_SUPPORTED_PYTHON = "3.13" - # Notebook tests should match colab and BQ Studio. # Check with import sys; sys.version_info # on a fresh notebook runtime. @@ -135,7 +130,7 @@ # from GitHub actions. "unit_noextras", "system-3.9", # No extras. - f"system-{LATEST_FULLY_SUPPORTED_PYTHON}", # All extras. + f"system-{DEFAULT_PYTHON_VERSION}", # All extras. "cover", # TODO(b/401609005): remove "cleanup", @@ -419,7 +414,7 @@ def system(session: nox.sessions.Session): ) -@nox.session(python=LATEST_FULLY_SUPPORTED_PYTHON) +@nox.session(python=DEFAULT_PYTHON_VERSION) def system_noextras(session: nox.sessions.Session): """Run the system test suite.""" run_system( @@ -812,21 +807,6 @@ def notebook(session: nox.Session): "notebooks/dataframes/anywidget_mode.ipynb", ] - # TODO: remove exception for Python 3.14 once remote functions adds a runtime for it - # https://cloud.google.com/run/docs/runtimes/python - # https://cloud.google.com/functions/docs/runtime-support#python - # sys.exit(0) or pytest.skip(...). - # See: https://github.com/treebeardtech/nbmake/issues/134 - if session.python == "3.14": - denylist.extend( - [ - "notebooks/getting_started/getting_started_bq_dataframes.ipynb", - "notebooks/remote_functions/remote_function_usecases.ipynb", - "notebooks/remote_functions/remote_function_vertex_claude_model.ipynb", - "notebooks/remote_functions/remote_function.ipynb", - ] - ) - # Convert each Path notebook object to a string using a list comprehension, # and remove tests that we choose not to test. 
notebooks = [str(nb) for nb in notebooks_list] From ed90b2e897ba2385f9c67b59937757737a1d1676 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 12 Dec 2025 20:06:46 +0000 Subject: [PATCH 07/31] clean up --- .../getting_started_bq_dataframes.ipynb | 14 -------------- notebooks/location/regionalized.ipynb | 14 -------------- notebooks/remote_functions/remote_function.ipynb | 15 --------------- .../remote_function_usecases.ipynb | 14 -------------- .../remote_function_vertex_claude_model.ipynb | 14 -------------- 5 files changed, 71 deletions(-) diff --git a/notebooks/getting_started/getting_started_bq_dataframes.ipynb b/notebooks/getting_started/getting_started_bq_dataframes.ipynb index 8f7d046840..f9fb950c53 100644 --- a/notebooks/getting_started/getting_started_bq_dataframes.ipynb +++ b/notebooks/getting_started/getting_started_bq_dataframes.ipynb @@ -1329,20 +1329,6 @@ "Running your own Python functions (or being able to bring your packages) and using them at scale is a challenge many data scientists face. BigQuery DataFrames makes it easy to deploy [remote functions](https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.pandas#bigframes_pandas_remote_function) that run scalar Python functions at BigQuery scale. These functions are persisted as [BigQuery remote functions](https://cloud.google.com/bigquery/docs/remote-functions) that you can then re-use." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "# Python 3.14 is not yet a supported runtime for remote functions.\n", - "# See: https://cloud.google.com/functions/docs/runtime-support#python for the supported runtimes.\n", - "if sys.version_info >= (3, 14, 0):\n", - " sys.exit(0)" - ] - }, { "cell_type": "markdown", "metadata": { diff --git a/notebooks/location/regionalized.ipynb b/notebooks/location/regionalized.ipynb index 8acfd9a6b2..23313ec0c4 100644 --- a/notebooks/location/regionalized.ipynb +++ b/notebooks/location/regionalized.ipynb @@ -1339,20 +1339,6 @@ "# Using the Remote Functions" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "# Python 3.14 is not yet a supported runtime for remote functions.\n", - "# See: https://cloud.google.com/functions/docs/runtime-support#python for the supported runtimes.\n", - "if sys.version_info >= (3, 14, 0):\n", - " sys.exit(0)" - ] - }, { "attachments": {}, "cell_type": "markdown", diff --git a/notebooks/remote_functions/remote_function.ipynb b/notebooks/remote_functions/remote_function.ipynb index e0cfe7026f..4c0524d402 100644 --- a/notebooks/remote_functions/remote_function.ipynb +++ b/notebooks/remote_functions/remote_function.ipynb @@ -1,20 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "bcff4fc4", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "# Python 3.14 is not yet a supported runtime for remote functions.\n", - "# See: https://cloud.google.com/functions/docs/runtime-support#python for the supported runtimes.\n", - "if sys.version_info >= (3, 14, 0):\n", - " sys.exit(0)" - ] - }, { "cell_type": "code", "execution_count": 19, diff --git a/notebooks/remote_functions/remote_function_usecases.ipynb b/notebooks/remote_functions/remote_function_usecases.ipynb index bdc7e4b853..e3a94160ad 100644 --- a/notebooks/remote_functions/remote_function_usecases.ipynb +++ b/notebooks/remote_functions/remote_function_usecases.ipynb 
@@ -21,20 +21,6 @@ "# limitations under the License." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "# Python 3.14 is not yet a supported runtime for remote functions.\n", - "# See: https://cloud.google.com/functions/docs/runtime-support#python for the supported runtimes.\n", - "if sys.version_info >= (3, 14, 0):\n", - " sys.exit(0)" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb index 33d6d35615..087f004cb4 100644 --- a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb +++ b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb @@ -28,20 +28,6 @@ "" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "# Python 3.14 is not yet a supported runtime for remote functions.\n", - "# See: https://cloud.google.com/functions/docs/runtime-support#python for the supported runtimes.\n", - "if sys.version_info >= (3, 14, 0):\n", - " sys.exit(0)" - ] - }, { "cell_type": "markdown", "metadata": {}, From bdb4d92b61770396a7ca5c9fd0b962b99221f4dd Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 12 Dec 2025 20:07:34 +0000 Subject: [PATCH 08/31] clean up --- noxfile.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/noxfile.py b/noxfile.py index 729a215106..7be5e34cae 100644 --- a/noxfile.py +++ b/noxfile.py @@ -57,11 +57,6 @@ DEFAULT_PYTHON_VERSION = "3.14" -# Cloud Run Functions supports Python versions up to 3.13 -# https://cloud.google.com/run/docs/runtimes/python -# https://cloud.google.com/functions/docs/runtime-support#python -E2E_TEST_PYTHON_VERSION = "3.13" - UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] UNIT_TEST_STANDARD_DEPENDENCIES = [ "mock", @@ -449,7 +444,7 @@ def doctest(session: nox.sessions.Session): ) -@nox.session(python=E2E_TEST_PYTHON_VERSION) +@nox.session(python=DEFAULT_PYTHON_VERSION) def e2e(session: nox.sessions.Session): """Run the large tests in system test suite.""" run_system( From 72dea59729b40c45b1197fa90372f50d6be81062 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 12 Dec 2025 20:56:31 +0000 Subject: [PATCH 09/31] restore changes --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 7be5e34cae..804e567532 100644 --- a/noxfile.py +++ b/noxfile.py @@ -420,7 +420,7 @@ def system_noextras(session: nox.sessions.Session): ) -@nox.session(python="3.10") +@nox.session(python="3.14") def doctest(session: nox.sessions.Session): """Run the system test suite.""" run_system( @@ -510,7 +510,7 @@ def cover(session): session.run("coverage", "erase") -@nox.session(python="3.13") +@nox.session(python="3.14") def docs(session): """Build the docs for this library.""" session.install("-e", ".[scikit-learn]") From 8ac7bf0b6e236045ba8a6f42ccf64e08a7e7d536 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 15 Dec 2025 20:46:12 +0000 Subject: [PATCH 10/31] update github action --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 6773aef7c2..d9ff95ed31 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -15,7 +15,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.13" + python-version: "3.14" - name: Install nox run: | python -m pip install --upgrade setuptools pip wheel From 1e95ef502942921ce5895ccc8ae51dbb1d79ff11 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 15 Dec 2025 23:29:57 +0000 Subject: [PATCH 11/31] revert docs --- .github/workflows/docs.yml | 2 +- noxfile.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index d9ff95ed31..83545b51aa 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -15,7 +15,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.14" + python-version: "3.12" - name: Install nox run: | python -m pip install --upgrade setuptools pip wheel diff --git a/noxfile.py b/noxfile.py index 804e567532..cc8f7afdfa 100644 --- a/noxfile.py +++ b/noxfile.py @@ -420,7 +420,7 @@ def system_noextras(session: nox.sessions.Session): ) -@nox.session(python="3.14") +@nox.session(python="3.12") def doctest(session: nox.sessions.Session): """Run the system test suite.""" run_system( @@ -510,7 +510,7 @@ def cover(session): session.run("coverage", "erase") -@nox.session(python="3.14") +@nox.session(python="3.12") def docs(session): """Build the docs for this library.""" session.install("-e", ".[scikit-learn]") From dc9d8a6a68df89a509001cf94aeeba320fc28b70 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 15 Dec 2025 23:37:09 +0000 Subject: [PATCH 12/31] revert changes to mypy --- .github/workflows/mypy.yml | 2 +- noxfile.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index a12138dc2a..fc9e970946 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -15,7 +15,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.14" + python-version: "3.10" - name: Install nox run: | python -m pip install --upgrade setuptools pip wheel diff --git a/noxfile.py b/noxfile.py index cc8f7afdfa..40716d2ecc 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def unit_noextras(session): run_unit(session, install_test_extra=False) -@nox.session(python=DEFAULT_PYTHON_VERSION) +@nox.session(python="3.10") def mypy(session): """Run type checks with mypy.""" # Editable mode is not compatible with mypy when there are multiple From 926a401f21bedde91b5083547eb102997319b957 Mon Sep 17 00:00:00 2001 From: Linchin Date: Tue, 16 Dec 2025 21:15:40 +0000 Subject: [PATCH 13/31] change python version for docs to 3.13 --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 83545b51aa..6773aef7c2 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -15,7 +15,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.12" + python-version: "3.13" - name: Install nox run: | python -m pip install --upgrade setuptools pip wheel From 66b5f8b4dffefa2960e51c2a6075195eeb4497b2 Mon Sep 17 00:00:00 2001 From: Linchin Date: Tue, 16 Dec 2025 21:30:29 +0000 Subject: [PATCH 14/31] python version in docs test --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 40716d2ecc..a5c75e1b07 100644 --- a/noxfile.py +++ b/noxfile.py @@ -510,7 +510,7 @@ def cover(session): session.run("coverage", "erase") -@nox.session(python="3.12")
+@nox.session(python="3.13") def docs(session): """Build the docs for this library.""" session.install("-e", ".[scikit-learn]") From c9cc0db642b7fc972ffd0a732a85855f16f55d2c Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 16 Dec 2025 23:52:12 +0000 Subject: [PATCH 15/31] fix: Ensure _read_gbq_colab correctly logs BigQuery job labels --- bigframes/session/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 5b21f1bb38..d4bd02f1cd 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -515,6 +515,7 @@ def _read_gbq_colab( ) -> pandas.Series: ... + @log_adapter.method_logger @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( self, From 4dfd21ca02a2f905b983020808986499cd5288ff Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 00:02:01 +0000 Subject: [PATCH 16/31] Revert "fix: Ensure _read_gbq_colab correctly logs BigQuery job labels" This reverts commit c9cc0db642b7fc972ffd0a732a85855f16f55d2c. --- bigframes/session/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index d4bd02f1cd..5b21f1bb38 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -515,7 +515,6 @@ def _read_gbq_colab( ) -> pandas.Series: ... - @log_adapter.method_logger @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( self, From 5c018acc92d0c8732f384cef3f89dcdaf54c5b5a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 00:09:35 +0000 Subject: [PATCH 17/31] fix: Ensure _read_gbq_colab correctly logs BigQuery job labels --- bigframes/testing/mocks.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ff210419fd..ead394e497 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -27,7 +27,10 @@ import bigframes import bigframes.clients import bigframes.core.global_session +import bigframes.core.log_adapter import bigframes.dataframe +import bigframes.session._io.bigquery +from bigframes.session._io.bigquery import create_job_configs_labels import bigframes.session.clients """Utilities for creating test resources.""" @@ -90,6 +93,18 @@ def query_mock( job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, **kwargs, ): + job_config = ( + job_config + if job_config is not None + else google.cloud.bigquery.QueryJobConfig() + ) + api_methods = bigframes.core.log_adapter.get_and_reset_api_methods( + dry_run=job_config.dry_run + ) + job_config.labels = create_job_configs_labels( + job_configs_labels=job_config.labels, + api_methods=api_methods, + ) queries.append(query) job_configs.append(copy.deepcopy(job_config)) query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) From 7e02454512f568253ff77abe75cb7996e023fa0c Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 02:01:04 +0000 Subject: [PATCH 18/31] Revert "fix: Ensure _read_gbq_colab correctly logs BigQuery job labels" This reverts commit 5c018acc92d0c8732f384cef3f89dcdaf54c5b5a. 
--- bigframes/testing/mocks.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ead394e497..ff210419fd 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -27,10 +27,7 @@ import bigframes import bigframes.clients import bigframes.core.global_session -import bigframes.core.log_adapter import bigframes.dataframe -import bigframes.session._io.bigquery -from bigframes.session._io.bigquery import create_job_configs_labels import bigframes.session.clients """Utilities for creating test resources.""" @@ -93,18 +90,6 @@ def query_mock( job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, **kwargs, ): - job_config = ( - job_config - if job_config is not None - else google.cloud.bigquery.QueryJobConfig() - ) - api_methods = bigframes.core.log_adapter.get_and_reset_api_methods( - dry_run=job_config.dry_run - ) - job_config.labels = create_job_configs_labels( - job_configs_labels=job_config.labels, - api_methods=api_methods, - ) queries.append(query) job_configs.append(copy.deepcopy(job_config)) query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) From e2c38d3031a1ad9eb640f966ffeed6b7c494f7f2 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 02:08:43 +0000 Subject: [PATCH 19/31] fix(core): Make log_adapter state thread-local --- bigframes/core/log_adapter.py | 58 +++++++++++++++++++---------------- bigframes/testing/mocks.py | 15 +++++++++ 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py index 8179ffbeed..8d2e677cf7 100644 --- a/bigframes/core/log_adapter.py +++ b/bigframes/core/log_adapter.py @@ -15,12 +15,14 @@ import functools import inspect import threading -from typing import List, Optional +from typing import Optional from google.cloud import bigquery import pandas -_lock = threading.Lock() +_thread_local_data = threading.local() +_thread_local_data._api_methods = [] +_thread_local_data._call_stack = [] # The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), # but leave a few spare for internal labels to be added. @@ -30,12 +32,8 @@ PANDAS_PARAM_TRACKING_TASK = "pandas_param_tracking" LOG_OVERRIDE_NAME = "__log_override_name__" -_api_methods: List = [] _excluded_methods = ["__setattr__", "__getattr__"] -# Stack to track method calls -_call_stack: List = [] - def submit_pandas_labels( bq_client: Optional[bigquery.Client], @@ -172,11 +170,14 @@ def wrapper(*args, **kwargs): base_name = custom_base_name full_method_name = f"{base_name.lower()}-{api_method_name}" + if not hasattr(_thread_local_data, "_call_stack"): + _thread_local_data._call_stack = [] + # Track directly called methods - if len(_call_stack) == 0: + if len(_thread_local_data._call_stack) == 0: add_api_method(full_method_name) - _call_stack.append(full_method_name) + _thread_local_data._call_stack.append(full_method_name) try: return method(*args, **kwargs) @@ -185,7 +186,7 @@ def wrapper(*args, **kwargs): # or not fully supported (NotImplementedError) in BigFrames. # Logging is currently supported only when we can access the bqclient through # _block.session.bqclient. 
- if len(_call_stack) == 1: + if len(_thread_local_data._call_stack) == 1: submit_pandas_labels( _get_bq_client(*args, **kwargs), base_name, @@ -196,7 +197,7 @@ def wrapper(*args, **kwargs): ) raise e finally: - _call_stack.pop() + _thread_local_data._call_stack.pop() return wrapper @@ -214,19 +215,21 @@ def property_logger(prop): def shared_wrapper(prop): @functools.wraps(prop) def wrapped(*args, **kwargs): + if not hasattr(_thread_local_data, "_call_stack"): + _thread_local_data._call_stack = [] qualname_parts = getattr(prop, "__qualname__", prop.__name__).split(".") class_name = qualname_parts[-2] if len(qualname_parts) > 1 else "" property_name = prop.__name__ full_property_name = f"{class_name.lower()}-{property_name.lower()}" - if len(_call_stack) == 0: + if len(_thread_local_data._call_stack) == 0: add_api_method(full_property_name) - _call_stack.append(full_property_name) + _thread_local_data._call_stack.append(full_property_name) try: return prop(*args, **kwargs) finally: - _call_stack.pop() + _thread_local_data._call_stack.pop() return wrapped @@ -251,23 +254,26 @@ def wrapper(func): def add_api_method(api_method_name): - global _lock - global _api_methods - with _lock: - # Push the method to the front of the _api_methods list - _api_methods.insert(0, api_method_name.replace("<", "").replace(">", "")) - # Keep the list length within the maximum limit (adjust MAX_LABELS_COUNT as needed) - _api_methods = _api_methods[:MAX_LABELS_COUNT] + if not hasattr(_thread_local_data, "_api_methods"): + _thread_local_data._api_methods = [] + + # Push the method to the front of the _api_methods list + _thread_local_data._api_methods.insert( + 0, api_method_name.replace("<", "").replace(">", "") + ) + # Keep the list length within the maximum limit + _thread_local_data._api_methods = _thread_local_data._api_methods[:MAX_LABELS_COUNT] def get_and_reset_api_methods(dry_run: bool = False): - global _lock - with _lock: - previous_api_methods = list(_api_methods) + if not hasattr(_thread_local_data, "_api_methods"): + _thread_local_data._api_methods = [] + + previous_api_methods = list(_thread_local_data._api_methods) - # dry_run might not make a job resource, so only reset the log on real queries. - if not dry_run: - _api_methods.clear() + # dry_run might not make a job resource, so only reset the log on real queries. 
+ if not dry_run: + _thread_local_data._api_methods.clear() return previous_api_methods diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ff210419fd..ead394e497 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -27,7 +27,10 @@ import bigframes import bigframes.clients import bigframes.core.global_session +import bigframes.core.log_adapter import bigframes.dataframe +import bigframes.session._io.bigquery +from bigframes.session._io.bigquery import create_job_configs_labels import bigframes.session.clients """Utilities for creating test resources.""" @@ -90,6 +93,18 @@ def query_mock( job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, **kwargs, ): + job_config = ( + job_config + if job_config is not None + else google.cloud.bigquery.QueryJobConfig() + ) + api_methods = bigframes.core.log_adapter.get_and_reset_api_methods( + dry_run=job_config.dry_run + ) + job_config.labels = create_job_configs_labels( + job_configs_labels=job_config.labels, + api_methods=api_methods, + ) queries.append(query) job_configs.append(copy.deepcopy(job_config)) query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) From cf345859d23060159dde095ab62244495d6c6cf5 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 02:39:38 +0000 Subject: [PATCH 20/31] Revert "fix(core): Make log_adapter state thread-local" This reverts commit e2c38d3031a1ad9eb640f966ffeed6b7c494f7f2. --- bigframes/core/log_adapter.py | 58 ++++++++++++++++------------------- bigframes/testing/mocks.py | 15 --------- 2 files changed, 26 insertions(+), 47 deletions(-) diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py index 8d2e677cf7..8179ffbeed 100644 --- a/bigframes/core/log_adapter.py +++ b/bigframes/core/log_adapter.py @@ -15,14 +15,12 @@ import functools import inspect import threading -from typing import Optional +from typing import List, Optional from google.cloud import bigquery import pandas -_thread_local_data = threading.local() -_thread_local_data._api_methods = [] -_thread_local_data._call_stack = [] +_lock = threading.Lock() # The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), # but leave a few spare for internal labels to be added. @@ -32,8 +30,12 @@ PANDAS_PARAM_TRACKING_TASK = "pandas_param_tracking" LOG_OVERRIDE_NAME = "__log_override_name__" +_api_methods: List = [] _excluded_methods = ["__setattr__", "__getattr__"] +# Stack to track method calls +_call_stack: List = [] + def submit_pandas_labels( bq_client: Optional[bigquery.Client], @@ -170,14 +172,11 @@ def wrapper(*args, **kwargs): base_name = custom_base_name full_method_name = f"{base_name.lower()}-{api_method_name}" - if not hasattr(_thread_local_data, "_call_stack"): - _thread_local_data._call_stack = [] - # Track directly called methods - if len(_thread_local_data._call_stack) == 0: + if len(_call_stack) == 0: add_api_method(full_method_name) - _thread_local_data._call_stack.append(full_method_name) + _call_stack.append(full_method_name) try: return method(*args, **kwargs) @@ -186,7 +185,7 @@ def wrapper(*args, **kwargs): # or not fully supported (NotImplementedError) in BigFrames. # Logging is currently supported only when we can access the bqclient through # _block.session.bqclient. 
- if len(_thread_local_data._call_stack) == 1: + if len(_call_stack) == 1: submit_pandas_labels( _get_bq_client(*args, **kwargs), base_name, @@ -197,7 +196,7 @@ def wrapper(*args, **kwargs): ) raise e finally: - _thread_local_data._call_stack.pop() + _call_stack.pop() return wrapper @@ -215,21 +214,19 @@ def property_logger(prop): def shared_wrapper(prop): @functools.wraps(prop) def wrapped(*args, **kwargs): - if not hasattr(_thread_local_data, "_call_stack"): - _thread_local_data._call_stack = [] qualname_parts = getattr(prop, "__qualname__", prop.__name__).split(".") class_name = qualname_parts[-2] if len(qualname_parts) > 1 else "" property_name = prop.__name__ full_property_name = f"{class_name.lower()}-{property_name.lower()}" - if len(_thread_local_data._call_stack) == 0: + if len(_call_stack) == 0: add_api_method(full_property_name) - _thread_local_data._call_stack.append(full_property_name) + _call_stack.append(full_property_name) try: return prop(*args, **kwargs) finally: - _thread_local_data._call_stack.pop() + _call_stack.pop() return wrapped @@ -254,26 +251,23 @@ def wrapper(func): def add_api_method(api_method_name): - if not hasattr(_thread_local_data, "_api_methods"): - _thread_local_data._api_methods = [] - - # Push the method to the front of the _api_methods list - _thread_local_data._api_methods.insert( - 0, api_method_name.replace("<", "").replace(">", "") - ) - # Keep the list length within the maximum limit - _thread_local_data._api_methods = _thread_local_data._api_methods[:MAX_LABELS_COUNT] + global _lock + global _api_methods + with _lock: + # Push the method to the front of the _api_methods list + _api_methods.insert(0, api_method_name.replace("<", "").replace(">", "")) + # Keep the list length within the maximum limit (adjust MAX_LABELS_COUNT as needed) + _api_methods = _api_methods[:MAX_LABELS_COUNT] def get_and_reset_api_methods(dry_run: bool = False): - if not hasattr(_thread_local_data, "_api_methods"): - _thread_local_data._api_methods = [] - - previous_api_methods = list(_thread_local_data._api_methods) + global _lock + with _lock: + previous_api_methods = list(_api_methods) - # dry_run might not make a job resource, so only reset the log on real queries. - if not dry_run: - _thread_local_data._api_methods.clear() + # dry_run might not make a job resource, so only reset the log on real queries. 
+ if not dry_run: + _api_methods.clear() return previous_api_methods diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ead394e497..ff210419fd 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -27,10 +27,7 @@ import bigframes import bigframes.clients import bigframes.core.global_session -import bigframes.core.log_adapter import bigframes.dataframe -import bigframes.session._io.bigquery -from bigframes.session._io.bigquery import create_job_configs_labels import bigframes.session.clients """Utilities for creating test resources.""" @@ -93,18 +90,6 @@ def query_mock( job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, **kwargs, ): - job_config = ( - job_config - if job_config is not None - else google.cloud.bigquery.QueryJobConfig() - ) - api_methods = bigframes.core.log_adapter.get_and_reset_api_methods( - dry_run=job_config.dry_run - ) - job_config.labels = create_job_configs_labels( - job_configs_labels=job_config.labels, - api_methods=api_methods, - ) queries.append(query) job_configs.append(copy.deepcopy(job_config)) query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) From b9ac0d5fb41dddf3623a7142f5e8c41bb192bd2a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 03:19:53 +0000 Subject: [PATCH 21/31] Reapply "fix(core): Make log_adapter state thread-local" This reverts commit cf345859d23060159dde095ab62244495d6c6cf5. --- bigframes/core/log_adapter.py | 58 +++++++++++++++++++---------------- bigframes/testing/mocks.py | 15 +++++++++ 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py index 8179ffbeed..8d2e677cf7 100644 --- a/bigframes/core/log_adapter.py +++ b/bigframes/core/log_adapter.py @@ -15,12 +15,14 @@ import functools import inspect import threading -from typing import List, Optional +from typing import Optional from google.cloud import bigquery import pandas -_lock = threading.Lock() +_thread_local_data = threading.local() +_thread_local_data._api_methods = [] +_thread_local_data._call_stack = [] # The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), # but leave a few spare for internal labels to be added. @@ -30,12 +32,8 @@ PANDAS_PARAM_TRACKING_TASK = "pandas_param_tracking" LOG_OVERRIDE_NAME = "__log_override_name__" -_api_methods: List = [] _excluded_methods = ["__setattr__", "__getattr__"] -# Stack to track method calls -_call_stack: List = [] - def submit_pandas_labels( bq_client: Optional[bigquery.Client], @@ -172,11 +170,14 @@ def wrapper(*args, **kwargs): base_name = custom_base_name full_method_name = f"{base_name.lower()}-{api_method_name}" + if not hasattr(_thread_local_data, "_call_stack"): + _thread_local_data._call_stack = [] + # Track directly called methods - if len(_call_stack) == 0: + if len(_thread_local_data._call_stack) == 0: add_api_method(full_method_name) - _call_stack.append(full_method_name) + _thread_local_data._call_stack.append(full_method_name) try: return method(*args, **kwargs) @@ -185,7 +186,7 @@ def wrapper(*args, **kwargs): # or not fully supported (NotImplementedError) in BigFrames. # Logging is currently supported only when we can access the bqclient through # _block.session.bqclient. 
- if len(_call_stack) == 1: + if len(_thread_local_data._call_stack) == 1: submit_pandas_labels( _get_bq_client(*args, **kwargs), base_name, @@ -196,7 +197,7 @@ def wrapper(*args, **kwargs): ) raise e finally: - _call_stack.pop() + _thread_local_data._call_stack.pop() return wrapper @@ -214,19 +215,21 @@ def property_logger(prop): def shared_wrapper(prop): @functools.wraps(prop) def wrapped(*args, **kwargs): + if not hasattr(_thread_local_data, "_call_stack"): + _thread_local_data._call_stack = [] qualname_parts = getattr(prop, "__qualname__", prop.__name__).split(".") class_name = qualname_parts[-2] if len(qualname_parts) > 1 else "" property_name = prop.__name__ full_property_name = f"{class_name.lower()}-{property_name.lower()}" - if len(_call_stack) == 0: + if len(_thread_local_data._call_stack) == 0: add_api_method(full_property_name) - _call_stack.append(full_property_name) + _thread_local_data._call_stack.append(full_property_name) try: return prop(*args, **kwargs) finally: - _call_stack.pop() + _thread_local_data._call_stack.pop() return wrapped @@ -251,23 +254,26 @@ def wrapper(func): def add_api_method(api_method_name): - global _lock - global _api_methods - with _lock: - # Push the method to the front of the _api_methods list - _api_methods.insert(0, api_method_name.replace("<", "").replace(">", "")) - # Keep the list length within the maximum limit (adjust MAX_LABELS_COUNT as needed) - _api_methods = _api_methods[:MAX_LABELS_COUNT] + if not hasattr(_thread_local_data, "_api_methods"): + _thread_local_data._api_methods = [] + + # Push the method to the front of the _api_methods list + _thread_local_data._api_methods.insert( + 0, api_method_name.replace("<", "").replace(">", "") + ) + # Keep the list length within the maximum limit + _thread_local_data._api_methods = _thread_local_data._api_methods[:MAX_LABELS_COUNT] def get_and_reset_api_methods(dry_run: bool = False): - global _lock - with _lock: - previous_api_methods = list(_api_methods) + if not hasattr(_thread_local_data, "_api_methods"): + _thread_local_data._api_methods = [] + + previous_api_methods = list(_thread_local_data._api_methods) - # dry_run might not make a job resource, so only reset the log on real queries. - if not dry_run: - _api_methods.clear() + # dry_run might not make a job resource, so only reset the log on real queries. 
+ if not dry_run: + _thread_local_data._api_methods.clear() return previous_api_methods diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ff210419fd..ead394e497 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -27,7 +27,10 @@ import bigframes import bigframes.clients import bigframes.core.global_session +import bigframes.core.log_adapter import bigframes.dataframe +import bigframes.session._io.bigquery +from bigframes.session._io.bigquery import create_job_configs_labels import bigframes.session.clients """Utilities for creating test resources.""" @@ -90,6 +93,18 @@ def query_mock( job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, **kwargs, ): + job_config = ( + job_config + if job_config is not None + else google.cloud.bigquery.QueryJobConfig() + ) + api_methods = bigframes.core.log_adapter.get_and_reset_api_methods( + dry_run=job_config.dry_run + ) + job_config.labels = create_job_configs_labels( + job_configs_labels=job_config.labels, + api_methods=api_methods, + ) queries.append(query) job_configs.append(copy.deepcopy(job_config)) query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) From 07ef19be3c9601489fda5960f2199171926f66da Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 03:24:59 +0000 Subject: [PATCH 22/31] Revert "Reapply "fix(core): Make log_adapter state thread-local"" This reverts commit b9ac0d5fb41dddf3623a7142f5e8c41bb192bd2a. --- bigframes/core/log_adapter.py | 58 ++++++++++++++++------------------- bigframes/testing/mocks.py | 15 --------- 2 files changed, 26 insertions(+), 47 deletions(-) diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py index 8d2e677cf7..8179ffbeed 100644 --- a/bigframes/core/log_adapter.py +++ b/bigframes/core/log_adapter.py @@ -15,14 +15,12 @@ import functools import inspect import threading -from typing import Optional +from typing import List, Optional from google.cloud import bigquery import pandas -_thread_local_data = threading.local() -_thread_local_data._api_methods = [] -_thread_local_data._call_stack = [] +_lock = threading.Lock() # The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), # but leave a few spare for internal labels to be added. @@ -32,8 +30,12 @@ PANDAS_PARAM_TRACKING_TASK = "pandas_param_tracking" LOG_OVERRIDE_NAME = "__log_override_name__" +_api_methods: List = [] _excluded_methods = ["__setattr__", "__getattr__"] +# Stack to track method calls +_call_stack: List = [] + def submit_pandas_labels( bq_client: Optional[bigquery.Client], @@ -170,14 +172,11 @@ def wrapper(*args, **kwargs): base_name = custom_base_name full_method_name = f"{base_name.lower()}-{api_method_name}" - if not hasattr(_thread_local_data, "_call_stack"): - _thread_local_data._call_stack = [] - # Track directly called methods - if len(_thread_local_data._call_stack) == 0: + if len(_call_stack) == 0: add_api_method(full_method_name) - _thread_local_data._call_stack.append(full_method_name) + _call_stack.append(full_method_name) try: return method(*args, **kwargs) @@ -186,7 +185,7 @@ def wrapper(*args, **kwargs): # or not fully supported (NotImplementedError) in BigFrames. # Logging is currently supported only when we can access the bqclient through # _block.session.bqclient. 
- if len(_thread_local_data._call_stack) == 1: + if len(_call_stack) == 1: submit_pandas_labels( _get_bq_client(*args, **kwargs), base_name, @@ -197,7 +196,7 @@ def wrapper(*args, **kwargs): ) raise e finally: - _thread_local_data._call_stack.pop() + _call_stack.pop() return wrapper @@ -215,21 +214,19 @@ def property_logger(prop): def shared_wrapper(prop): @functools.wraps(prop) def wrapped(*args, **kwargs): - if not hasattr(_thread_local_data, "_call_stack"): - _thread_local_data._call_stack = [] qualname_parts = getattr(prop, "__qualname__", prop.__name__).split(".") class_name = qualname_parts[-2] if len(qualname_parts) > 1 else "" property_name = prop.__name__ full_property_name = f"{class_name.lower()}-{property_name.lower()}" - if len(_thread_local_data._call_stack) == 0: + if len(_call_stack) == 0: add_api_method(full_property_name) - _thread_local_data._call_stack.append(full_property_name) + _call_stack.append(full_property_name) try: return prop(*args, **kwargs) finally: - _thread_local_data._call_stack.pop() + _call_stack.pop() return wrapped @@ -254,26 +251,23 @@ def wrapper(func): def add_api_method(api_method_name): - if not hasattr(_thread_local_data, "_api_methods"): - _thread_local_data._api_methods = [] - - # Push the method to the front of the _api_methods list - _thread_local_data._api_methods.insert( - 0, api_method_name.replace("<", "").replace(">", "") - ) - # Keep the list length within the maximum limit - _thread_local_data._api_methods = _thread_local_data._api_methods[:MAX_LABELS_COUNT] + global _lock + global _api_methods + with _lock: + # Push the method to the front of the _api_methods list + _api_methods.insert(0, api_method_name.replace("<", "").replace(">", "")) + # Keep the list length within the maximum limit (adjust MAX_LABELS_COUNT as needed) + _api_methods = _api_methods[:MAX_LABELS_COUNT] def get_and_reset_api_methods(dry_run: bool = False): - if not hasattr(_thread_local_data, "_api_methods"): - _thread_local_data._api_methods = [] - - previous_api_methods = list(_thread_local_data._api_methods) + global _lock + with _lock: + previous_api_methods = list(_api_methods) - # dry_run might not make a job resource, so only reset the log on real queries. - if not dry_run: - _thread_local_data._api_methods.clear() + # dry_run might not make a job resource, so only reset the log on real queries. 
+ if not dry_run: + _api_methods.clear() return previous_api_methods diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ead394e497..ff210419fd 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -27,10 +27,7 @@ import bigframes import bigframes.clients import bigframes.core.global_session -import bigframes.core.log_adapter import bigframes.dataframe -import bigframes.session._io.bigquery -from bigframes.session._io.bigquery import create_job_configs_labels import bigframes.session.clients """Utilities for creating test resources.""" @@ -93,18 +90,6 @@ def query_mock( job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, **kwargs, ): - job_config = ( - job_config - if job_config is not None - else google.cloud.bigquery.QueryJobConfig() - ) - api_methods = bigframes.core.log_adapter.get_and_reset_api_methods( - dry_run=job_config.dry_run - ) - job_config.labels = create_job_configs_labels( - job_configs_labels=job_config.labels, - api_methods=api_methods, - ) queries.append(query) job_configs.append(copy.deepcopy(job_config)) query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) From 9e9809865f9a7afff32ae2b0b8f4145b018d44a8 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 03:24:59 +0000 Subject: [PATCH 23/31] Reapply "fix(core): Make log_adapter state thread-local" This reverts commit cf345859d23060159dde095ab62244495d6c6cf5. --- bigframes/core/log_adapter.py | 58 +++++++++++++++++++---------------- bigframes/testing/mocks.py | 15 +++++++++ 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py index 8179ffbeed..8d2e677cf7 100644 --- a/bigframes/core/log_adapter.py +++ b/bigframes/core/log_adapter.py @@ -15,12 +15,14 @@ import functools import inspect import threading -from typing import List, Optional +from typing import Optional from google.cloud import bigquery import pandas -_lock = threading.Lock() +_thread_local_data = threading.local() +_thread_local_data._api_methods = [] +_thread_local_data._call_stack = [] # The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), # but leave a few spare for internal labels to be added. @@ -30,12 +32,8 @@ PANDAS_PARAM_TRACKING_TASK = "pandas_param_tracking" LOG_OVERRIDE_NAME = "__log_override_name__" -_api_methods: List = [] _excluded_methods = ["__setattr__", "__getattr__"] -# Stack to track method calls -_call_stack: List = [] - def submit_pandas_labels( bq_client: Optional[bigquery.Client], @@ -172,11 +170,14 @@ def wrapper(*args, **kwargs): base_name = custom_base_name full_method_name = f"{base_name.lower()}-{api_method_name}" + if not hasattr(_thread_local_data, "_call_stack"): + _thread_local_data._call_stack = [] + # Track directly called methods - if len(_call_stack) == 0: + if len(_thread_local_data._call_stack) == 0: add_api_method(full_method_name) - _call_stack.append(full_method_name) + _thread_local_data._call_stack.append(full_method_name) try: return method(*args, **kwargs) @@ -185,7 +186,7 @@ def wrapper(*args, **kwargs): # or not fully supported (NotImplementedError) in BigFrames. # Logging is currently supported only when we can access the bqclient through # _block.session.bqclient. 
- if len(_call_stack) == 1: + if len(_thread_local_data._call_stack) == 1: submit_pandas_labels( _get_bq_client(*args, **kwargs), base_name, @@ -196,7 +197,7 @@ def wrapper(*args, **kwargs): ) raise e finally: - _call_stack.pop() + _thread_local_data._call_stack.pop() return wrapper @@ -214,19 +215,21 @@ def property_logger(prop): def shared_wrapper(prop): @functools.wraps(prop) def wrapped(*args, **kwargs): + if not hasattr(_thread_local_data, "_call_stack"): + _thread_local_data._call_stack = [] qualname_parts = getattr(prop, "__qualname__", prop.__name__).split(".") class_name = qualname_parts[-2] if len(qualname_parts) > 1 else "" property_name = prop.__name__ full_property_name = f"{class_name.lower()}-{property_name.lower()}" - if len(_call_stack) == 0: + if len(_thread_local_data._call_stack) == 0: add_api_method(full_property_name) - _call_stack.append(full_property_name) + _thread_local_data._call_stack.append(full_property_name) try: return prop(*args, **kwargs) finally: - _call_stack.pop() + _thread_local_data._call_stack.pop() return wrapped @@ -251,23 +254,26 @@ def wrapper(func): def add_api_method(api_method_name): - global _lock - global _api_methods - with _lock: - # Push the method to the front of the _api_methods list - _api_methods.insert(0, api_method_name.replace("<", "").replace(">", "")) - # Keep the list length within the maximum limit (adjust MAX_LABELS_COUNT as needed) - _api_methods = _api_methods[:MAX_LABELS_COUNT] + if not hasattr(_thread_local_data, "_api_methods"): + _thread_local_data._api_methods = [] + + # Push the method to the front of the _api_methods list + _thread_local_data._api_methods.insert( + 0, api_method_name.replace("<", "").replace(">", "") + ) + # Keep the list length within the maximum limit + _thread_local_data._api_methods = _thread_local_data._api_methods[:MAX_LABELS_COUNT] def get_and_reset_api_methods(dry_run: bool = False): - global _lock - with _lock: - previous_api_methods = list(_api_methods) + if not hasattr(_thread_local_data, "_api_methods"): + _thread_local_data._api_methods = [] + + previous_api_methods = list(_thread_local_data._api_methods) - # dry_run might not make a job resource, so only reset the log on real queries. - if not dry_run: - _api_methods.clear() + # dry_run might not make a job resource, so only reset the log on real queries. 
+ if not dry_run: + _thread_local_data._api_methods.clear() return previous_api_methods diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ff210419fd..ead394e497 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -27,7 +27,10 @@ import bigframes import bigframes.clients import bigframes.core.global_session +import bigframes.core.log_adapter import bigframes.dataframe +import bigframes.session._io.bigquery +from bigframes.session._io.bigquery import create_job_configs_labels import bigframes.session.clients """Utilities for creating test resources.""" @@ -90,6 +93,18 @@ def query_mock( job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, **kwargs, ): + job_config = ( + job_config + if job_config is not None + else google.cloud.bigquery.QueryJobConfig() + ) + api_methods = bigframes.core.log_adapter.get_and_reset_api_methods( + dry_run=job_config.dry_run + ) + job_config.labels = create_job_configs_labels( + job_configs_labels=job_config.labels, + api_methods=api_methods, + ) queries.append(query) job_configs.append(copy.deepcopy(job_config)) query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) From a320fd6ff046daa20ec4d2fc411f823a3126912e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 03:24:59 +0000 Subject: [PATCH 24/31] Revert "fix(core): Make log_adapter state thread-local" This reverts commit e2c38d3031a1ad9eb640f966ffeed6b7c494f7f2. --- bigframes/core/log_adapter.py | 58 ++++++++++++++++------------------- bigframes/testing/mocks.py | 15 --------- 2 files changed, 26 insertions(+), 47 deletions(-) diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py index 8d2e677cf7..8179ffbeed 100644 --- a/bigframes/core/log_adapter.py +++ b/bigframes/core/log_adapter.py @@ -15,14 +15,12 @@ import functools import inspect import threading -from typing import Optional +from typing import List, Optional from google.cloud import bigquery import pandas -_thread_local_data = threading.local() -_thread_local_data._api_methods = [] -_thread_local_data._call_stack = [] +_lock = threading.Lock() # The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), # but leave a few spare for internal labels to be added. @@ -32,8 +30,12 @@ PANDAS_PARAM_TRACKING_TASK = "pandas_param_tracking" LOG_OVERRIDE_NAME = "__log_override_name__" +_api_methods: List = [] _excluded_methods = ["__setattr__", "__getattr__"] +# Stack to track method calls +_call_stack: List = [] + def submit_pandas_labels( bq_client: Optional[bigquery.Client], @@ -170,14 +172,11 @@ def wrapper(*args, **kwargs): base_name = custom_base_name full_method_name = f"{base_name.lower()}-{api_method_name}" - if not hasattr(_thread_local_data, "_call_stack"): - _thread_local_data._call_stack = [] - # Track directly called methods - if len(_thread_local_data._call_stack) == 0: + if len(_call_stack) == 0: add_api_method(full_method_name) - _thread_local_data._call_stack.append(full_method_name) + _call_stack.append(full_method_name) try: return method(*args, **kwargs) @@ -186,7 +185,7 @@ def wrapper(*args, **kwargs): # or not fully supported (NotImplementedError) in BigFrames. # Logging is currently supported only when we can access the bqclient through # _block.session.bqclient. 
- if len(_thread_local_data._call_stack) == 1: + if len(_call_stack) == 1: submit_pandas_labels( _get_bq_client(*args, **kwargs), base_name, @@ -197,7 +196,7 @@ def wrapper(*args, **kwargs): ) raise e finally: - _thread_local_data._call_stack.pop() + _call_stack.pop() return wrapper @@ -215,21 +214,19 @@ def property_logger(prop): def shared_wrapper(prop): @functools.wraps(prop) def wrapped(*args, **kwargs): - if not hasattr(_thread_local_data, "_call_stack"): - _thread_local_data._call_stack = [] qualname_parts = getattr(prop, "__qualname__", prop.__name__).split(".") class_name = qualname_parts[-2] if len(qualname_parts) > 1 else "" property_name = prop.__name__ full_property_name = f"{class_name.lower()}-{property_name.lower()}" - if len(_thread_local_data._call_stack) == 0: + if len(_call_stack) == 0: add_api_method(full_property_name) - _thread_local_data._call_stack.append(full_property_name) + _call_stack.append(full_property_name) try: return prop(*args, **kwargs) finally: - _thread_local_data._call_stack.pop() + _call_stack.pop() return wrapped @@ -254,26 +251,23 @@ def wrapper(func): def add_api_method(api_method_name): - if not hasattr(_thread_local_data, "_api_methods"): - _thread_local_data._api_methods = [] - - # Push the method to the front of the _api_methods list - _thread_local_data._api_methods.insert( - 0, api_method_name.replace("<", "").replace(">", "") - ) - # Keep the list length within the maximum limit - _thread_local_data._api_methods = _thread_local_data._api_methods[:MAX_LABELS_COUNT] + global _lock + global _api_methods + with _lock: + # Push the method to the front of the _api_methods list + _api_methods.insert(0, api_method_name.replace("<", "").replace(">", "")) + # Keep the list length within the maximum limit (adjust MAX_LABELS_COUNT as needed) + _api_methods = _api_methods[:MAX_LABELS_COUNT] def get_and_reset_api_methods(dry_run: bool = False): - if not hasattr(_thread_local_data, "_api_methods"): - _thread_local_data._api_methods = [] - - previous_api_methods = list(_thread_local_data._api_methods) + global _lock + with _lock: + previous_api_methods = list(_api_methods) - # dry_run might not make a job resource, so only reset the log on real queries. - if not dry_run: - _thread_local_data._api_methods.clear() + # dry_run might not make a job resource, so only reset the log on real queries. 
+ if not dry_run: + _api_methods.clear() return previous_api_methods diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ead394e497..ff210419fd 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -27,10 +27,7 @@ import bigframes import bigframes.clients import bigframes.core.global_session -import bigframes.core.log_adapter import bigframes.dataframe -import bigframes.session._io.bigquery -from bigframes.session._io.bigquery import create_job_configs_labels import bigframes.session.clients """Utilities for creating test resources.""" @@ -93,18 +90,6 @@ def query_mock( job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, **kwargs, ): - job_config = ( - job_config - if job_config is not None - else google.cloud.bigquery.QueryJobConfig() - ) - api_methods = bigframes.core.log_adapter.get_and_reset_api_methods( - dry_run=job_config.dry_run - ) - job_config.labels = create_job_configs_labels( - job_configs_labels=job_config.labels, - api_methods=api_methods, - ) queries.append(query) job_configs.append(copy.deepcopy(job_config)) query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) From c03baaf967da9e240836ce8ab42770e9c2142574 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 03:24:59 +0000 Subject: [PATCH 25/31] Reapply "fix: Ensure _read_gbq_colab correctly logs BigQuery job labels" This reverts commit 7e02454512f568253ff77abe75cb7996e023fa0c. --- bigframes/testing/mocks.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ff210419fd..ead394e497 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -27,7 +27,10 @@ import bigframes import bigframes.clients import bigframes.core.global_session +import bigframes.core.log_adapter import bigframes.dataframe +import bigframes.session._io.bigquery +from bigframes.session._io.bigquery import create_job_configs_labels import bigframes.session.clients """Utilities for creating test resources.""" @@ -90,6 +93,18 @@ def query_mock( job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, **kwargs, ): + job_config = ( + job_config + if job_config is not None + else google.cloud.bigquery.QueryJobConfig() + ) + api_methods = bigframes.core.log_adapter.get_and_reset_api_methods( + dry_run=job_config.dry_run + ) + job_config.labels = create_job_configs_labels( + job_configs_labels=job_config.labels, + api_methods=api_methods, + ) queries.append(query) job_configs.append(copy.deepcopy(job_config)) query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) From 0f8414dfa0f6a870df4694eff5e3ea6cf49927a8 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 03:24:59 +0000 Subject: [PATCH 26/31] Revert "fix: Ensure _read_gbq_colab correctly logs BigQuery job labels" This reverts commit 5c018acc92d0c8732f384cef3f89dcdaf54c5b5a. 
--- bigframes/testing/mocks.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ead394e497..ff210419fd 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -27,10 +27,7 @@ import bigframes import bigframes.clients import bigframes.core.global_session -import bigframes.core.log_adapter import bigframes.dataframe -import bigframes.session._io.bigquery -from bigframes.session._io.bigquery import create_job_configs_labels import bigframes.session.clients """Utilities for creating test resources.""" @@ -93,18 +90,6 @@ def query_mock( job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, **kwargs, ): - job_config = ( - job_config - if job_config is not None - else google.cloud.bigquery.QueryJobConfig() - ) - api_methods = bigframes.core.log_adapter.get_and_reset_api_methods( - dry_run=job_config.dry_run - ) - job_config.labels = create_job_configs_labels( - job_configs_labels=job_config.labels, - api_methods=api_methods, - ) queries.append(query) job_configs.append(copy.deepcopy(job_config)) query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) From fec1d25487a460c66b765dd154eb560277c62fff Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 03:24:59 +0000 Subject: [PATCH 27/31] Reapply "fix: Ensure _read_gbq_colab correctly logs BigQuery job labels" This reverts commit 4dfd21ca02a2f905b983020808986499cd5288ff. --- bigframes/session/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 5b21f1bb38..d4bd02f1cd 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -515,6 +515,7 @@ def _read_gbq_colab( ) -> pandas.Series: ... + @log_adapter.method_logger @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( self, From 20c6e22da39d4b6ec404461ce1f400dafbf6ac6b Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 17 Dec 2025 03:25:00 +0000 Subject: [PATCH 28/31] Revert "fix: Ensure _read_gbq_colab correctly logs BigQuery job labels" This reverts commit c9cc0db642b7fc972ffd0a732a85855f16f55d2c. --- bigframes/session/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index d4bd02f1cd..5b21f1bb38 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -515,7 +515,6 @@ def _read_gbq_colab( ) -> pandas.Series: ... 
- @log_adapter.method_logger @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( self, From 843a5280243daaf404b855122ad8a3fd7b76b930 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 18 Dec 2025 02:26:46 +0000 Subject: [PATCH 29/31] Revert: Re-add pytest.mark.skipif for Python 3.13 in test_publish_api_coverage.py --- scripts/test_publish_api_coverage.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/test_publish_api_coverage.py b/scripts/test_publish_api_coverage.py index 6caae68cd6..6e366b6854 100644 --- a/scripts/test_publish_api_coverage.py +++ b/scripts/test_publish_api_coverage.py @@ -26,6 +26,10 @@ def api_coverage_df(): return build_api_coverage_table("my_bf_ver", "my_release_ver") +@pytest.mark.skipif( + sys.version_info >= (3, 13), + reason="Issues with installing sklearn for this test in python 3.13", +) def test_api_coverage_produces_expected_schema(api_coverage_df): if sys.version.split(".")[:2] == ["3", "9"]: pytest.skip( @@ -55,6 +59,10 @@ def test_api_coverage_produces_expected_schema(api_coverage_df): ) +@pytest.mark.skipif( + sys.version_info >= (3, 13), + reason="Issues with installing sklearn for this test in python 3.13", +) def test_api_coverage_produces_missing_parameters(api_coverage_df): """Make sure at least some functions have reported missing parameters.""" assert (api_coverage_df["missing_parameters"].str.len() > 0).any() From 301afb4ec5e9467105f2603ffa38044fc0c09b73 Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Thu, 18 Dec 2025 18:56:33 +0000 Subject: [PATCH 30/31] chore: make `test_read_gbq_colab_includes_label` more robust to python changes --- tests/unit/session/test_read_gbq_colab.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/unit/session/test_read_gbq_colab.py b/tests/unit/session/test_read_gbq_colab.py index b1dc1ec702..1b0f8c2df8 100644 --- a/tests/unit/session/test_read_gbq_colab.py +++ b/tests/unit/session/test_read_gbq_colab.py @@ -14,6 +14,7 @@ """Unit tests for read_gbq_colab helper functions.""" +import itertools import textwrap from unittest import mock @@ -27,15 +28,22 @@ def test_read_gbq_colab_includes_label(): """Make sure we can tell direct colab usage apart from regular read_gbq usage.""" - session = mocks.create_bigquery_session() + bqclient = mock.create_autospec(bigquery.Client, instance=True) + bqclient.project = "proj" + session = mocks.create_bigquery_session(bqclient=bqclient) _ = session._read_gbq_colab("SELECT 'read-gbq-colab-test'") - configs = session._job_configs # type: ignore label_values = [] - for config in configs: - if config is None: + for kall in itertools.chain( + bqclient.query_and_wait.call_args_list, + bqclient._query_and_wait_bigframes.call_args_list, + bqclient.query.call_args_list, + ): + breakpoint() + job_config = kall.kwargs.get("job_config") + if job_config is None: continue - label_values.extend(config.labels.values()) + label_values.extend(job_config.labels.values()) assert "session-read_gbq_colab" in label_values From db022081ce35919cd45d7d113d80e1e7d92cd25a Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Thu, 18 Dec 2025 19:02:45 +0000 Subject: [PATCH 31/31] chore: remove breakpoint --- tests/unit/session/test_read_gbq_colab.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/session/test_read_gbq_colab.py b/tests/unit/session/test_read_gbq_colab.py index 1b0f8c2df8..cc0508b75a 100644 --- a/tests/unit/session/test_read_gbq_colab.py +++ b/tests/unit/session/test_read_gbq_colab.py @@ -39,7 +39,6 @@ def test_read_gbq_colab_includes_label(): bqclient._query_and_wait_bigframes.call_args_list, bqclient.query.call_args_list, ): - breakpoint() job_config = kall.kwargs.get("job_config") if job_config is None: continue
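
Note on the log_adapter churn in patches 19 through 28: the series repeatedly applies and reverts a change that replaces the module-level `_api_methods` / `_call_stack` lists (guarded by a `threading.Lock`) with attributes on a `threading.local()` object. The subtlety visible in those diffs is that attributes assigned to a `threading.local()` at import time exist only for the importing thread, so every accessor must lazily initialize its own per-thread copy. A minimal, self-contained sketch of that pattern follows; the helper names here are illustrative only and are not part of the bigframes API.

    import threading

    _thread_local = threading.local()

    def _methods() -> list:
        # Each thread sees its own attribute namespace, so initialize lazily:
        # an assignment made at import time is invisible to other threads.
        if not hasattr(_thread_local, "api_methods"):
            _thread_local.api_methods = []
        return _thread_local.api_methods

    def add_api_method(name: str) -> None:
        # Newest entry first, mirroring the insert(0, ...) in log_adapter.
        _methods().insert(0, name)

    def get_and_reset_api_methods() -> list:
        previous = list(_methods())
        _methods().clear()
        return previous

    def _worker(label: str) -> None:
        add_api_method(label)
        # Only this thread's entry is visible; no lock is required.
        assert get_and_reset_api_methods() == [label]

    threads = [threading.Thread(target=_worker, args=(f"m{i}",)) for i in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

The trade-off, which the apply/revert back-and-forth in this series presumably reflects, is that thread-local state removes lock contention and keeps concurrent sessions from leaking method labels into each other's job configs, while a query issued from a different thread than the one that recorded the API calls will observe an empty log.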