diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 4d594ddfbc..0456b15ac3 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -19,11 +19,9 @@
import datetime
import inspect
import itertools
-import json
import re
import sys
import textwrap
-import traceback
import typing
from typing import (
Any,
@@ -55,7 +53,6 @@
import pyarrow
import tabulate
-import bigframes._config.display_options as display_options
import bigframes.constants
import bigframes.core
from bigframes.core import agg_expressions, log_adapter
@@ -800,32 +797,15 @@ def __repr__(self) -> str:
)
self._set_internal_query_job(query_job)
+ from bigframes.display import plaintext
- column_count = len(pandas_df.columns)
-
- with display_options.pandas_repr(opts):
- import pandas.io.formats
-
- # safe to mutate this, this dict is owned by this code, and does not affect global config
- to_string_kwargs = (
- pandas.io.formats.format.get_dataframe_repr_params() # type: ignore
- )
- if not self._has_index:
- to_string_kwargs.update({"index": False})
- repr_string = pandas_df.to_string(**to_string_kwargs)
-
- # Modify the end of the string to reflect count.
- lines = repr_string.split("\n")
- pattern = re.compile("\\[[0-9]+ rows x [0-9]+ columns\\]")
- if pattern.match(lines[-1]):
- lines = lines[:-2]
-
- if row_count > len(lines) - 1:
- lines.append("...")
-
- lines.append("")
- lines.append(f"[{row_count} rows x {column_count} columns]")
- return "\n".join(lines)
+ return plaintext.create_text_representation(
+ pandas_df,
+ row_count,
+ is_series=False,
+ has_index=self._has_index,
+ column_count=len(self.columns),
+ )
def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]:
"""Process blob columns for display."""
@@ -844,75 +824,6 @@ def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]:
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
return df, blob_cols
- def _get_anywidget_bundle(
- self, include=None, exclude=None
- ) -> tuple[dict[str, Any], dict[str, Any]]:
- """
- Helper method to create and return the anywidget mimebundle.
- This function encapsulates the logic for anywidget display.
- """
- from bigframes import display
-
- df, blob_cols = self._get_display_df_and_blob_cols()
-
- # Create and display the widget
- widget = display.TableWidget(df)
- widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude)
-
- # Handle both tuple (data, metadata) and dict returns
- if isinstance(widget_repr_result, tuple):
- widget_repr, widget_metadata = widget_repr_result
- else:
- widget_repr = widget_repr_result
- widget_metadata = {}
-
- widget_repr = dict(widget_repr)
-
- # At this point, we have already executed the query as part of the
- # widget construction. Let's use the information available to render
- # the HTML and plain text versions.
- widget_repr["text/html"] = self._create_html_representation(
- widget._cached_data,
- widget.row_count,
- len(self.columns),
- blob_cols,
- )
-
- widget_repr["text/plain"] = self._create_text_representation(
- widget._cached_data, widget.row_count
- )
-
- return widget_repr, widget_metadata
-
- def _create_text_representation(
- self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int]
- ) -> str:
- """Create a text representation of the DataFrame."""
- opts = bigframes.options.display
- with display_options.pandas_repr(opts):
- import pandas.io.formats
-
- # safe to mutate this, this dict is owned by this code, and does not affect global config
- to_string_kwargs = (
- pandas.io.formats.format.get_dataframe_repr_params() # type: ignore
- )
- if not self._has_index:
- to_string_kwargs.update({"index": False})
-
- # We add our own dimensions string, so don't want pandas to.
- to_string_kwargs.update({"show_dimensions": False})
- repr_string = pandas_df.to_string(**to_string_kwargs)
-
- lines = repr_string.split("\n")
-
- if total_rows is not None and total_rows > len(pandas_df):
- lines.append("...")
-
- lines.append("")
- column_count = len(self.columns)
- lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
- return "\n".join(lines)
-
def _repr_mimebundle_(self, include=None, exclude=None):
"""
Custom display method for IPython/Jupyter environments.
@@ -920,98 +831,9 @@ def _repr_mimebundle_(self, include=None, exclude=None):
"""
# TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and
# BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed.
- opts = bigframes.options.display
- # Only handle widget display in anywidget mode
- if opts.repr_mode == "anywidget":
- try:
- return self._get_anywidget_bundle(include=include, exclude=exclude)
-
- except ImportError:
- # Anywidget is an optional dependency, so warn rather than fail.
- # TODO(shuowei): When Anywidget becomes the default for all repr modes,
- # remove this warning.
- warnings.warn(
- "Anywidget mode is not available. "
- "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. "
- f"Falling back to static HTML. Error: {traceback.format_exc()}"
- )
-
- # In non-anywidget mode, fetch data once and use it for both HTML
- # and plain text representations to avoid multiple queries.
- opts = bigframes.options.display
- max_results = opts.max_rows
-
- df, blob_cols = self._get_display_df_and_blob_cols()
-
- pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
- max_results
- )
- self._set_internal_query_job(query_job)
- column_count = len(pandas_df.columns)
-
- html_string = self._create_html_representation(
- pandas_df, row_count, column_count, blob_cols
- )
-
- text_representation = self._create_text_representation(pandas_df, row_count)
-
- return {"text/html": html_string, "text/plain": text_representation}
-
- def _create_html_representation(
- self,
- pandas_df: pandas.DataFrame,
- row_count: int,
- column_count: int,
- blob_cols: list[str],
- ) -> str:
- """Create an HTML representation of the DataFrame."""
- opts = bigframes.options.display
- with display_options.pandas_repr(opts):
- # TODO(shuowei, b/464053870): Escaping HTML would be useful, but
- # `escape=False` is needed to show images. We may need to implement
- # a full-fledged repr module to better support types not in pandas.
- if bigframes.options.display.blob_display and blob_cols:
-
- def obj_ref_rt_to_html(obj_ref_rt) -> str:
- obj_ref_rt_json = json.loads(obj_ref_rt)
- obj_ref_details = obj_ref_rt_json["objectref"]["details"]
- if "gcs_metadata" in obj_ref_details:
- gcs_metadata = obj_ref_details["gcs_metadata"]
- content_type = typing.cast(
- str, gcs_metadata.get("content_type", "")
- )
- if content_type.startswith("image"):
- size_str = ""
- if bigframes.options.display.blob_display_width:
- size_str = f' width="{bigframes.options.display.blob_display_width}"'
- if bigframes.options.display.blob_display_height:
- size_str = (
- size_str
- + f' height="{bigframes.options.display.blob_display_height}"'
- )
- url = obj_ref_rt_json["access_urls"]["read_url"]
- return f''
-
- return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}'
-
- formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols}
-
- # set max_colwidth so not to truncate the image url
- with pandas.option_context("display.max_colwidth", None):
- html_string = pandas_df.to_html(
- escape=False,
- notebook=True,
- max_rows=pandas.get_option("display.max_rows"),
- max_cols=pandas.get_option("display.max_columns"),
- show_dimensions=pandas.get_option("display.show_dimensions"),
- formatters=formatters, # type: ignore
- )
- else:
- # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
- html_string = pandas_df._repr_html_() # type:ignore
+ from bigframes.display import html
- html_string += f"[{row_count} rows x {column_count} columns in total]"
- return html_string
+ return html.repr_mimebundle(self, include=include, exclude=exclude)
def __delitem__(self, key: str):
df = self.drop(columns=[key])
diff --git a/bigframes/display/html.py b/bigframes/display/html.py
index 101bd296f1..3f1667eb9c 100644
--- a/bigframes/display/html.py
+++ b/bigframes/display/html.py
@@ -17,12 +17,23 @@
from __future__ import annotations
import html
-from typing import Any
+import json
+import traceback
+import typing
+from typing import Any, Union
+import warnings
import pandas as pd
import pandas.api.types
-from bigframes._config import options
+import bigframes
+from bigframes._config import display_options, options
+from bigframes.display import plaintext
+import bigframes.formatting_helpers as formatter
+
+if typing.TYPE_CHECKING:
+ import bigframes.dataframe
+ import bigframes.series
def _is_dtype_numeric(dtype: Any) -> bool:
@@ -91,3 +102,214 @@ def render_html(
table_html.append("")
return "\n".join(table_html)
+
+
+def _obj_ref_rt_to_html(obj_ref_rt: str) -> str:
+    """Render one serialized ObjectRef runtime value as an HTML cell.
+
+    Args:
+        obj_ref_rt:
+            JSON string holding an ``objectref`` entry (``uri``,
+            ``authorizer``, ``details``) and, for the image case,
+            ``access_urls.read_url``.
+
+    Returns:
+        An ``<img>`` tag when the GCS metadata reports an image content
+        type, otherwise a plain ``uri: ..., authorizer: ...`` summary.
+    """
+    obj_ref_rt_json = json.loads(obj_ref_rt)
+    obj_ref = obj_ref_rt_json["objectref"]
+    obj_ref_details = obj_ref["details"]
+    if "gcs_metadata" in obj_ref_details:
+        gcs_metadata = obj_ref_details["gcs_metadata"]
+        content_type = typing.cast(str, gcs_metadata.get("content_type", ""))
+        if content_type.startswith("image"):
+            # Optional sizing attributes; each carries its own leading space
+            # so they can be appended directly after the src attribute.
+            size_str = ""
+            if options.display.blob_display_width:
+                size_str = f' width="{options.display.blob_display_width}"'
+            if options.display.blob_display_height:
+                size_str = size_str + f' height="{options.display.blob_display_height}"'
+            url = obj_ref_rt_json["access_urls"]["read_url"]
+            # Emit the image element itself; the tag had been lost, leaving an
+            # unterminated string and `url`/`size_str` unused.
+            return f'<img src="{url}"{size_str}>'
+
+    # Non-image (or metadata-less) blobs fall back to a textual summary.
+    return f'uri: {obj_ref["uri"]}, authorizer: {obj_ref["authorizer"]}'
+
+
+def create_html_representation(
+ obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
+ pandas_df: pd.DataFrame,
+ total_rows: int,
+ total_columns: int,
+ blob_cols: list[str],
+) -> str:
+ """Create an HTML representation of the DataFrame or Series."""
+ from bigframes.series import Series
+
+ opts = options.display
+ with display_options.pandas_repr(opts):
+ if isinstance(obj, Series):
+ # Some pandas objects may not have a _repr_html_ method, or it might
+ # fail in certain environments. We fall back to a pre-formatted
+ # string representation to ensure something is always displayed.
+ pd_series = pandas_df.iloc[:, 0]
+ try:
+ # TODO(b/464053870): Support rich display for blob Series.
+ html_string = pd_series._repr_html_()
+ except AttributeError:
+ html_string = f"
{pd_series.to_string()}"
+
+ is_truncated = total_rows is not None and total_rows > len(pandas_df)
+ if is_truncated:
+ html_string += f"[{total_rows} rows]
" + return html_string + else: + # It's a DataFrame + # TODO(shuowei, b/464053870): Escaping HTML would be useful, but + # `escape=False` is needed to show images. We may need to implement + # a full-fledged repr module to better support types not in pandas. + if options.display.blob_display and blob_cols: + formatters = {blob_col: _obj_ref_rt_to_html for blob_col in blob_cols} + + # set max_colwidth so not to truncate the image url + with pandas.option_context("display.max_colwidth", None): + html_string = pandas_df.to_html( + escape=False, + notebook=True, + max_rows=pandas.get_option("display.max_rows"), + max_cols=pandas.get_option("display.max_columns"), + show_dimensions=pandas.get_option("display.show_dimensions"), + formatters=formatters, # type: ignore + ) + else: + # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy. + html_string = pandas_df._repr_html_() # type:ignore + + html_string += f"[{total_rows} rows x {total_columns} columns in total]" + return html_string + + +def _get_obj_metadata( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], +) -> tuple[bool, bool]: + from bigframes.series import Series + + is_series = isinstance(obj, Series) + if is_series: + has_index = len(obj._block.index_columns) > 0 + else: + has_index = obj._has_index + return is_series, has_index + + +def get_anywidget_bundle( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + include=None, + exclude=None, +) -> tuple[dict[str, Any], dict[str, Any]]: + """ + Helper method to create and return the anywidget mimebundle. + This function encapsulates the logic for anywidget display. 
+ """ + from bigframes import display + from bigframes.series import Series + + if isinstance(obj, Series): + df = obj.to_frame() + else: + df, blob_cols = obj._get_display_df_and_blob_cols() + + widget = display.TableWidget(df) + widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) + + if isinstance(widget_repr_result, tuple): + widget_repr, widget_metadata = widget_repr_result + else: + widget_repr = widget_repr_result + widget_metadata = {} + + widget_repr = dict(widget_repr) + + # Use cached data from widget to render HTML and plain text versions. + cached_pd = widget._cached_data + total_rows = widget.row_count + total_columns = len(df.columns) + + widget_repr["text/html"] = create_html_representation( + obj, + cached_pd, + total_rows, + total_columns, + blob_cols if "blob_cols" in locals() else [], + ) + is_series, has_index = _get_obj_metadata(obj) + widget_repr["text/plain"] = plaintext.create_text_representation( + cached_pd, + total_rows, + is_series=is_series, + has_index=has_index, + column_count=len(df.columns) if not is_series else 0, + ) + + return widget_repr, widget_metadata + + +def repr_mimebundle_deferred( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], +) -> dict[str, str]: + return { + "text/plain": formatter.repr_query_job(obj._compute_dry_run()), + "text/html": formatter.repr_query_job_html(obj._compute_dry_run()), + } + + +def repr_mimebundle_head( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], +) -> dict[str, str]: + from bigframes.series import Series + + opts = options.display + blob_cols: list[str] + if isinstance(obj, Series): + pandas_df, row_count, query_job = obj._block.retrieve_repr_request_results( + opts.max_rows + ) + blob_cols = [] + else: + df, blob_cols = obj._get_display_df_and_blob_cols() + pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( + opts.max_rows + ) + + obj._set_internal_query_job(query_job) + column_count = 
len(pandas_df.columns) + + html_string = create_html_representation( + obj, pandas_df, row_count, column_count, blob_cols + ) + + is_series, has_index = _get_obj_metadata(obj) + text_representation = plaintext.create_text_representation( + pandas_df, + row_count, + is_series=is_series, + has_index=has_index, + column_count=len(pandas_df.columns) if not is_series else 0, + ) + + return {"text/html": html_string, "text/plain": text_representation} + + +def repr_mimebundle( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + include=None, + exclude=None, +): + """Custom display method for IPython/Jupyter environments.""" + # TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and + # BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed. + + opts = options.display + if opts.repr_mode == "deferred": + return repr_mimebundle_deferred(obj) + + if opts.repr_mode == "anywidget": + try: + return get_anywidget_bundle(obj, include=include, exclude=exclude) + except ImportError: + # Anywidget is an optional dependency, so warn rather than fail. + # TODO(shuowei): When Anywidget becomes the default for all repr modes, + # remove this warning. + warnings.warn( + "Anywidget mode is not available. " + "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " + f"Falling back to static HTML. Error: {traceback.format_exc()}" + ) + + return repr_mimebundle_head(obj) diff --git a/bigframes/display/plaintext.py b/bigframes/display/plaintext.py new file mode 100644 index 0000000000..2f7bc1df07 --- /dev/null +++ b/bigframes/display/plaintext.py @@ -0,0 +1,102 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Plaintext display representations.""" + +from __future__ import annotations + +import typing + +import pandas +import pandas.io.formats + +from bigframes._config import display_options, options + +if typing.TYPE_CHECKING: + import pandas as pd + + +def create_text_representation( + pandas_df: pd.DataFrame, + total_rows: typing.Optional[int], + is_series: bool, + has_index: bool = True, + column_count: int = 0, +) -> str: + """Create a text representation of the DataFrame or Series. + + Args: + pandas_df: + The pandas DataFrame containing the data to represent. + total_rows: + The total number of rows in the original BigFrames object. + is_series: + Whether the object being represented is a Series. + has_index: + Whether the object has an index to display. + column_count: + The total number of columns in the original BigFrames object. + Only used for DataFrames. + + Returns: + A plaintext string representation. 
+ """ + opts = options.display + + if is_series: + with display_options.pandas_repr(opts): + pd_series = pandas_df.iloc[:, 0] + if not has_index: + repr_string = pd_series.to_string( + length=False, index=False, name=True, dtype=True + ) + else: + repr_string = pd_series.to_string(length=False, name=True, dtype=True) + + lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pandas_df) + + if is_truncated: + lines.append("...") + lines.append("") # Add empty line for spacing only if truncated + lines.append(f"[{total_rows} rows]") + + return "\n".join(lines) + + else: + # DataFrame + with display_options.pandas_repr(opts): + # safe to mutate this, this dict is owned by this code, and does not affect global config + to_string_kwargs = ( + pandas.io.formats.format.get_dataframe_repr_params() # type: ignore + ) + if not has_index: + to_string_kwargs.update({"index": False}) + + # We add our own dimensions string, so don't want pandas to. + to_string_kwargs.update({"show_dimensions": False}) + repr_string = pandas_df.to_string(**to_string_kwargs) + + lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pandas_df) + + if is_truncated: + lines.append("...") + lines.append("") # Add empty line for spacing only if truncated + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + else: + # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False + lines.append("") + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + return "\n".join(lines) diff --git a/bigframes/formatting_helpers.py b/bigframes/formatting_helpers.py index 55731069a3..3c37a3470d 100644 --- a/bigframes/formatting_helpers.py +++ b/bigframes/formatting_helpers.py @@ -68,7 +68,7 @@ def repr_query_job(query_job: Optional[bigquery.QueryJob]): query_job: The job representing the execution of the query on the server. Returns: - Pywidget html table. + Formatted string. 
""" if query_job is None: return "No job information available" @@ -94,6 +94,46 @@ def repr_query_job(query_job: Optional[bigquery.QueryJob]): return res +def repr_query_job_html(query_job: Optional[bigquery.QueryJob]): + """Return query job as a formatted html string. + Args: + query_job: + The job representing the execution of the query on the server. + Returns: + Html string. + """ + if query_job is None: + return "No job information available" + if query_job.dry_run: + return f"Computation deferred. Computation will process {get_formatted_bytes(query_job.total_bytes_processed)}" + + # We can reuse the plaintext repr for now or make a nicer table. + # For deferred mode consistency, let's just wrap the text in a pre block or similar, + # but the request implies we want a distinct HTML representation if possible. + # However, existing repr_query_job returns a simple string. + # Let's format it as a simple table or list. + + res = "0 1910\n", + "1 1910\n", + "2 1910\n", + "3 1910\n", + "4 1910\n", + "5 1910\n", + "6 1910\n", + "7 1910\n", + "8 1910\n", + "9 1910[5552452 rows]" + ], + "text/plain": [ + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "Name: year, dtype: Int64\n", + "...\n", + "\n", + "[5552452 rows]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_series = df[\"year\"]\n", + "# Displaying the series triggers the interactive widget\n", + "test_series" + ] + }, + { + "cell_type": "markdown", + "id": "7bcf1bb7", + "metadata": {}, + "source": [ + "Display with Pagination" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "da23e0f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "
0 1910\n", + "1 1910\n", + "2 1910\n", + "3 1910\n", + "4 1910\n", + "5 1910\n", + "6 1910\n", + "7 1910\n", + "8 1910\n", + "9 1910[5552452 rows]" + ], + "text/plain": [ + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "Name: year, dtype: Int64\n", + "...\n", + "\n", + "[5552452 rows]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_series" + ] + }, { "cell_type": "markdown", "id": "sorting-intro", @@ -369,9 +515,18 @@ "Programmatic Navigation Demo" ] }, + { + "cell_type": "markdown", + "id": "programmatic-header", + "metadata": {}, + "source": [ + "## 3. Programmatic Widget Control\n", + "You can also instantiate the `TableWidget` directly for more control, such as checking page counts or driving navigation programmatically." + ] + }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "6920d49b", "metadata": {}, "outputs": [ @@ -409,15 +564,15 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bf4224f8022042aea6d72507ddb5570b", + "model_id": "13b063f7ea74473eb18de270c48c6417", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "
5 rows × 15 columns
\n", @@ -777,36 +942,36 @@ "\n", " publication_date class_international class_us application_number \\\n", "0 29.08.018 E04H 6/12| + | value |
+
|---|---|
| 0 | +a | +
| 1 | +b | +