From 212c9f22e10cae1b55e8d9494a1a33b6bb9c6179 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 4 Nov 2025 16:55:20 -0800 Subject: [PATCH] Improve `xarray_beam.Dataset` `__repr__`. The `__repr__` now replaces dask array representations within the template with `...` for brevity and clarity, as the dask chunks don't necessarily match the `xarray_beam` chunks. The test is updated to check for a more complete and accurate representation. PiperOrigin-RevId: 828189621 --- xarray_beam/__init__.py | 2 +- xarray_beam/_src/dataset.py | 7 +++++-- xarray_beam/_src/dataset_test.py | 23 +++++++++++++---------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/xarray_beam/__init__.py b/xarray_beam/__init__.py index 24dbeb7..6645c41 100644 --- a/xarray_beam/__init__.py +++ b/xarray_beam/__init__.py @@ -55,4 +55,4 @@ DatasetToZarr as DatasetToZarr, ) -__version__ = '0.11.4' # automatically synchronized to pyproject.toml +__version__ = '0.11.5' # automatically synchronized to pyproject.toml diff --git a/xarray_beam/_src/dataset.py b/xarray_beam/_src/dataset.py index d341188..81ead5d 100644 --- a/xarray_beam/_src/dataset.py +++ b/xarray_beam/_src/dataset.py @@ -537,7 +537,10 @@ def chunk_count(self) -> int: ) def __repr__(self): - base = repr(self.template) + template_repr = repr(self.template) + # replace dask.array reprs with ..., both for the sake of brevity and + # because the dask chunks are not the same as the Dataset chunks. + template_repr = re.sub(r'dask.array\<.*\>', '...', template_repr) chunks_str = ', '.join( [f'{k}: {v}' for k, v in self.chunks.items()] + [f'split_vars={self.split_vars}'] @@ -551,7 +554,7 @@ def __repr__(self): f'PTransform: {self._ptransform}\n' f'Chunks: {chunk_size} ({chunks_str})\n' f'Template: {total_size} ({chunk_count} chunk{plural})\n' - + textwrap.indent('\n'.join(base.split('\n')[1:]), ' ' * 4) + + textwrap.indent('\n'.join(template_repr.split('\n')[1:]), ' ' * 4) ) @classmethod diff --git a/xarray_beam/_src/dataset_test.py b/xarray_beam/_src/dataset_test.py index 21ad875..22aecae 100644 --- a/xarray_beam/_src/dataset_test.py +++ b/xarray_beam/_src/dataset_test.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import re +import textwrap from absl.testing import absltest from absl.testing import parameterized import apache_beam as beam import numpy as np -import pandas as pd import xarray import xarray_beam as xbeam from xarray_beam._src import dataset as xbeam_dataset @@ -475,16 +475,19 @@ class DatasetTest(test_util.TestCase): def test_repr(self): ds = xarray.Dataset({'foo': ('x', np.arange(10))}) - beam_ds = xbeam.Dataset.from_xarray(ds, {'x': 5}) - self.assertRegex( + beam_ds = xbeam.Dataset.from_xarray(ds, {'x': 5}, label='my_label') + self.assertEqual( repr(beam_ds), - re.escape( - '\n' - 'PTransform: \n' - 'Chunks: 40B (x: 5, split_vars=False)\n' - 'Template: 80B (2 chunks)\n' - ' Dimensions:' - ).replace('DatasetToChunks', 'DatasetToChunks.*'), + textwrap.dedent("""\ + + PTransform: + Chunks: 40B (x: 5, split_vars=False) + Template: 80B (2 chunks) + Dimensions: (x: 10) + Dimensions without coordinates: x + Data variables: + foo (x) int64 80B ... + """).strip(), ) def test_from_ptransform(self):