From 212c9f22e10cae1b55e8d9494a1a33b6bb9c6179 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@google.com>
Date: Tue, 4 Nov 2025 16:55:20 -0800
Subject: [PATCH] Improve `xarray_beam.Dataset` `__repr__`.

The `__repr__` now replaces dask array representations within the template with `...` for brevity and clarity, as the dask chunks don't necessarily match the `xarray_beam` chunks. The test is updated to check for a more complete and accurate representation.

PiperOrigin-RevId: 828189621
---
 xarray_beam/__init__.py          |  2 +-
 xarray_beam/_src/dataset.py      |  7 +++++--
 xarray_beam/_src/dataset_test.py | 23 +++++++++++++----------
 3 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/xarray_beam/__init__.py b/xarray_beam/__init__.py
index 24dbeb7..6645c41 100644
--- a/xarray_beam/__init__.py
+++ b/xarray_beam/__init__.py
@@ -55,4 +55,4 @@
     DatasetToZarr as DatasetToZarr,
 )
 
-__version__ = '0.11.4'  # automatically synchronized to pyproject.toml
+__version__ = '0.11.5'  # automatically synchronized to pyproject.toml
diff --git a/xarray_beam/_src/dataset.py b/xarray_beam/_src/dataset.py
index d341188..81ead5d 100644
--- a/xarray_beam/_src/dataset.py
+++ b/xarray_beam/_src/dataset.py
@@ -537,7 +537,10 @@ def chunk_count(self) -> int:
       )
 
   def __repr__(self):
-    base = repr(self.template)
+    template_repr = repr(self.template)
+    # Replace each dask.array<...> repr with '...': shorter, and the dask
+    # chunks need not match this Dataset's chunks ('.*' never spans lines).
+    template_repr = re.sub(r'dask\.array<.*>', '...', template_repr)
     chunks_str = ', '.join(
         [f'{k}: {v}' for k, v in self.chunks.items()]
         + [f'split_vars={self.split_vars}']
@@ -551,7 +554,7 @@ def __repr__(self):
         f'PTransform: {self._ptransform}\n'
         f'Chunks:     {chunk_size} ({chunks_str})\n'
         f'Template:   {total_size} ({chunk_count} chunk{plural})\n'
-        + textwrap.indent('\n'.join(base.split('\n')[1:]), ' ' * 4)
+        + textwrap.indent('\n'.join(template_repr.split('\n')[1:]), ' ' * 4)
     )
 
   @classmethod
diff --git a/xarray_beam/_src/dataset_test.py b/xarray_beam/_src/dataset_test.py
index 21ad875..22aecae 100644
--- a/xarray_beam/_src/dataset_test.py
+++ b/xarray_beam/_src/dataset_test.py
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
+import textwrap
 
 from absl.testing import absltest
 from absl.testing import parameterized
 import apache_beam as beam
 import numpy as np
-import pandas as pd
 import xarray
 import xarray_beam as xbeam
 from xarray_beam._src import dataset as xbeam_dataset
@@ -475,16 +475,19 @@ class DatasetTest(test_util.TestCase):
 
   def test_repr(self):
     ds = xarray.Dataset({'foo': ('x', np.arange(10))})
-    beam_ds = xbeam.Dataset.from_xarray(ds, {'x': 5})
-    self.assertRegex(
+    beam_ds = xbeam.Dataset.from_xarray(ds, {'x': 5}, label='my_label')
+    self.assertEqual(
         repr(beam_ds),
-        re.escape(
-            '<xarray_beam.Dataset>\n'
-            'PTransform: <DatasetToChunks>\n'
-            'Chunks:     40B (x: 5, split_vars=False)\n'
-            'Template:   80B (2 chunks)\n'
-            '    Dimensions:'
-        ).replace('DatasetToChunks', 'DatasetToChunks.*'),
+        textwrap.dedent("""\
+            <xarray_beam.Dataset>
+            PTransform: <DatasetToChunks(PTransform) label=[my_label]>
+            Chunks:     40B (x: 5, split_vars=False)
+            Template:   80B (2 chunks)
+                Dimensions:  (x: 10)
+                Dimensions without coordinates: x
+                Data variables:
+                    foo      (x) int64 80B ...
+        """).strip(),
     )
 
   def test_from_ptransform(self):