Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions tests/test_plumbing.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,13 +292,19 @@ def gaps_data(self, time_index):

return data

def test_basic_gaps_description(self, gaps_data):
def test_basic_gaps_description(self):
"""Test basic gap analysis functionality."""
plumber = Plumber(gaps_data)
my_df = pd.DataFrame({

"temp__°C__Building": [np.nan, 1, np.nan, 3],
"power__W__Building": [np.nan, 1, 2, np.nan],
}, index=pd.date_range("2009", freq="h", periods=4, tz='UTC'))

plumber = Plumber(my_df)
result = plumber.get_gaps_description()

# Check structure
assert all(col in result.columns for col in gaps_data.columns)
assert all(col in result.columns for col in my_df.columns)
expected_stats = [
"data_presence_%",
"count",
Expand All @@ -315,7 +321,8 @@ def test_basic_gaps_description(self, gaps_data):
# Check specific values
temp_col = "temp__°C__Building"
assert result[temp_col]["count"] == 2
assert result[temp_col]["data_presence_%"] == pytest.approx(83.33, rel=1e-2)
assert result[temp_col]["data_presence_%"] == pytest.approx(50., rel=1e-2)
assert result["combination"]["max"] == pd.to_timedelta("02:00:00")

def test_gap_thresholds(self, gaps_data):
"""Test gap analysis with duration thresholds."""
Expand Down
17 changes: 9 additions & 8 deletions tide/plumbing.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class Plumber:

Examples
--------
>>> from tide import Plumber
>>> from tide.plumbing import Plumber
>>> import pandas as pd
>>> # Create sample data with hierarchical column names
>>> data = pd.DataFrame(
Expand Down Expand Up @@ -249,7 +249,7 @@ def get_gaps_description(
return_combination: bool = True,
) -> pd.DataFrame:
"""
Get statistical description of gaps durations in the data.
Get a statistical description of gap durations in the data.

Parameters
----------
Expand All @@ -264,7 +264,8 @@ def get_gaps_description(
gaps_gte : str or pd.Timedelta or dt.timedelta, optional
Lower threshold for gap duration
return_combination : bool, default True
Whether to include statistics for gaps present in any column
Whether to include statistics for the combined gaps, i.e. gaps present in any column.
Useful for assessing the periods when all data are available at once.

Returns
-------
Expand Down Expand Up @@ -301,7 +302,7 @@ def get_gaps_description(
durations = []
for gap in gaps_list:
if len(gap) > 1:
durations.append(gap[-1] - gap[0])
durations.append(gap[-1] - gap[0] + pd.to_timedelta(gap.freq))
else:
durations.append(pd.to_timedelta(gap.freq))

Expand Down Expand Up @@ -405,7 +406,7 @@ def get_pipeline(

Examples
--------
>>> from tide import Plumber
>>> from tide.plumbing import Plumber
>>> import pandas as pd
>>> # Create sample data
>>> data = pd.DataFrame(
Expand Down Expand Up @@ -507,7 +508,7 @@ def get_corrected_data(

Examples
--------
>>> from tide import Plumber
>>> from tide.plumbing import Plumber
>>> import pandas as pd
>>> # Create sample data
>>> data = pd.DataFrame(
Expand Down Expand Up @@ -618,7 +619,7 @@ def plot_gaps_heatmap(

Examples
--------
>>> from tide import Plumber
>>> from tide.plumbing import Plumber
>>> import pandas as pd
>>> # Create sample data with gaps
>>> data = pd.DataFrame(
Expand Down Expand Up @@ -793,7 +794,7 @@ def plot(

Examples
--------
>>> from tide import Plumber
>>> from tide.plumbing import Plumber
>>> import pandas as pd
>>> # Create sample data
>>> data = pd.DataFrame(
Expand Down