2 changes: 1 addition & 1 deletion README.md
@@ -33,7 +33,7 @@ Unless noted, iglu-r test is considered successful if it achieves precision of 0
| cv_glu | ✅ |
| cv_measures | ✅ |
| ea1c | ✅ |
| episode_calculation | 🟡 need fix in excl| || no match in lv1_hypo_excl and lv1_hyper_excl|
| episode_calculation | | || no match in lv1_hypo_excl and lv1_hyper_excl|
| gmi | ✅ |
| grade_eugly | ✅ |
| grade_hyper | ✅ |
6 changes: 6 additions & 0 deletions iglu_python/conga.py
@@ -77,6 +77,12 @@ def conga_single(data: pd.DataFrame, hours: int = 1, tz: str = "") -> float:
lag = hourly_readings * hours
diffs = gl_vector[lag:] - gl_vector[:-lag]

# Check if we have sufficient data for std calculation
# Need at least 2 non-NaN values for ddof=1
valid_diffs = diffs[~np.isnan(diffs)]
if len(valid_diffs) < 2:
return np.nan

return float(np.nanstd(diffs, ddof=1))

# Handle Series input
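A note on the guard added above: `np.nanstd(..., ddof=1)` needs at least two non-NaN values, otherwise the degrees of freedom are <= 0, numpy emits a RuntimeWarning, and nan comes back anyway. A minimal standalone sketch of the behavior the check avoids (not part of the diff):

```python
import numpy as np

diffs = np.array([np.nan, 4.2])          # only one valid difference
valid_diffs = diffs[~np.isnan(diffs)]

if len(valid_diffs) < 2:
    result = np.nan                       # explicit, warning-free
else:
    result = float(np.nanstd(diffs, ddof=1))

# Without the guard, np.nanstd(diffs, ddof=1) warns
# "Degrees of freedom <= 0 for slice" and still returns nan.
```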
49 changes: 27 additions & 22 deletions iglu_python/episode_calculation.py
@@ -168,8 +168,15 @@ def episode_calculation(
subject_episode_data['id'] = subject_id

# Append to main dataframes
episode_data_df = pd.concat([episode_data_df, subject_episode_data], ignore_index=True)
episode_summary_df = pd.concat([episode_summary_df, subject_summary], ignore_index=True)
if episode_data_df.empty:
episode_data_df = subject_episode_data
else:
episode_data_df = pd.concat([episode_data_df, subject_episode_data], ignore_index=True)

if episode_summary_df.empty:
episode_summary_df = subject_summary
else:
episode_summary_df = pd.concat([episode_summary_df, subject_summary], ignore_index=True)



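The empty-frame check above sidesteps pandas' deprecation warning about concatenating empty or all-NA entries, whose object dtypes would otherwise take part in dtype inference. A minimal sketch of the pattern, assuming the accumulator starts out as an empty DataFrame:

```python
import pandas as pd

episode_data_df = pd.DataFrame()                      # empty accumulator
chunk = pd.DataFrame({"id": ["s1"], "events": [3]})   # hypothetical per-subject result

if episode_data_df.empty:
    episode_data_df = chunk                           # first chunk: take it as-is
else:
    episode_data_df = pd.concat([episode_data_df, chunk], ignore_index=True)
```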
@@ -238,7 +245,7 @@ def episode_single(
day_one = day_one.tz_convert(local_tz)
ndays = len(gd2d_tuple[1])
# generate grid times by starting from day one and cumulatively summing
time_ip = pd.date_range(start=day_one + pd.Timedelta(minutes=dt0), periods=ndays * 24 * 60 /dt0, freq=f"{dt0}min")
time_ip = pd.date_range(start=day_one + pd.Timedelta(minutes=dt0), periods=int(ndays * 24 * 60 /dt0), freq=f"{dt0}min")
data_ip = gd2d_tuple[0].flatten().tolist()
new_data = pd.DataFrame({
"time": time_ip,
@@ -297,29 +304,25 @@ def episode_single(
x, "hypo", lv1_hypo, int(120 / dt0) + 1, end_idx
),
}
)
),
include_groups=False
)
.reset_index()
.drop(columns=['level_1'])
)


# Add exclusive labels
def hypo_exclusion_logic(group_df):
# group_df is a DataFrame with all columns for the current group
if (group_df['lv2_hypo'] > 0).any():
return pd.Series([0] * len(group_df), index=group_df.index)
else:
return group_df['lv1_hypo']
ep_per_seg['lv1_hypo_excl'] = ep_per_seg.groupby(['segment', 'lv1_hypo']).apply(hypo_exclusion_logic).reset_index(level=[0,1], drop=True).values.flatten()

def hyper_exclusion_logic(group_df):
# group_df is a DataFrame with all columns for the current group
if (group_df['lv2_hyper'] > 0).any():
return pd.Series([0] * len(group_df), index=group_df.index)
else:
return group_df['lv1_hyper']
ep_per_seg['lv1_hyper_excl'] = ep_per_seg.groupby(['segment', 'lv1_hyper']).apply(hyper_exclusion_logic).reset_index(level=[0,1], drop=True).values.flatten()
# Add exclusive labels using the correct original logic without DeprecationWarning
# For hypo exclusion: group by both segment and lv1_hypo, set to 0 if any lv2_hypo > 0 in that group
def calculate_exclusion(df, lv1_col, lv2_col):
"""Calculate exclusion labels for lv1 episodes based on lv2 episodes in same group"""
df = df.copy()
df['group_id'] = df.groupby(['segment', lv1_col]).ngroup()
group_has_lv2 = df.groupby('group_id')[lv2_col].transform(lambda x: (x > 0).any())
return df[lv1_col].where(~group_has_lv2, 0)

ep_per_seg['lv1_hypo_excl'] = calculate_exclusion(ep_per_seg, 'lv1_hypo', 'lv2_hypo')
ep_per_seg['lv1_hyper_excl'] = calculate_exclusion(ep_per_seg, 'lv1_hyper', 'lv2_hyper')

full_segment_df = pd.concat([segment_data, ep_per_seg.drop(["segment"], axis=1)], axis=1)

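The `calculate_exclusion` helper above swaps a groupby-apply for a `transform` + `where` combination: every row learns whether its (segment, lv1) group contains any level-2 episode, and lv1 labels in such groups are zeroed. A standalone sketch of the same pattern on toy data:

```python
import pandas as pd

df = pd.DataFrame({
    "segment":  [1, 1, 1, 2, 2],
    "lv1_hypo": [1, 1, 0, 2, 2],
    "lv2_hypo": [0, 1, 0, 0, 0],
})

# True for every row whose (segment, lv1_hypo) group has at least one lv2 episode
group_has_lv2 = df.groupby(["segment", "lv1_hypo"])["lv2_hypo"].transform(lambda s: (s > 0).any())

# Keep the lv1 label only where no lv2 episode overlaps the group
df["lv1_hypo_excl"] = df["lv1_hypo"].where(~group_has_lv2, 0)
print(df)
```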
@@ -402,7 +405,8 @@ def event_class(
else None] + [None]*(len(x)-1)
),
}
)
),
include_groups=False
)
.reset_index()
.drop(columns=['level_1'])
@@ -471,7 +475,8 @@ def lv1_excl(data: pd.DataFrame) -> np.ndarray:
lambda x: pd.DataFrame(
{
"excl":[0 if (x[lv2_first].values > 0).any() else x[lv1_first].iloc[0]]*len(x)
})
}),
include_groups=False
)

excl = excl.reset_index()
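Several hunks in this file (and in grade.py and m_value.py below) pass `include_groups=False` to `GroupBy.apply`. Since pandas 2.2, applying a function to the whole group DataFrame raises a DeprecationWarning when the grouping columns are still handed to the function; `include_groups=False` opts into the future behavior and excludes them. A minimal sketch, assuming pandas >= 2.2:

```python
import pandas as pd

data = pd.DataFrame({"id": ["a", "a", "b"], "gl": [100.0, 140.0, 120.0]})

# The lambda receives each group without the "id" grouping column,
# so no deprecation warning is emitted on pandas >= 2.2.
result = (
    data.groupby("id")
    .apply(lambda x: x["gl"].mean(), include_groups=False)
    .reset_index()
)
result.columns = ["id", "mean_gl"]
print(result)
```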
2 changes: 1 addition & 1 deletion iglu_python/grade.py
@@ -79,7 +79,7 @@ def grade(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:
# Calculate GRADE score for each subject
result = (
data.groupby("id")
.apply(lambda x: np.mean(_grade_formula(x["gl"].dropna())))
.apply(lambda x: np.mean(_grade_formula(x["gl"].dropna())), include_groups=False)
.reset_index()
)
result.columns = ["id", "GRADE"]
9 changes: 3 additions & 6 deletions iglu_python/lbgi.py
@@ -109,14 +109,11 @@ def lbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:
raise ValueError("Empty DataFrame provided")

# Calculate LBGI for each subject
result = pd.DataFrame(columns=["id", "LBGI"])
results = []

for subject_id in data["id"].unique():
subject_data = data[data["id"] == subject_id]["gl"]
lbgi_value = calculate_lbgi(subject_data)
result = pd.concat(
[result, pd.DataFrame({"id": [subject_id], "LBGI": [lbgi_value]})],
ignore_index=True,
)
results.append({"id": subject_id, "LBGI": lbgi_value})

return result
return pd.DataFrame(results)
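Collecting per-subject rows in a plain list and building the DataFrame once, as done above, is faster than repeated `pd.concat` in a loop (which copies the accumulator every iteration) and avoids the dtype quirks of concatenating onto an initially empty frame. A minimal sketch of the idiom; `fake_lbgi` is a hypothetical stand-in, the real formula lives in `calculate_lbgi`:

```python
import pandas as pd

def fake_lbgi(gl_values):
    # Hypothetical placeholder for calculate_lbgi
    return sum(gl_values) / len(gl_values) / 100.0

data = pd.DataFrame({"id": ["s1", "s1", "s2"], "gl": [90.0, 110.0, 150.0]})

results = []
for subject_id in data["id"].unique():
    subject_gl = data.loc[data["id"] == subject_id, "gl"]
    results.append({"id": subject_id, "LBGI": fake_lbgi(subject_gl)})

lbgi_df = pd.DataFrame(results)   # one allocation at the end
print(lbgi_df)
```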
2 changes: 1 addition & 1 deletion iglu_python/m_value.py
@@ -71,7 +71,7 @@ def m_value(data: Union[pd.DataFrame, pd.Series], r: float = 90) -> pd.DataFrame
# Calculate M-value for each subject
result = (
data.groupby("id")
.apply(lambda x: 1000 * np.mean(np.abs(np.log10(x["gl"] / r)) ** 3))
.apply(lambda x: 1000 * np.mean(np.abs(np.log10(x["gl"] / r)) ** 3), include_groups=False)
.reset_index()
)
result.columns = ["id", "M_value"]
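For reference, the per-subject expression in m_value.py above corresponds to the M-value computed over glucose readings G_i with reference level r (default 90 mg/dL):

```math
M = \frac{1000}{n}\sum_{i=1}^{n} \left| \log_{10}\!\left(\frac{G_i}{r}\right) \right|^{3}
```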
18 changes: 10 additions & 8 deletions iglu_python/mage.py
@@ -180,7 +180,10 @@ def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int,
return_val = pd.DataFrame(columns=["start", "end", "mage", "plus_or_minus", "first_excursion"])
for segment in dfs:
ret = mage_atomic(segment,short_ma,long_ma)
return_val = pd.concat([return_val, ret], ignore_index=True)
if return_val.empty:
return_val = ret
else:
return_val = pd.concat([return_val, ret], ignore_index=True)

if return_type == 'df':
return return_val
@@ -195,9 +198,8 @@
res = return_val[return_val['MAGE'].notna()].copy()
elif direction == 'max':
# Group by start,end and keep max mage in each group
res = (return_val.groupby(['start', 'end'])
.apply(lambda x: x[x['MAGE'] == x['MAGE'].max()])
.reset_index(drop=True))
idx = return_val.groupby(['start', 'end'])['MAGE'].idxmax()
res = return_val.loc[idx].reset_index(drop=True)
else: # default: first excursions only
res = return_val[return_val['first_excursion'] == True].copy()

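The `idxmax` rewrite above is the usual way to keep, per group, the row where a column is maximal, without a groupby-apply round trip (note that `idxmax` keeps a single row per group even on ties, whereas the old apply-based filter could keep several). A toy sketch:

```python
import pandas as pd

df = pd.DataFrame({
    "start": [0, 0, 1, 1],
    "end":   [5, 5, 9, 9],
    "MAGE":  [30.0, 45.0, 20.0, 60.0],
})

# Index label of the max-MAGE row within each (start, end) group ...
idx = df.groupby(["start", "end"])["MAGE"].idxmax()
# ... then select those rows directly.
res = df.loc[idx].reset_index(drop=True)
print(res)
```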
@@ -220,13 +222,13 @@ def mage_atomic(data, short_ma,long_ma):
data["MA_Long"] = data["gl"].rolling(window=long_ma, min_periods=1).mean()
# Fill leading NAs (forward fill first valid value)
if short_ma > len(data):
data['MA_Short'].iloc[:short_ma] = data['MA_Short'].iloc[-1]
data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[-1]
else:
data['MA_Short'].iloc[:short_ma] = data['MA_Short'].iloc[short_ma-1]
data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[short_ma-1]
if long_ma > len(data):
data['MA_Long'].iloc[:long_ma] = data['MA_Long'].iloc[-1]
data.loc[data.index[:long_ma], 'MA_Long'] = data['MA_Long'].iloc[-1]
else:
data['MA_Long'].iloc[:long_ma] = data['MA_Long'].iloc[long_ma-1]
data.loc[data.index[:long_ma], 'MA_Long'] = data['MA_Long'].iloc[long_ma-1]
# Calculate difference
data['DELTA_SHORT_LONG'] = data['MA_Short'] - data['MA_Long']
data = data.reset_index(drop=True)
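The `.loc[data.index[:n], col]` form above replaces chained indexing of the shape `df[col].iloc[:n] = value`, which writes into a possibly temporary object (SettingWithCopyWarning today, a hard error under pandas' copy-on-write mode). A minimal sketch of the safe pattern:

```python
import pandas as pd

df = pd.DataFrame({"gl": [100.0, 110.0, 120.0, 130.0]})
df["MA_Short"] = df["gl"].rolling(window=2, min_periods=1).mean()

n = 2
# Chained form (df["MA_Short"].iloc[:n] = ...) may write to a copy;
# a single .loc call with explicit row labels and column always writes in place.
df.loc[df.index[:n], "MA_Short"] = df["MA_Short"].iloc[n - 1]
print(df)
```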
5 changes: 4 additions & 1 deletion iglu_python/modd.py
@@ -72,7 +72,10 @@ def modd_single(data: pd.DataFrame) -> float:
abs_diffs = abs_diffs[~np.isnan(abs_diffs)] # Remove NaNs

# Calculate mean of absolute differences, ignoring NaN values
modd_val = np.nanmean(abs_diffs)
if len(abs_diffs) == 0:
modd_val = np.nan
else:
modd_val = np.nanmean(abs_diffs)

return float(modd_val) if not pd.isna(modd_val) else np.nan

12 changes: 8 additions & 4 deletions iglu_python/pgs.py
@@ -127,8 +127,12 @@ def pgs_single(subj_data: pd.DataFrame) -> float:

return pgs_score

# Calculate PGS for each subject
result = data.groupby("id").apply(lambda x: pgs_single(x)).reset_index()
result.columns = ["id", "PGS"]

return result
# Calculate PGS for each subject
results = []
for subject_id in data["id"].unique():
subject_data = data[data["id"] == subject_id].copy()
pgs_value = pgs_single(subject_data)
results.append({"id": subject_id, "PGS": pgs_value})

return pd.DataFrame(results)
2 changes: 1 addition & 1 deletion iglu_python/roc.py
@@ -123,7 +123,7 @@ def roc_single(data: pd.DataFrame, timelag: int, dt0: int = None , inter_gap: in
{
"id": ["subject1"] * len(data),
"time": pd.date_range(
start="2020-01-01", periods=len(data), freq=f"{dt0}T"
start="2020-01-01", periods=len(data), freq=f"{dt0}min"
),
"gl": data.values,
}
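The frequency-string change above tracks pandas 2.2, where the minute alias "T" is deprecated in favor of "min". A short sketch; `dt0` here is a hypothetical sampling interval in minutes:

```python
import pandas as pd

dt0 = 5
# Deprecated since pandas 2.2:  freq=f"{dt0}T"
times = pd.date_range(start="2020-01-01", periods=4, freq=f"{dt0}min")
print(times)
```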
99 changes: 81 additions & 18 deletions iglu_python/sd_measures.py
@@ -135,13 +135,13 @@ def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[

# 1. SDw - vertical within days
# Standard deviation within each day, then mean across days
daily_sds = np.nanstd(gd2d, axis=1, ddof=1) # ddof=1 for sample std
result['SDw'] = np.nanmean(daily_sds)
daily_sds = _safe_nanstd(gd2d, axis=1, ddof=1) # ddof=1 for sample std
result['SDw'] = _safe_nanmean(daily_sds)

# 2. SDhhmm - between time points
# Mean at each time point across days, then SD of those means
timepoint_means = np.nanmean(gd2d, axis=0)
result['SDhhmm'] = np.nanstd(timepoint_means, ddof=1)
timepoint_means = _safe_nanmean(gd2d, axis=0)
result['SDhhmm'] = _safe_nanstd(timepoint_means, ddof=1)

# 3. SDwsh - within series (1-hour windows)
# Rolling standard deviation over 1-hour windows
Expand All @@ -150,24 +150,24 @@ def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[

# Calculate rolling standard deviation
rolling_sds = _rolling_std(gs, window=win)
result['SDwsh'] = np.nanmean(rolling_sds)
result['SDwsh'] = _safe_nanmean(rolling_sds)

# 4. SDdm - horizontal sd (between daily means)
# Standard deviation of daily mean glucose values
daily_means = np.nanmean(gd2d, axis=1)
result['SDdm'] = np.nanstd(daily_means, ddof=1)
daily_means = _safe_nanmean(gd2d, axis=1)
result['SDdm'] = _safe_nanstd(daily_means, ddof=1)

# 5. SDb - between days, within timepoints
# SD across days for each time point, then mean of those SDs
timepoint_sds = np.nanstd(gd2d, axis=0, ddof=1)
result['SDb'] = np.nanmean(timepoint_sds)
timepoint_sds = _safe_nanstd(gd2d, axis=0, ddof=1)
result['SDb'] = _safe_nanmean(timepoint_sds)

# 6. SDbdm - between days, within timepoints, corrected for daily means
# Subtract daily mean from each value, then calculate SDb on corrected values
daily_means_matrix = daily_means[:, np.newaxis] # Convert to column vector
corrected_gd2d = gd2d - daily_means_matrix
corrected_timepoint_sds = np.nanstd(corrected_gd2d, axis=0, ddof=1)
result['SDbdm'] = np.nanmean(corrected_timepoint_sds)
corrected_timepoint_sds = _safe_nanstd(corrected_gd2d, axis=0, ddof=1)
result['SDbdm'] = _safe_nanmean(corrected_timepoint_sds)

return result

@@ -200,10 +200,73 @@ def _rolling_std(data: np.ndarray, window: int) -> np.ndarray:
for i in range(n - window + 1):
window_data = valid_data[i:i + window]
if len(window_data) == window: # Full window
rolling_stds.append(np.nanstd(window_data, ddof=1))
rolling_stds.append(_safe_nanstd(window_data, ddof=1))

return np.array(rolling_stds) if rolling_stds else np.array([np.nan])

def _safe_nanstd(data: np.ndarray, axis: Optional[int] = None, ddof: int = 1) -> float:
"""
Safe version of np.nanstd that handles insufficient data gracefully

Parameters
----------
data : np.ndarray
Input data
axis : int, optional
Axis along which the standard deviation is computed
ddof : int
Delta degrees of freedom

Returns
-------
float
Standard deviation or np.nan if insufficient data
"""
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=RuntimeWarning)

if axis is None:
# Check if we have enough non-NaN values
valid_data = data[~np.isnan(data)]
if len(valid_data) <= ddof:
return np.nan
else:
# For axis operations, we need to check each slice
# This is more complex, so we'll just suppress warnings
pass

return np.nanstd(data, axis=axis, ddof=ddof)


def _safe_nanmean(data: np.ndarray, axis: Optional[int] = None) -> float:
"""
Safe version of np.nanmean that handles empty slices gracefully

Parameters
----------
data : np.ndarray
Input data
axis : int, optional
Axis along which the mean is computed

Returns
-------
float
Mean or np.nan if no valid data
"""
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=RuntimeWarning)

if axis is None:
# Check if we have any non-NaN values
if np.isnan(data).all():
return np.nan
else:
# For axis operations, suppress warnings and let numpy handle it
pass

return np.nanmean(data, axis=axis)


# Alternative vectorized implementation for better performance
def sd_measures_vectorized(data: pd.DataFrame,
@@ -237,11 +300,11 @@ def _calculate_sd_subtypes_vectorized(gd2d: np.ndarray, dt0: int, subject_id: An

return {
'id': subject_id,
'SDw': np.nanmean(np.nanstd(gd2d, axis=1, ddof=1)),
'SDhhmm': np.nanstd(np.nanmean(gd2d, axis=0), ddof=1),
'SDwsh': np.nanmean(_rolling_std(gd2d.T.flatten(), round(60/dt0))),
'SDdm': np.nanstd(np.nanmean(gd2d, axis=1), ddof=1),
'SDb': np.nanmean(np.nanstd(gd2d, axis=0, ddof=1)),
'SDbdm': np.nanmean(np.nanstd(gd2d - np.nanmean(gd2d, axis=1, keepdims=True),
'SDw': _safe_nanmean(np.nanstd(gd2d, axis=1, ddof=1)),
'SDhhmm': np.nanstd(_safe_nanmean(gd2d, axis=0), ddof=1),
'SDwsh': _safe_nanmean(_rolling_std(gd2d.T.flatten(), round(60/dt0))),
'SDdm': np.nanstd(_safe_nanmean(gd2d, axis=1), ddof=1),
'SDb': _safe_nanmean(np.nanstd(gd2d, axis=0, ddof=1)),
'SDbdm': _safe_nanmean(np.nanstd(gd2d - _safe_nanmean(gd2d, axis=1, keepdims=True),
axis=0, ddof=1))
}
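A quick illustration of what the `_safe_nanstd` / `_safe_nanmean` wrappers above guard against: plain numpy emits RuntimeWarnings ("Degrees of freedom <= 0 for slice", "Mean of empty slice") and returns nan whenever a day or timepoint has too few readings. A minimal standalone sketch:

```python
import warnings
import numpy as np

# Two days, each with only a single valid reading on the time grid
gd2d = np.array([[120.0, np.nan],
                 [118.0, np.nan]])

with warnings.catch_warnings():
    warnings.simplefilter("error")            # turn warnings into errors to make them visible
    try:
        np.nanstd(gd2d, axis=1, ddof=1)       # each row has 1 value, ddof=1 -> dof <= 0
    except RuntimeWarning as w:
        print("raw numpy warning:", w)

# The _safe_* wrappers above suppress these RuntimeWarnings and return nan
# for slices that lack enough data.
```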
4 changes: 2 additions & 2 deletions iglu_python/utils.py
@@ -91,8 +91,8 @@ def check_data_columns(data: pd.DataFrame, time_check=False, tz="") -> pd.DataFr
raise ValueError("Data contains no glucose values")

# Check for missing values
if data["gl"].isna().any():
warnings.warn("Data contains missing glucose values")
# if data["gl"].isna().any():
# warnings.warn("Data contains missing glucose values")

# convert time to specified timezone
# TODO: check if this is correct (R-implementation compatibility)
4 changes: 4 additions & 0 deletions tests/test_above_percent.py
@@ -1,6 +1,7 @@
import json

import pandas as pd
import numpy as np
import pytest

import iglu_python as iglu
@@ -48,6 +49,9 @@ def test_above_percent_iglu_r_compatible(scenario):
expected_results = scenario["results"]
expected_df = pd.DataFrame(expected_results)
expected_df = expected_df.reset_index(drop=True)
pd.set_option('future.no_silent_downcasting', True)
expected_df = expected_df.replace({None: np.nan})


# Compare DataFrames with precision to 0.001
pd.testing.assert_frame_equal(
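On the test change: expected results come from JSON, so missing entries arrive as `None` (object dtype), and replacing them with `np.nan` under `future.no_silent_downcasting` keeps the comparison well-typed without pandas downcasting object columns behind the scenes. A minimal sketch with a hypothetical column name:

```python
import numpy as np
import pandas as pd

pd.set_option("future.no_silent_downcasting", True)

# An all-None column stays object-dtyped when built from JSON-like data
expected_df = pd.DataFrame({"id": ["s1", "s2"], "above_250": [None, None]})
print(expected_df.dtypes)                       # above_250: object

expected_df = expected_df.replace({None: np.nan})
print(expected_df["above_250"].tolist())        # [nan, nan], no silent downcast
```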