From c399077b586c9e18d0e1d63cd7e37c2407beb7d0 Mon Sep 17 00:00:00 2001
From: Stas Khirman
Date: Mon, 16 Jun 2025 10:45:51 +0300
Subject: [PATCH 01/16] move main functions to the top

---
 iglu_python/grade.py | 38 ++++++++---------
 iglu_python/gvp.py   | 98 ++++++++++++++++++++++----------------------
 iglu_python/lbgi.py  | 72 ++++++++++++++++----------------
 3 files changed, 104 insertions(+), 104 deletions(-)

diff --git a/iglu_python/grade.py b/iglu_python/grade.py
index 8d278ba..dddb8ba 100644
--- a/iglu_python/grade.py
+++ b/iglu_python/grade.py
@@ -5,25 +5,6 @@
 from .utils import check_data_columns
 
-
-def _grade_formula(x: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
-    """
-    Helper function to calculate GRADE score for individual glucose values.
-
-    Parameters
-    ----------
-    x : Union[pd.Series, np.ndarray]
-        Glucose values in mg/dL
-
-    Returns
-    -------
-    Union[pd.Series, np.ndarray]
-        GRADE scores for each glucose value
-    """
-    grade = 425 * (np.log10(np.log10(x / 18)) + 0.16) ** 2
-    return np.minimum(grade, 50)  # Cap at 50
-
-
 def grade(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:
     """
     Calculate mean GRADE score for each subject.
@@ -85,3 +66,22 @@ def grade(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:
     result.columns = ["id", "GRADE"]
 
     return result
+
+
+def _grade_formula(x: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
+    """
+    Helper function to calculate GRADE score for individual glucose values.
+
+    Parameters
+    ----------
+    x : Union[pd.Series, np.ndarray]
+        Glucose values in mg/dL
+
+    Returns
+    -------
+    Union[pd.Series, np.ndarray]
+        GRADE scores for each glucose value
+    """
+    grade = 425 * (np.log10(np.log10(x / 18)) + 0.16) ** 2
+    return np.minimum(grade, 50)  # Cap at 50
+
diff --git a/iglu_python/gvp.py b/iglu_python/gvp.py
index d6e7812..0bf3689 100644
--- a/iglu_python/gvp.py
+++ b/iglu_python/gvp.py
@@ -5,55 +5,6 @@
 from .utils import CGMS2DayByDay, check_data_columns
 
-
-def calculate_gvp(glucose_values: pd.Series, timestamps: pd.Series) -> float:
-    """
-    Calculate GVP for a single series of glucose values.
-
-    Parameters
-    ----------
-    glucose_values : pd.Series
-        Series of glucose values in mg/dL
-    timestamps : pd.Series
-        Series of timestamps corresponding to glucose values
-
-    Returns
-    -------
-    float
-        Glucose Variability Percentage
-    """
-    # Remove NaN values
-    mask = ~(glucose_values.isna() | timestamps.isna())
-    glucose_values = glucose_values[mask]
-    timestamps = timestamps[mask]
-
-    if len(glucose_values) < 2:
-        return np.nan
-
-    # Sort by timestamp
-    sort_idx = timestamps.argsort()
-    glucose_values = glucose_values.iloc[sort_idx]
-    timestamps = timestamps.iloc[sort_idx]
-
-    # Calculate time differences in minutes
-    time_diffs = np.diff(timestamps.astype(np.int64) // 10**9) / 60.0
-
-    # Calculate glucose differences
-    glucose_diffs = np.diff(glucose_values)
-
-    # Calculate total length of glucose trace
-    added_length = np.sqrt(time_diffs**2 + glucose_diffs**2)
-    total_length = np.sum(added_length)
-
-    # Calculate length of flat trace
-    base_length = np.sum(time_diffs)
-
-    # Calculate GVP
-    gvp = (total_length / base_length - 1) * 100
-
-    return gvp
-
-
 def gvp(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.DataFrame:
     r"""
     Calculate Glucose Variability Percentage (GVP).
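The moved `calculate_gvp` measures how much longer the glucose trace is than a flat line spanning the same time. A toy numeric check of that trace-length formula (hypothetical readings, 5-minute spacing assumed):

```python
import numpy as np

# three readings, 5 minutes apart: 100 -> 110 -> 105 mg/dL
time_diffs = np.array([5.0, 5.0])        # minutes between readings
glucose_diffs = np.array([10.0, -5.0])   # mg/dL changes

total_length = np.sum(np.sqrt(time_diffs**2 + glucose_diffs**2))  # ~18.25
base_length = np.sum(time_diffs)                                  # 10.0
gvp = (total_length / base_length - 1) * 100                      # ~82.5%
```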
@@ -164,3 +115,52 @@ def gvp_single(subj_data): if is_vector: df = df.drop(columns=["id"]) return df + + +def calculate_gvp(glucose_values: pd.Series, timestamps: pd.Series) -> float: + """ + Calculate GVP for a single series of glucose values. + + Parameters + ---------- + glucose_values : pd.Series + Series of glucose values in mg/dL + timestamps : pd.Series + Series of timestamps corresponding to glucose values + + Returns + ------- + float + Glucose Variability Percentage + """ + # Remove NaN values + mask = ~(glucose_values.isna() | timestamps.isna()) + glucose_values = glucose_values[mask] + timestamps = timestamps[mask] + + if len(glucose_values) < 2: + return np.nan + + # Sort by timestamp + sort_idx = timestamps.argsort() + glucose_values = glucose_values.iloc[sort_idx] + timestamps = timestamps.iloc[sort_idx] + + # Calculate time differences in minutes + time_diffs = np.diff(timestamps.astype(np.int64) // 10**9) / 60.0 + + # Calculate glucose differences + glucose_diffs = np.diff(glucose_values) + + # Calculate total length of glucose trace + added_length = np.sqrt(time_diffs**2 + glucose_diffs**2) + total_length = np.sum(added_length) + + # Calculate length of flat trace + base_length = np.sum(time_diffs) + + # Calculate GVP + gvp = (total_length / base_length - 1) * 100 + + return gvp + diff --git a/iglu_python/lbgi.py b/iglu_python/lbgi.py index bd66ede..55ae880 100644 --- a/iglu_python/lbgi.py +++ b/iglu_python/lbgi.py @@ -5,42 +5,6 @@ from .utils import check_data_columns - -def calculate_lbgi(glucose_values: pd.Series) -> float: - """ - Calculate LBGI for a single series of glucose values. - - Parameters - ---------- - glucose_values : pd.Series - Series of glucose values in mg/dL - - Returns - ------- - float - LBGI value - """ - # Remove NaN values - glucose_values = glucose_values.dropna() - - if len(glucose_values) == 0: - return np.nan - - # Calculate LBGI using the formula from the R implementation - # LBGI = 22.77 * mean(fbg[gl < 112.5]^2) - # where fbg = max(0, 1.509 * (log(gl)^1.084 - 5.381)) - - # Calculate fbg values - fbg = 1.509 * (np.log(glucose_values) ** 1.084 - 5.381) - fbg = np.minimum(fbg, 0) # Take min with 0 - - # Calculate LBGI - n = len(glucose_values) - lbgi = 10 * np.sum(fbg[glucose_values < 112.5] ** 2) / n - - return lbgi - - def lbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: r""" Calculate the Low Blood Glucose Index (LBGI) for each subject. @@ -117,3 +81,39 @@ def lbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: results.append({"id": subject_id, "LBGI": lbgi_value}) return pd.DataFrame(results) + + +def calculate_lbgi(glucose_values: pd.Series) -> float: + """ + Calculate LBGI for a single series of glucose values. 
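+
+    Illustrative check: for a hypothetical single reading of 80 mg/dL,
+    fbg = 1.509 * (log(80)**1.084 - 5.381) ≈ -0.63, and since 80 < 112.5,
+    LBGI = 10 * (-0.63)**2 / 1 ≈ 4.0.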
+
+    Parameters
+    ----------
+    glucose_values : pd.Series
+        Series of glucose values in mg/dL
+
+    Returns
+    -------
+    float
+        LBGI value
+    """
+    # Remove NaN values
+    glucose_values = glucose_values.dropna()
+
+    if len(glucose_values) == 0:
+        return np.nan
+
+    # Calculate LBGI following the R implementation:
+    # LBGI = 10 * sum(fbg[gl < 112.5]^2) / n, where n is the total number of readings
+    # and fbg = min(0, 1.509 * (log(gl)^1.084 - 5.381))
+
+    # Calculate fbg values
+    fbg = 1.509 * (np.log(glucose_values) ** 1.084 - 5.381)
+    fbg = np.minimum(fbg, 0)  # Take min with 0
+
+    # Calculate LBGI
+    n = len(glucose_values)
+    lbgi = 10 * np.sum(fbg[glucose_values < 112.5] ** 2) / n
+
+    return lbgi
+

From 2e1d2a35eb955649ff09416dc59c065e721055b3 Mon Sep 17 00:00:00 2001
From: Stas Khirman
Date: Mon, 16 Jun 2025 14:50:17 +0300
Subject: [PATCH 02/16] gmi, cv_glu and cv_measures are implemented

---
 README.md                  | 113 +++++++++------
 iglu_python/__init__.py    |   6 +
 iglu_python/cv_glu.py      |  60 ++++++++
 iglu_python/cv_measures.py | 100 ++++++++++++++
 iglu_python/gmi.py         |  57 ++++++++
 iglu_python/utils.py       |  15 +-
 tests/test_cv_glu.py       | 274 +++++++++++++++++++++++++++++++++++++
 tests/test_cv_measures.py  | 258 ++++++++++++++++++++++++++++++++++
 tests/test_gmi.py          | 199 +++++++++++++++++++++++++++
 9 files changed, 1036 insertions(+), 46 deletions(-)
 create mode 100644 iglu_python/cv_glu.py
 create mode 100644 iglu_python/cv_measures.py
 create mode 100644 iglu_python/gmi.py
 create mode 100644 tests/test_cv_glu.py
 create mode 100644 tests/test_cv_measures.py
 create mode 100644 tests/test_gmi.py

diff --git a/README.md b/README.md
index 23a3fb3..ff5e2e5 100644
--- a/README.md
+++ b/README.md
@@ -19,50 +19,75 @@ A significant focus of this project has been ensuring compatibility with the ori
 This approach ensures that the Python implementation produces results consistent with the original R package.
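+For example, each metric's unit test replays reference CGM input through the Python
+implementation and compares it against output captured from IGLU-R (a sketch; the
+fixture layout mirrors `tests/expected_results.json` used by the test suite):
+
+```python
+import json
+import pandas as pd
+import iglu_python as iglu
+
+with open("tests/expected_results.json") as f:
+    scenario = json.load(f)["test_runs"][0]   # one recorded IGLU-R run
+
+df = pd.read_csv(scenario["input_file_name"], index_col=0)
+df["time"] = pd.to_datetime(df["time"])
+
+result = getattr(iglu, scenario["method"])(df, **scenario["kwargs"])
+expected = pd.DataFrame(scenario["results"])
+pd.testing.assert_frame_equal(result, expected, check_dtype=False, rtol=1e-3)
+```
+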
 ## Unit Test Status
-Unless noted, iglu-r test is considered successful if it achieves precision of 0.001
-
-| Function | IGLU-R test compatibility | array/list/Series | TZ | Comments |
-|----------|---------------------------|-------------------|----|----------|
-| above_percent | ✅ | |||
-| active_percent | ✅ |
-| adrr | ✅ |
-| auc| 🟡 (0.01 precision) | || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)|
-| below_percent| ✅ |
-| cogi | ✅ |
-| conga | ✅ |
-| cv_glu | ✅ |
-| cv_measures | ✅ |
-| ea1c | ✅ |
-| episode_calculation | ✅| || |
-| gmi | ✅ |
-| grade_eugly | ✅ |
-| grade_hyper | ✅ |
-| grade_hypo | ✅ |
-| grade | ✅ |
-| gri | ✅ |
-| gvp | ✅ |
-| hbgi | ✅ |
-| hyper_index | ✅ |
-| hypo_index | ✅ |
-| igc | ✅ |
-| j_index | ✅ |
-| lbgi | ✅ |
-| mad_glu | ✅ |
-| mag | ✅ | || IMHO, Original R implementation has an error |
-| mage | ✅ | || See algorithm at [MAGE](https://irinagain.github.io/iglu/articles/MAGE.html) |
-| mean_glu | ✅ |
-| median_glu | ✅ |
-| modd | ✅ |
-| pgs | ✅ | || |
-| quantile_glu | ✅ |
-| range_glu | ✅ |
-| roc | ✅ |
-| sd_glu | ✅ |
-| sd_measures | ✅ |
-| sd_roc | ✅ | |||
-| process_data | ✅ |
-| summary_glu | ✅ |
-| CGMS2DayByDay | ✅ |
+The current version of IGLU-PYTHON is test-compatible with IGLU-R v4.2.2
+
+Unless noted, IGLU-R test compatibility is considered successful if it achieves a precision of 0.001
+
+| Function | Description | IGLU-R test compatibility | list /ndarray /Series input | TZ | Comments |
+|----------|-------------|-------------|-------------------|----|----------|
+| above_percent | percentage of values above target thresholds| ✅ | |||
+| active_percent | percentage of time CGM was active | ✅ |
+| adrr | average daily risk range | ✅ |
+| auc| Area Under Curve | 🟡 (0.01 precision) | || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)|
+| below_percent| percentage of values below target thresholds| ✅ |
+| cogi |Coefficient of Glucose Irregularity | ✅ |
+| conga | Continuous Overall Net Glycemic Action |✅ |
+| cv_glu | Coefficient of Variation | ✅| ✅ returns float |
+| cv_measures | |✅ |✅ returns dict[str, float]| |
+| ea1c |estimated A1C (eA1C) values| ✅ |
+| episode_calculation | Hypo/Hyperglycemic episodes with summary statistics| ✅| || |
+| gmi | Glucose Management Indicator | ✅ |
+| grade_eugly |percentage of GRADE score attributable to target range| ✅ |
+| grade_hyper |percentage of GRADE score attributable to hyperglycemia| ✅ |
+| grade_hypo |percentage of GRADE score attributable to hypoglycemia| ✅ |
+| grade |mean GRADE score| ✅ |
+| gri |Glycemia Risk Index | ✅ |
+| gvp |Glucose Variability Percentage| ✅ |
+| hbgi |High Blood Glucose Index| ✅ |
+| hyper_index |Hyperglycemia Index| ✅ |
+| hypo_index |Hypoglycemia Index| ✅ |
+| igc |Index of Glycemic Control| ✅ |
+| in_range_percent |percentage of values within target ranges| ✅ |
+| iqr_glu |glucose level interquartile range|✅ |
+| j_index |J-Index score for glucose measurements| ✅ |
+| lbgi | Low Blood Glucose Index| ✅ |
+| m_value | M-value of Schlichtkrull et al | ✅ |
+| mad_glu | Median Absolute Deviation | ✅ |
+| mag | Mean Absolute Glucose| ✅ | || IMHO, Original R implementation has an error |
+| mage | Mean Amplitude of Glycemic Excursions| ✅ | || See algorithm at [MAGE](https://irinagain.github.io/iglu/articles/MAGE.html) |
+| mean_glu | Mean glucose value | ✅ |
+| median_glu |Median glucose value| ✅ |
+| modd | Mean of Daily Differences| ✅ |
+| pgs | Personal Glycemic State | ✅ | || |
+| quantile_glu |glucose level quantiles| ✅ |
+| range_glu |glucose level range| ✅ |
+| roc | Rate of Change| ✅ |
+| sd_glu | standard deviation of glucose values| ✅ |
+| sd_measures |various standard deviation subtypes| ✅ |
+| sd_roc | standard deviation of the rate of change| ✅ | |||
+| summary_glu | summary glucose level| ✅ |
+| process_data | Data Pre-Processor | ✅ |
+| CGMS2DayByDay |Interpolate glucose input| ✅ |
+
+### Input & Output
+The implementation maintains compatibility with the R version while following Python best practices. The metrics can be used as:
+
+```Python
+import iglu_python as iglu
+
+# With DataFrame input
+result_df = iglu.cv_glu(data)  # data should have 'id', 'time', and 'gl' columns
+# Returns a DataFrame with "id" and column(s) with value(s)
+
+# With Series input (some metrics require Series with DateTimeIndex)
+result_float = iglu.cv_glu(glucose_series)  # just glucose values
+# returns a single float value
+
+# Same for functions that support list or ndarray input
+result_float = iglu.cv_glu(glucose_list)  # list of glucose values
+# returns a single float value
+
+```
 
 # Installation
diff --git a/iglu_python/__init__.py b/iglu_python/__init__.py
index 0ff53fc..52356da 100644
--- a/iglu_python/__init__.py
+++ b/iglu_python/__init__.py
@@ -5,12 +5,15 @@
 from .below_percent import below_percent
 from .cogi import cogi
 from .conga import conga
+from .cv_glu import cv_glu
+from .cv_measures import cv_measures
 from .ea1c import ea1c
 from .episode_calculation import episode_calculation
 from .grade import grade
 from .grade_eugly import grade_eugly
 from .grade_hyper import grade_hyper
 from .grade_hypo import grade_hypo
+from .gmi import gmi
 from .gri import gri
 from .gvp import gvp
 from .hbgi import hbgi
@@ -49,6 +52,8 @@
     "CGMS2DayByDay",
     "cogi",
     "conga",
+    "cv_glu",
+    "cv_measures",
     "ea1c",
     "episode_calculation",
     "gd2d_to_df",
@@ -56,6 +61,7 @@
     "grade_eugly",
     "grade_hyper",
     "grade_hypo",
+    "gmi",
     "gri",
     "gvp",
     "hbgi",
diff --git a/iglu_python/cv_glu.py b/iglu_python/cv_glu.py
new file mode 100644
index 0000000..b2e28b3
--- /dev/null
+++ b/iglu_python/cv_glu.py
@@ -0,0 +1,60 @@
+"""Calculate Coefficient of Variation (CV) of glucose levels.
+
+This module provides a function to calculate the Coefficient of Variation (CV) of glucose measurements.
+CV is a measure of relative variability, calculated as 100 * standard deviation / mean.
+
+References:
+    Rodbard (2009) Interpretation of continuous glucose monitoring data:
+    glycemic variability and quality of glycemic control,
+    Diabetes Technology and Therapeutics 11 .55-67,
+    doi:10.1089/dia.2008.0132.
+"""
+
+from typing import Union
+import pandas as pd
+import numpy as np
+from .utils import check_data_columns
+
+def cv_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> Union[pd.DataFrame, float]:
+    """Calculate Coefficient of Variation (CV) of glucose levels.
+
+    The function cv_glu produces CV values in a pandas DataFrame object.
+
+    Args:
+        data: DataFrame object with column names "id", "time", and "gl",
+            or a pandas Series / list / ndarray of glucose values.
+
+    Returns:
+        If a DataFrame object is passed, then a DataFrame with two columns:
+        subject id and corresponding CV value is returned. If a Series of glucose
+        values is passed, then a single float CV value is returned.
+
+    Details:
+        A DataFrame with 1 row for each subject, a column for subject id and
+        a column for CV values is returned. NA glucose values are
+        omitted from the calculation of the CV.
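+
+        Illustrative example: cv_glu(pd.Series([100, 120, 110])) returns
+        100 * 10 / 110 ≈ 9.09, since sd = 10 and mean = 110.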
+
+        CV (Coefficient of Variation) is calculated by 100 * sd(G) / mean(G)
+        Where G is the list of all Glucose measurements for a subject.
+    """
+    # Handle Series input
+    if isinstance(data, (list, np.ndarray, pd.Series)):
+        if isinstance(data, (list, np.ndarray)):
+            data = pd.Series(data)
+        data = data.dropna()
+        if len(data) == 0:
+            raise ValueError("No glucose values provided")
+        # Calculate CV for Series
+        cv_val = 100 * data.std() / data.mean()
+        return cv_val
+
+    # Check and prepare data
+    data = check_data_columns(data)
+
+    data = data.dropna()
+    # Calculate CV for each subject
+    out = data.groupby('id').agg(
+        CV=('gl', lambda x: 100 * x.std() / x.mean())
+    ).reset_index()
+
+    return out
\ No newline at end of file
diff --git a/iglu_python/cv_measures.py b/iglu_python/cv_measures.py
new file mode 100644
index 0000000..6f926ac
--- /dev/null
+++ b/iglu_python/cv_measures.py
@@ -0,0 +1,100 @@
+"""Calculate Coefficient of Variation subtypes (CVmean and CVsd).
+
+This module provides functions to calculate two types of Coefficient of Variation measures:
+1. CVmean: Mean of daily coefficient of variations
+2. CVsd: Standard deviation of daily coefficient of variations
+
+References:
+    Umpierrez, et.al. (2018) Glycemic Variability: How to Measure and Its Clinical
+    Implication for Type 2 Diabetes
+    The American Journal of Medical Sciences 356 .518-527,
+    doi:10.1016/j.amjms.2018.09.010.
+"""
+
+import pandas as pd
+import numpy as np
+from .utils import check_data_columns, CGMS2DayByDay, is_iglu_r_compatible
+
+def cv_measures(data, dt0=None, inter_gap=45, tz="")->pd.DataFrame|dict[str, float]:
+    """Calculate Coefficient of Variation subtypes (CVmean and CVsd).
+
+    The function cv_measures produces CV subtype values in a pandas DataFrame object.
+
+    Args:
+        data: DataFrame object with column names "id", "time", and "gl",
+            or a glucose Series with a DatetimeIndex
+        dt0: The time frequency for interpolation in minutes. If None, will match the CGM meter's frequency
+        inter_gap: The maximum allowable gap (in minutes) for interpolation. Default is 45
+        tz: String name of timezone. Default is ""
+
+    Returns:
+        A DataFrame with three columns: subject id and corresponding CV subtype values (CVmean and CVsd).
+        For Series input, a dict with keys "CVmean" and "CVsd" is returned instead.
+
+    Details:
+        A DataFrame with 1 row for each subject, a column for subject id and
+        a column for each CV subtype value is returned.
+
+        Missing values will be linearly interpolated when close enough to non-missing values.
+
+        1. CVmean:
+           Calculated by first taking the coefficient of variation of each day's glucose measurements,
+           then taking the mean of all the coefficient of variations. That is, for x
+           days we compute cv_1 ... cv_x daily coefficient of variations and calculate
+           1/x * sum(cv_i)
+
+        2. CVsd:
+           Calculated by first taking the coefficient of variation of each day's glucose measurements,
+           then taking the standard deviation of all the coefficient of variations. That is, for d
+           days we compute cv_1 ... cv_d daily coefficient of variations and calculate
+           std([cv_1, cv_2, ... cv_d])
+    """
+    # Handle Series input
+    if isinstance(data, pd.Series):
+        if not isinstance(data.index, pd.DatetimeIndex):
+            raise ValueError("Series must have a DatetimeIndex")
+
+        results_dict = _calculate_series_cv(data, dt0=dt0, inter_gap=inter_gap, tz=tz)
+        return results_dict
+
+    # Check and prepare data
+    data = check_data_columns(data)
+
+
+    # Process each subject
+    results = []
+    for subject_id in data['id'].unique():
+        subject_data = data[data['id'] == subject_id]
+
+        results_dict = _calculate_series_cv(subject_data, dt0=dt0, inter_gap=inter_gap, tz=tz)
+
+        results.append({
+            'id': subject_id,
+            'CVmean': results_dict['CVmean'],
+            'CVsd': results_dict['CVsd']
+        })
+
+    return pd.DataFrame(results)
+
+def _calculate_series_cv(subject_data: pd.DataFrame|pd.Series, dt0=None, inter_gap=45, tz="") -> dict[str, float]:
+    """Calculate CV for series/single subject input"""
+
+    # Convert to day-by-day format
+    gd2d,active_days,dt0 = CGMS2DayByDay(subject_data, dt0=dt0, inter_gap=inter_gap, tz=tz)
+
+    # gd2d is two dimensional array - 1st dimension is day, 2nd dimension is time point
+    # active_days is a list of days that have at least 2 non-missing values
+    # dt0 is the time frequency for interpolation in minutes
+
+    # calculate the standard deviation and mean for each day
+    daily_deviations = np.apply_along_axis(np.nanstd, 1, gd2d,ddof=1)
+    daily_mean = np.apply_along_axis(np.nanmean, 1, gd2d)
+
+    cv = daily_deviations *100 / daily_mean
+
+    # mean and standard deviation of the daily CVs
+    cv_mean = np.nanmean(cv)
+    cv_sd = np.nanstd(cv,ddof=1)
+
+    return {
+        'CVmean': cv_mean,
+        'CVsd': cv_sd
+    }
diff --git a/iglu_python/gmi.py b/iglu_python/gmi.py
new file mode 100644
index 0000000..55e604b
--- /dev/null
+++ b/iglu_python/gmi.py
@@ -0,0 +1,57 @@
+"""Calculate Glucose Management Indicator (GMI).
+
+This module provides functions to calculate GMI values from continuous glucose monitoring data.
+GMI is a metric that estimates HbA1c from CGM data using the formula: 3.31 + (0.02392 * mean_glucose)
+where mean_glucose is the average glucose value in mg/dL.
+
+References:
+    Bergenstal (2018) Glucose Management Indicator (GMI): A New Term for
+    Estimating A1C From Continuous Glucose Monitoring
+    Diabetes Care 41 .2275-2280,
+    doi:10.2337/dc18-1581.
+"""
+
+import pandas as pd
+import numpy as np
+
+from iglu_python.utils import check_data_columns
+
+
+def gmi(data):
+    """Calculate GMI (Glucose Management Indicator).
+
+    The function gmi produces GMI values in a pandas DataFrame object.
+
+    Args:
+        data: DataFrame object with column names "id", "time", and "gl",
+            or numeric vector of glucose values.
+
+    Returns:
+        If a DataFrame object is passed, then a DataFrame with two columns:
+        subject id and corresponding GMI is returned. If a vector of glucose
+        values is passed, then a DataFrame with just the GMI value is returned.
+
+    Note:
+        A DataFrame with 1 row for each subject, a column for subject id and
+        a column for GMI values is returned. NA glucose values are
+        omitted from the calculation of the GMI.
+
+        GMI score is calculated by 3.31 + (0.02392 * mean(G))
+        where G is the vector of Glucose Measurements (mg/dL).
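+
+    Example (illustrative):
+        gmi(pd.Series([150, 160, 170, 180]))
+        # mean glucose = 165 mg/dL -> GMI = 3.31 + 0.02392 * 165 ≈ 7.26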
+    """
+    # Handle Series input
+    if isinstance(data, pd.Series):
+        # Calculate GMI for Series
+        gmi_val = 3.31 + (0.02392 * data.mean())
+        return pd.DataFrame({"GMI": [gmi_val]})
+
+    # Check and prepare data
+    data = check_data_columns(data)
+    is_vector = getattr(data, "is_vector", False)
+
+    # Calculate GMI for each subject
+    out = data.groupby("id").agg(
+        GMI=("gl", lambda x: 3.31 + (0.02392 * x.mean()))
+    ).reset_index()
+
+    return out
\ No newline at end of file
diff --git a/iglu_python/utils.py b/iglu_python/utils.py
index a7f722f..bc1e354 100644
--- a/iglu_python/utils.py
+++ b/iglu_python/utils.py
@@ -121,7 +121,7 @@ def check_data_columns(data: pd.DataFrame, time_check=False, tz="") -> pd.DataFr
 
 
 def CGMS2DayByDay(
-    data: pd.DataFrame,
+    data: pd.DataFrame|pd.Series,
     dt0: Optional[pd.Timestamp] = None,
     inter_gap: int = 45,
     tz: str = "",
@@ -133,7 +133,7 @@
     with each row representing a day and each column representing a time point.
     Missing values are linearly interpolated when close enough to non-missing values.
 
-    data : pd.DataFrame
+    data : pd.DataFrame or pd.Series
         DataFrame with columns 'id', 'time', and 'gl'. Should only be data for 1 subject. In case multiple subject ids are detected, a warning is produced and only 1st subject is used.
     dt0 : int, optional
@@ -165,6 +165,17 @@
     >>> print(gd2d.shape)  # Shape will be (1, 288) for one day with 5-min intervals
     (1, 288)
     """
+    # Handle Series input
+    if isinstance(data, pd.Series):
+        if not isinstance(data.index, pd.DatetimeIndex):
+            raise ValueError("Series must have a DatetimeIndex")
+        data = pd.DataFrame(
+            {
+                "id": ["subject1"] * len(data.values),
+                "time": data.index,
+                "gl": data.values,
+            }
+        )
 
     # Check data format
     data = check_data_columns(data, tz)
diff --git a/tests/test_cv_glu.py b/tests/test_cv_glu.py
new file mode 100644
index 0000000..88cf302
--- /dev/null
+++ b/tests/test_cv_glu.py
@@ -0,0 +1,274 @@
+"""Unit tests for CV (Coefficient of Variation) calculation."""
+
+import json
+import numpy as np
+import pandas as pd
+import pytest
+
+import iglu_python as iglu
+
+method_name = "cv_glu"
+
+def get_test_scenarios():
+    """Get test scenarios for cv_glu calculations"""
+    # Load expected results
+    with open("tests/expected_results.json", "r") as f:
+        expected_results = json.load(f)
+    # set local timezone
+    iglu.utils.set_local_tz(expected_results["config"]["local_tz"])
+    # Filter scenarios for the cv_glu method
+    return [
+        scenario
+        for scenario in expected_results["test_runs"]
+        if scenario["method"] == method_name
+    ]
+
+@pytest.mark.parametrize('scenario', get_test_scenarios())
+def test_cv_glu_iglu_r_compatible(scenario):
+    """Test CV calculation against expected results from R implementation."""
+
+    input_file_name = scenario["input_file_name"]
+    kwargs = scenario["kwargs"]
+
+    # Read CSV and convert time column to datetime
+    df = pd.read_csv(input_file_name, index_col=0)
+    if "time" in df.columns:
+        df["time"] = pd.to_datetime(df["time"])
+
+
+    expected_results = scenario["results"]
+    expected_df = pd.DataFrame(expected_results)
+    expected_df = expected_df.reset_index(drop=True)
+    pd.set_option('future.no_silent_downcasting', True)
+    expected_df = expected_df.replace({None: np.nan})
+
+    # Calculate CV
+    result_df = iglu.cv_glu(df,**kwargs)
+
+    assert result_df is not None
+
+    # Compare DataFrames with precision to 0.001 for numeric columns
+    pd.testing.assert_frame_equal(
+        result_df,
+        expected_df,
+        check_dtype=False,  # Don't check dtypes since we might have different numeric types
+        check_index_type=True,
+        check_column_type=True,
+        check_frame_type=True,
+        check_names=True,
+        check_datetimelike_compat=True,
+        check_categorical=True,
+        check_like=True,
+        check_freq=True,
+        check_flags=True,
+        check_exact=False,
+        rtol=0.001,
+    )
+
+def test_cv_glu_basic():
+    """Test basic CV calculation with known glucose values."""
+    # Create test data with two subjects
+    data = pd.DataFrame({
+        'id': ['1'] * 3 + ['2'] * 3,
+        'time': pd.date_range('2020-01-01', periods=6, freq='5min'),
+        'gl': [100, 120, 110, 90, 130, 95]
+    })
+
+    # Calculate CV
+    result = iglu.cv_glu(data)
+
+    # Expected results:
+    # Subject 1: CV = 100 * np.std([100, 120, 110],ddof=1) / np.mean([100, 120, 110]) ≈ 9.09
+    # Subject 2: CV = 100 * np.std([90, 130, 95],ddof=1) / np.mean([90, 130, 95]) ≈ 20.75
+    expected = pd.DataFrame({
+        'id': ['1', '2'],
+        'CV': [9.09, 20.75]
+    })
+
+    pd.testing.assert_frame_equal(
+        result,
+        expected,
+        check_dtype=False,
+        rtol=1e-2  # Allow for small numerical differences
+    )
+
+def test_cv_glu_series():
+    """Test CV calculation with pandas Series input."""
+    # Create test data
+    data = pd.Series([100, 120, 110, 90, 130, 95])
+
+    # Calculate CV
+    result = iglu.cv_glu(data)
+
+    # Expected result: CV = 100 * std([100, 120, 110, 90, 130, 95],ddof=1) / mean([100, 120, 110, 90, 130, 95]) ≈ 14.33
+    expected = 14.33
+
+    np.testing.assert_allclose(result, expected, rtol=0.001)
+
+def test_cv_glu_empty():
+    """Test CV calculation with empty data."""
+    # Test with empty DataFrame
+    with pytest.raises(ValueError):
+        iglu.cv_glu(pd.DataFrame(columns=['id', 'time', 'gl']))
+
+    # Test with empty Series
+    with pytest.raises(ValueError):
+        iglu.cv_glu(pd.Series([]))
+
+def test_cv_glu_constant_glucose():
+    """Test CV calculation with constant glucose values."""
+    # Create test data with constant glucose
+    data = pd.DataFrame({
+        'id': ['1'] * 3,
+        'time': pd.date_range('2020-01-01', periods=3, freq='5min'),
+        'gl': [100, 100, 100]
+    })
+
+    # Calculate CV
+    result = iglu.cv_glu(data)
+
+    # Expected result: CV = 0 (since std = 0)
+    expected = pd.DataFrame({
+        'id': ['1'],
+        'CV': [0.0]
+    })
+
+    pd.testing.assert_frame_equal(
+        result,
+        expected,
+        check_dtype=False,
+        rtol=1e-3
+    )
+
+def test_cv_glu_missing_values():
+    """Test CV calculation with missing values."""
+    # Create test data with missing values
+    data = pd.DataFrame({
+        'id': ['1'] * 4,
+        'time': pd.date_range('2020-01-01', periods=4, freq='5min'),
+        'gl': [100, np.nan, 120, 110]
+    })
+
+    # Calculate CV
+    result = iglu.cv_glu(data)
+
+    # Expected result: CV = 100 * np.std([100, 120, 110],ddof=1) / np.mean([100, 120, 110]) ≈ 9.0909
+    expected = pd.DataFrame({
+        'id': ['1'],
+        'CV': [9.0909]
+    })
+
+    pd.testing.assert_frame_equal(
+        result,
+        expected,
+        check_dtype=False,
+        rtol=1e-2
+    )
+
+def test_cv_glu_extreme_values():
+    """Test CV calculation with extreme glucose values."""
+    # Create test data with extreme values
+    data = pd.DataFrame({
+        'id': ['1'] * 3,
+        'time': pd.date_range('2020-01-01', periods=3, freq='5min'),
+        'gl': [40, 400, 40]  # Very low and very high values
+    })
+
+    # Calculate CV
+    result = iglu.cv_glu(data)
+
+    # Expected result: CV = 100 * std([40, 400, 40],ddof=1) / mean([40, 400, 40]) ≈ 129.90
+    expected = pd.DataFrame({
+        'id': ['1'],
+        'CV': [129.90]
+    })
+
+    pd.testing.assert_frame_equal(
+        result,
+        expected,
+        check_dtype=False,
+        rtol=1e-2
+    )
+
+def test_cv_glu_single_subject():
+    """Test CV calculation with a single subject."""
+    # Create test data for one subject
+    data = pd.DataFrame({
+        'id': ['1'] * 5,
+        'time': pd.date_range('2020-01-01', periods=5, freq='5min'),
+        'gl': [120, 118, 122, 119, 121]  # Small variations around 120
+    })
+
+    # Calculate CV
+    result = iglu.cv_glu(data)
+
+    # Expected result: CV = 100 * std([120, 118, 122, 119, 121],ddof=1) / mean([120, 118, 122, 119, 121]) ≈ 1.317
+    expected = pd.DataFrame({
+        'id': ['1'],
+        'CV': [1.317]
+    })
+
+    pd.testing.assert_frame_equal(
+        result,
+        expected,
+        check_dtype=False,
+        rtol=1e-2
+    )
+
+def test_cv_glu_uneven_measurements():
+    """Test CV calculation with subjects having different numbers of measurements."""
+    # Create test data with two subjects having different numbers of measurements
+    data = pd.DataFrame({
+        'id': ['1'] * 3 + ['2'] * 5,  # Subject 1 has 3 measurements, Subject 2 has 5
+        'time': pd.date_range('2020-01-01', periods=8, freq='5min'),
+        'gl': [100, 120, 110,  # Subject 1
+               90, 130, 95, 125, 105]  # Subject 2
+    })
+
+    # Calculate CV
+    result = iglu.cv_glu(data)
+
+    # Expected results:
+    # Subject 1: CV = 100 * std([100, 120, 110],ddof=1) / mean([100, 120, 110]) ≈ 9.0909
+    # Subject 2: CV = 100 * std([90, 130, 95, 125, 105],ddof=1) / mean([90, 130, 95, 125, 105]) ≈ 16.3472
+    expected = pd.DataFrame({
+        'id': ['1', '2'],
+        'CV': [9.0909, 16.3472]
+    })
+
+    pd.testing.assert_frame_equal(
+        result,
+        expected,
+        check_dtype=False,
+        rtol=1e-2
+    )
+
+def test_cv_glu_mixed_missing():
+    """Test CV calculation with mixed missing values across subjects."""
+    # Create test data with different patterns of missing values
+    data = pd.DataFrame({
+        'id': ['1'] * 3 + ['2'] * 3 + ['3'] * 3,
+        'time': pd.date_range('2020-01-01', periods=9, freq='5min'),
+        'gl': [100, np.nan, 110,  # Subject 1: one missing
+               90, 130, np.nan,   # Subject 2: one missing
+               np.nan, np.nan, 95]  # Subject 3: two missing
+    })
+
+    # Calculate CV
+    result = iglu.cv_glu(data)
+
+    # Expected results:
+    # Subject 1: CV = 100 * std([100, 110],ddof=1) / mean([100, 110]) ≈ 6.73
+    # Subject 2: CV = 100 * std([90, 130],ddof=1) / mean([90, 130]) ≈ 25.71
+    # Subject 3: CV = NaN (a single non-missing value has no sample standard deviation)
+    expected = pd.DataFrame({
+        'id': ['1', '2', '3'],
+        'CV': [6.73435, 25.712, np.nan]
+    })
+
+    pd.testing.assert_frame_equal(
+        result,
+        expected,
+        check_dtype=False,
+        rtol=1e-2
+    )
\ No newline at end of file
diff --git a/tests/test_cv_measures.py b/tests/test_cv_measures.py
new file mode 100644
index 0000000..38fd6f1
--- /dev/null
+++ b/tests/test_cv_measures.py
@@ -0,0 +1,258 @@
+"""Unit tests for CV measures (CVmean and CVsd) calculation."""
+
+import json
+import numpy as np
+import pandas as pd
+import pytest
+import iglu_python as iglu
+
+method_name = "cv_measures"
+
+
+def get_test_scenarios():
+    """Get test scenarios for cv_measures calculations"""
+    # Load expected results
+    with open("tests/expected_results.json", "r") as f:
+        expected_results = json.load(f)
+
+    # Filter scenarios for the cv_measures method
+    return [
+        scenario
+        for scenario in expected_results["test_runs"]
+        if scenario["method"] == method_name
+    ]
+
+
+@pytest.mark.parametrize('scenario', get_test_scenarios())
+def test_cv_measures_iglu_r_compatible(scenario):
+    """Test CV measures calculation against expected results from R implementation."""
+    input_file_name = scenario["input_file_name"]
+    kwargs = scenario["kwargs"]
+
+    expected_results = scenario["results"]
+    expected_df = pd.DataFrame(expected_results)
+    expected_df = expected_df.reset_index(drop=True)
+    pd.set_option('future.no_silent_downcasting', True)
+    expected_df = expected_df.replace({None: np.nan})
+
+
+    # Read CSV and convert time column to datetime
+    df = pd.read_csv(input_file_name, index_col=0)
+    if "time" in df.columns:
+        df["time"] = pd.to_datetime(df["time"])
+
+    # Calculate CV measures
+    result_df = iglu.cv_measures(df,**kwargs)
+
+    # Compare with expected results
+    pd.testing.assert_frame_equal(
+        result_df.round(3),
+        expected_df.round(3),
+        check_dtype=False,  # Don't check dtypes since we might have different numeric types
+        check_index_type=True,
+        check_column_type=True,
+        check_frame_type=True,
+        check_names=True,
+        check_datetimelike_compat=True,
+        check_categorical=True,
+        check_like=True,
+        check_freq=True,
+        check_flags=True,
+        check_exact=False,
+        rtol=1e-3,
+    )
+
+def test_cv_measures_basic():
+    """Test basic CV measures calculation with known glucose values."""
+    # Create test data with two days of measurements for one subject
+    time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00',
+                           '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00'])
+    data = pd.DataFrame({
+        'id': ['1'] * 6,  # 3 measurements per day for 2 days
+        'time': time,
+        'gl': [100, 120, 110,  # Day 1: mean=110, std=10, CV=9.09
+               90, 130, 95]    # Day 2: mean=105, std=21.79, CV=20.75
+    })
+
+    # Calculate CV measures
+    result = iglu.cv_measures(data)
+
+    # Expected results:
+    # CVmean = np.mean([9.09, 20.75]) = 14.92
+    # CVsd = np.std([9.09, 20.75],ddof=1) = 8.244
+    expected = pd.DataFrame({
+        'id': ['1'],
+        'CVmean': [14.92],
+        'CVsd': [8.244]
+    })
+
+    pd.testing.assert_frame_equal(
+        result,
+        expected,
+        check_dtype=False,
+        rtol=1e-2
+    )
+
+def test_cv_measures_multiple_subjects():
+    """Test CV measures calculation with multiple subjects."""
+    # Create test data with two subjects, each with two days of measurements
+    time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00',
+                           '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00',
+                           '2020-01-03 10:00:00', '2020-01-03 10:05:00', '2020-01-03 10:10:00',
+                           '2020-01-04 10:00:00', '2020-01-04 10:05:00', '2020-01-04 10:10:00'])
+
+    data = pd.DataFrame({
+        'id': ['1'] * 6 + ['2'] * 6,  # 3 measurements per day for 2 days for each subject
+        'time': time,
+        'gl': [100, 120, 110,  # Subject 1, Day 1: CV=9.09
+               90, 130, 95,    # Subject 1, Day 2: CV=20.75
+               80, 100, 90,    # Subject 2, Day 1: CV=11.11
+               70, 110, 80]    # Subject 2, Day 2: CV=24.02
+    })
+
+    # Calculate CV measures
+    result = iglu.cv_measures(data)
+
+    # Expected results:
+    # Subject 1: CVmean=14.92, CVsd=8.244
+    # Subject 2: CVmean=17.565, CVsd=9.127
+    expected = pd.DataFrame({
+        'id': ['1', '2'],
+        'CVmean': [14.92, 17.565],
+        'CVsd': [8.244, 9.127]
+    })
+
+    pd.testing.assert_frame_equal(
+        result,
+        expected,
+        check_dtype=False,
+        rtol=1e-2
+    )
+
+def test_cv_measures_missing_values():
+    """Test CV measures calculation with missing values."""
+    # Create test data with missing values
+    time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00',
+                           '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00'])
+
+    data = pd.DataFrame({
+        'id': ['1'] * 6,  # 3 measurements per day for 2 days
+        'time': time,
+        'gl': [100, np.nan, 110,  # Day 1: CV=6.73 (the NaN reading is dropped)
+               90, 130, np.nan]   # Day 2: CV=25.71 (the NaN reading is dropped)
+    })
+
+    # Calculate CV measures
+    result = iglu.cv_measures(data, inter_gap=45)  # Allow interpolation
+
+    # Expected results:
+    # CVmean = mean([6.73, 25.71]) = 16.22
+    # CVsd = np.std([6.73, 25.71],ddof=1) = 13.42
+    expected = pd.DataFrame({
+        'id': ['1'],
'CVmean': [16.223], + 'CVsd': [13.419] + }) + + pd.testing.assert_frame_equal( + result, + expected, + check_dtype=False, + rtol=1e-2 + ) + +def test_cv_measures_constant_glucose(): + """Test CV measures calculation with constant glucose values.""" + # Create test data with constant glucose values + time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00', + '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00']) + + data = pd.DataFrame({ + 'id': ['1'] * 6, # 3 measurements per day for 2 days + 'time': time, + 'gl': [100, 100, 100, # Day 1: CV=0 + 100, 100, 100] # Day 2: CV=0 + }) + + # Calculate CV measures + result = iglu.cv_measures(data) + + # Expected results: + # CVmean = mean([0, 0]) = 0 + # CVsd = std([0, 0]) = 0 + expected = pd.DataFrame({ + 'id': ['1'], + 'CVmean': [0.0], + 'CVsd': [0.0] + }) + + pd.testing.assert_frame_equal( + result, + expected, + check_dtype=False, + rtol=1e-3 + ) + +def test_cv_measures_single_day(): + """Test CV measures calculation with only one day of data.""" + # Create test data with only one day of measurements + data = pd.DataFrame({ + 'id': ['1'] * 3, # 3 measurements for one day + 'time': pd.date_range('2020-01-01 10:00:00', periods=3, freq='5min'), + 'gl': [100, 120, 110] # CV=9.09 + }) + + # Calculate CV measures + result = iglu.cv_measures(data) + + # Expected results: + # CVmean = 9.09 (only one day) + # CVsd = NaN (can't calculate std with one value) + expected = pd.DataFrame({ + 'id': ['1'], + 'CVmean': [9.09], + 'CVsd': [np.nan] + }) + + pd.testing.assert_frame_equal( + result, + expected, + check_dtype=False, + rtol=1e-2 + ) + +def test_cv_measures_empty(): + """Test CV measures calculation with empty data.""" + # Test with empty DataFrame + with pytest.raises(ValueError): + iglu.cv_measures(pd.DataFrame(columns=['id', 'time', 'gl'])) + +def test_cv_measures_custom_dt0(): + """Test CV measures calculation with custom dt0 parameter.""" + # Create test data + time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00', + '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00']) + + data = pd.DataFrame({ + 'id': ['1'] * 6, # 3 measurements per day for 2 days + 'time': time, + 'gl': [100, 120, 110, # Day 1 + 90, 130, 95] # Day 2 + }) + + # Calculate CV measures with custom dt0 + result = iglu.cv_measures(data, dt0=5) # 5-minute intervals + + # The results should be the same as without dt0 since our data is already in 5-minute intervals + expected = pd.DataFrame({ + 'id': ['1'], + 'CVmean': [14.92], + 'CVsd': [8.244] + }) + + pd.testing.assert_frame_equal( + result, + expected, + check_dtype=False, + rtol=1e-2 + ) \ No newline at end of file diff --git a/tests/test_gmi.py b/tests/test_gmi.py new file mode 100644 index 0000000..44dcf90 --- /dev/null +++ b/tests/test_gmi.py @@ -0,0 +1,199 @@ +import json + +import numpy as np +import pandas as pd +import pytest + +import iglu_python as iglu + +method_name = "gmi" + + +def get_test_scenarios(): + """Get test scenarios for GMI calculations""" + # Load expected results + with open("tests/expected_results.json", "r") as f: + expected_results = json.load(f) + + # Filter scenarios for GMI method + return [ + scenario + for scenario in expected_results["test_runs"] + if scenario["method"] == method_name + ] + + +@pytest.mark.parametrize("scenario", get_test_scenarios()) +def test_gmi_iglu_r_compatible(scenario): + """Test GMI calculation against expected results from R implementation""" + + input_file_name = 
scenario["input_file_name"] + kwargs = scenario["kwargs"] + + expected_results = scenario["results"] + expected_df = pd.DataFrame(expected_results) + expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + + # Read CSV and convert time column to datetime + df = pd.read_csv(input_file_name, index_col=0) + if "time" in df.columns: + df["time"] = pd.to_datetime(df["time"]) + + result_df = iglu.gmi(df, **kwargs) + + assert result_df is not None + + # Compare DataFrames with precision to 0.001 for numeric columns + pd.testing.assert_frame_equal( + result_df, + expected_df, + check_dtype=False, # Don't check dtypes since we might have different numeric types + check_index_type=True, + check_column_type=True, + check_frame_type=True, + check_names=True, + check_datetimelike_compat=True, + check_categorical=True, + check_like=True, + check_freq=True, + check_flags=True, + check_exact=False, + rtol=0.001, + ) + + +def test_gmi_basic(): + """Test basic GMI calculation with known glucose values""" + data = pd.DataFrame( + { + "id": ["subject1", "subject1", "subject1", "subject1", "subject2", "subject2"], + "time": pd.to_datetime( + [ + "2020-01-01 00:00:00", + "2020-01-01 00:05:00", + "2020-01-01 00:10:00", + "2020-01-01 00:15:00", + "2020-01-01 00:00:00", + "2020-01-01 00:05:00", + ] + ), + "gl": [150, 160, 170, 180, 140, 145], + } + ) + + result = iglu.gmi(data) + assert isinstance(result, pd.DataFrame) + assert all(col in result.columns for col in ["id", "GMI"]) + assert len(result) == 2 # One row per subject + + # Calculate expected GMI for subject1 + # Mean glucose = (150 + 160 + 170 + 180) / 4 = 165 + # GMI = 3.31 + (0.02392 * 165) = 7.2568 + expected_gmi1 = 3.31 + (0.02392 * 165) + assert abs(result.loc[result["id"] == "subject1", "GMI"].iloc[0] - expected_gmi1) < 0.001 + + # Calculate expected GMI for subject2 + # Mean glucose = (140 + 145) / 2 = 142.5 + # GMI = 3.31 + (0.02392 * 142.5) = 6.7186 + expected_gmi2 = 3.31 + (0.02392 * 142.5) + assert abs(result.loc[result["id"] == "subject2", "GMI"].iloc[0] - expected_gmi2) < 0.001 + + +def test_gmi_series(): + """Test GMI with Series input""" + series_data = pd.Series([150, 160, 170, 180, 190, 200]) + result = iglu.gmi(series_data) + assert isinstance(result, pd.DataFrame) + assert "GMI" in result.columns + assert len(result) == 1 + + # Calculate expected GMI + # Mean glucose = (150 + 160 + 170 + 180 + 190 + 200) / 6 = 175 + # GMI = 3.31 + (0.02392 * 175) = 7.496 + expected_gmi = 3.31 + (0.02392 * 175) + assert abs(result["GMI"].iloc[0] - expected_gmi) < 0.001 + + +def test_gmi_empty(): + """Test GMI with empty data""" + empty_data = pd.DataFrame(columns=["id", "time", "gl"]) + with pytest.raises(ValueError): + iglu.gmi(empty_data) + + +def test_gmi_constant_glucose(): + """Test GMI with constant glucose values""" + data = pd.DataFrame( + { + "id": ["subject1"] * 6, + "time": pd.to_datetime( + [ + "2020-01-01 00:00:00", + "2020-01-01 00:05:00", + "2020-01-01 00:10:00", + "2020-01-01 00:15:00", + "2020-01-01 00:20:00", + "2020-01-01 00:25:00", + ] + ), + "gl": [150] * 6, # Constant glucose + } + ) + + result = iglu.gmi(data) + assert len(result) == 1 + # For constant glucose of 150, GMI should be 3.31 + (0.02392 * 150) = 6.898 + expected_gmi = 3.31 + (0.02392 * 150) + assert abs(result["GMI"].iloc[0] - expected_gmi) < 0.001 + + +def test_gmi_missing_values(): + """Test GMI with missing values""" + data_with_na = pd.DataFrame( + { + "id": 
["subject1"] * 4, + "time": pd.to_datetime( + [ + "2020-01-01 00:00:00", + "2020-01-01 00:05:00", + "2020-01-01 00:10:00", + "2020-01-01 00:15:00", + ] + ), + "gl": [150, np.nan, 170, 180], + } + ) + result = iglu.gmi(data_with_na) + assert isinstance(result, pd.DataFrame) + assert len(result) == 1 + # Mean glucose = (150 + 170 + 180) / 3 = 166.67 + # GMI = 3.31 + (0.02392 * 166.67) = 7.2987 + expected_gmi = 3.31 + (0.02392 * 166.67) + assert abs(result["GMI"].iloc[0] - expected_gmi) < 0.001 + + +def test_gmi_extreme_values(): + """Test GMI with extreme glucose values""" + data = pd.DataFrame( + { + "id": ["subject1"] * 4, + "time": pd.to_datetime( + [ + "2020-01-01 00:00:00", + "2020-01-01 00:05:00", + "2020-01-01 00:10:00", + "2020-01-01 00:15:00", + ] + ), + "gl": [40, 400, 600, 800], # Extreme values + } + ) + result = iglu.gmi(data) + assert isinstance(result, pd.DataFrame) + assert len(result) == 1 + # Mean glucose = (40 + 400 + 600 + 800) / 4 = 460 + # GMI = 3.31 + (0.02392 * 460) = 14.3132 + expected_gmi = 3.31 + (0.02392 * 460) + assert abs(result["GMI"].iloc[0] - expected_gmi) < 0.001 \ No newline at end of file From ed99d42aa031ce5b7a5b41183cc373bb3c03ce01 Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Mon, 16 Jun 2025 14:56:20 +0300 Subject: [PATCH 03/16] gmi, cv_glu and cv_measures are imlemented --- README.md | 2 +- tests/test_cv_measures.py | 68 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ff5e2e5..5b111e1 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves | cogi |Coefficient of Glucose Irregularity | ✅ | | conga | Continuous Overall Net Glycemic Action |✅ | | cv_glu | Coefficient of Variation | ✅| ✅ returns float | -| cv_measures | |✅ |✅ returns dict[str:float]| | +| cv_measures |Coefficient of Variation subtypes (CVmean and CVsd) |✅ |✅ only Series(DatetimeIndex) returns dict[str:float]| | | ea1c |estimated A1C (eA1C) values| ✅ | | episode_calculation | Hypo/Hyperglycemic episodes with summary statistics| ✅| || | | gmi | Glucose Management Indicator | ✅ | diff --git a/tests/test_cv_measures.py b/tests/test_cv_measures.py index 38fd6f1..ec9edbf 100644 --- a/tests/test_cv_measures.py +++ b/tests/test_cv_measures.py @@ -255,4 +255,70 @@ def test_cv_measures_custom_dt0(): expected, check_dtype=False, rtol=1e-2 - ) \ No newline at end of file + ) + +def test_cv_measures_series_with_datetime_index(): + """Test CV measures calculation with Series input that has DatetimeIndex.""" + # Create test data with DatetimeIndex + time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00', + '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00']) + data = pd.Series( + [100, 120, 110, # Day 1: mean=110, std=10, CV=9.09 + 90, 130, 95], # Day 2: mean=105, std=21.21, CV=20.75 + index=time + ) + + # Calculate CV measures + result = iglu.cv_measures(data) + + # Expected results: + # CVmean = np.mean([9.09, 20.75]) = 14.92 + # CVsd = np.std([9.09, 20.75], ddof=1) = 8.244 + expected = { + 'CVmean': 14.92, + 'CVsd': 8.244 + } + + # Compare results + assert isinstance(result, dict) + np.testing.assert_allclose(result['CVmean'], expected['CVmean'], rtol=0.001) + np.testing.assert_allclose(result['CVsd'], expected['CVsd'], rtol=0.001) + +def test_cv_measures_series_without_datetime_index(): + """Test CV measures calculation with Series input that doesn't have DatetimeIndex.""" + # 
+    data = pd.Series(
+        [100, 120, 110, 90, 130, 95],
+        index=range(6)  # Regular integer index instead of DatetimeIndex
+    )
+
+    # Attempt to calculate CV measures - should raise ValueError
+    with pytest.raises(ValueError, match="Series must have a DatetimeIndex"):
+        iglu.cv_measures(data)
+
+def test_cv_measures_series_with_missing_values():
+    """Test CV measures calculation with Series input containing missing values."""
+    # Create test data with DatetimeIndex and missing values
+    time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00',
+                           '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00'])
+    data = pd.Series(
+        [100, np.nan, 110,  # Day 1: CV=6.73 (the NaN reading is dropped)
+         90, 130, np.nan],  # Day 2: CV=25.71 (the NaN reading is dropped)
+        index=time
+    )
+
+    # Calculate CV measures with interpolation
+    result = iglu.cv_measures(data, inter_gap=45)
+
+    # Expected results:
+    # CVmean = 16.223
+    # CVsd = 13.419
+    expected = {
+        'CVmean': 16.223,
+        'CVsd': 13.419
+    }
+
+    # Compare results
+    assert isinstance(result, dict)
+    np.testing.assert_allclose(result['CVmean'], expected['CVmean'], rtol=0.001)
+    np.testing.assert_allclose(result['CVsd'], expected['CVsd'], rtol=0.001)

From cf8de8f166ed73f24ed0297fc5a4a5cd9c41ed5a Mon Sep 17 00:00:00 2001
From: Stas Khirman
Date: Mon, 16 Jun 2025 16:28:01 +0300
Subject: [PATCH 04/16] support for Series, list and ndarray -> return float
 or dict

---
 README.md               |  32 +-
 iglu_python/auc.py      |  94 +++---
 iglu_python/cogi.py     | 106 ++++---
 iglu_python/gmi.py      |   9 +-
 iglu_python/mage.py     | 651 ++++++++++++++++++++--------------------
 iglu_python/mean_glu.py |   9 +-
 iglu_python/sd_glu.py   |  11 +-
 pyproject.toml          |   2 +-
 tests/test_cogi.py      |   7 +-
 tests/test_gmi.py       |   6 +-
 tests/test_mage.py      |  14 +-
 tests/test_mean_glu.py  |   7 +-
 tests/test_sd_glu.py    |   7 +-
 13 files changed, 486 insertions(+), 469 deletions(-)

diff --git a/README.md b/README.md
index 5b111e1..090b0d6 100644
--- a/README.md
+++ b/README.md
@@ -28,15 +28,15 @@ Unless noted, IGLU-R test compatibility is considered successful if it achieves
 | above_percent | percentage of values above target thresholds| ✅ | |||
 | active_percent | percentage of time CGM was active | ✅ |
 | adrr | average daily risk range | ✅ |
-| auc| Area Under Curve | 🟡 (0.01 precision) | || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)|
+| auc| Area Under Curve | 🟡 (0.01 precision) |✅ only Series(DatetimeIndex) returns float || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)|
 | below_percent| percentage of values below target thresholds| ✅ |
-| cogi |Coefficient of Glucose Irregularity | ✅ |
+| cogi |Coefficient of Glucose Irregularity | ✅ | ✅ returns float
 | conga | Continuous Overall Net Glycemic Action |✅ |
 | cv_glu | Coefficient of Variation | ✅| ✅ returns float |
 | cv_measures |Coefficient of Variation subtypes (CVmean and CVsd) |✅ |✅ only Series(DatetimeIndex) returns dict[str, float]| |
 | ea1c |estimated A1C (eA1C) values| ✅ |
 | episode_calculation | Hypo/Hyperglycemic episodes with summary statistics| ✅| || |
-| gmi | Glucose Management Indicator | ✅ |
+| gmi | Glucose Management Indicator | ✅ | ✅ returns float |
 | grade_eugly |percentage of GRADE score attributable to target range| ✅ |
@@ -54,15 +54,15 @@ Unless noted, IGLU-R test compatibility is considered successful if it achieves
 | m_value | M-value of Schlichtkrull et al | ✅ |
 | mad_glu | Median Absolute Deviation | ✅ |
 | mag | Mean Absolute Glucose| ✅ | || IMHO, Original R implementation has an error |
-| mage | Mean Amplitude of Glycemic Excursions| ✅ | || See algorithm at [MAGE](https://irinagain.github.io/iglu/articles/MAGE.html) |
-| mean_glu | Mean glucose value | ✅ |
+| mage | Mean Amplitude of Glycemic Excursions| ✅ |✅ only Series(DatetimeIndex) returns float || See algorithm at [MAGE](https://irinagain.github.io/iglu/articles/MAGE.html) |
+| mean_glu | Mean glucose value | ✅ | ✅ returns float|
 | median_glu |Median glucose value| ✅ |
 | modd | Mean of Daily Differences| ✅ |
 | pgs | Personal Glycemic State | ✅ | || |
 | quantile_glu |glucose level quantiles| ✅ |
 | range_glu |glucose level range| ✅ |
 | roc | Rate of Change| ✅ |
-| sd_glu | standard deviation of glucose values| ✅ |
+| sd_glu | standard deviation of glucose values| ✅ | ✅ returns float
 | sd_measures |various standard deviation subtypes| ✅ |
 | sd_roc | standard deviation of the rate of change| ✅ | |||
 | summary_glu | summary glucose level| ✅ |
@@ -117,36 +117,32 @@ import iglu_python as iglu
 # Optional: datetime index or 'time' column
 data = pd.DataFrame({
     'id': ['Subject1'] * 100,
-    'time': pd.date_range(start='2023-01-01', periods=100, freq='5min')
-    'gl': [120, 135, 140, 125, 110, ...],  # glucose values in mg/dL
+    'time': pd.date_range(start='2023-01-01', periods=100, freq='5min'),
+    'gl': [120, 135, 140, 125, 110]*20  # glucose values in mg/dL
 })
 
 # Calculate glucose metrics
 mean_glucose = iglu.mean_glu(data)
 cv = iglu.cv_glu(data)
-time_in_range = iglu.active_percent(data, lltr=70, ultr=180)
+active = iglu.active_percent(data)
 
-print(f"Mean glucose: {mean_glucose}")
-print(f"CV: {cv}")
-print(f"Time in range (70-180 mg/dL): {time_in_range}%")
+print(f"Mean glucose: {mean_glucose['mean'][0]}")
+print(f"CV: {cv['CV'][0]}")
+print(f"CGM active percent: {active['active_percent'][0]}%")
 ```
 
 ### Using with Time Series Data
 
 ```python
 import pandas as pd
+import numpy as np
 import iglu_python as iglu
-from datetime import datetime, timedelta
 
 # Create time series data
 timestamps = pd.date_range(start='2023-01-01', periods=288, freq='5min')
 glucose_values = [120 + 20 * np.sin(i/48) + np.random.normal(0, 5) for i in range(288)]
 
-data = pd.DataFrame({
-    'id': ['Subject1'] * 288,
-    'time': timestamps,
-    'gl': glucose_values
-})
+data = pd.Series(glucose_values, index=timestamps)
 
 # Calculate advanced metrics
 mage = iglu.mage(data)
diff --git a/iglu_python/auc.py b/iglu_python/auc.py
index 806f8fe..55ede21 100644
--- a/iglu_python/auc.py
+++ b/iglu_python/auc.py
@@ -56,57 +56,65 @@ def auc(data: pd.DataFrame, tz: str = "") -> pd.DataFrame:
     0  subject1       155.0
     1  subject2       142.5
     """
+    # Handle Series input
+    if isinstance(data, pd.Series):
+        if not isinstance(data.index, pd.DatetimeIndex):
+            raise ValueError("Series must have a DatetimeIndex")
+
+        auc = auc_single(data,tz=tz)
+        return auc
+
     # Check data format and convert time to datetime
     data = check_data_columns(data)
 
-    def auc_single(subject_data: pd.DataFrame) -> float:
-        """Calculate AUC for a single subject"""
-        # Get interpolated data using CGMS2DayByDay
-        gd2d, actual_dates, dt0 = CGMS2DayByDay(subject_data, tz=tz)
-
-        # Convert gd2d to DataFrame
-        input_data = gd2d_to_df(gd2d, actual_dates, dt0)
-        if is_iglu_r_compatible():
-            input_data['day'] = input_data['time'].dt.floor('d')
-            input_data['gl_next'] = input_data['gl'].shift(-1)
-            each_day_area = input_data.groupby("day").apply(
-                lambda x: np.nansum(
-                    (dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
-                ),
-                include_groups=False
-            )
-            # calculate number of not nan trapezoids in total (number of not nan gl and gl_next)
-            n_trapezoids = (~np.isnan(input_data["gl"]) & ~np.isnan(input_data["gl_next"])).sum()
-            hours = dt0/60 * n_trapezoids
-            daily_area = each_day_area.sum()
-            hourly_avg = daily_area/hours
-            return hourly_avg
-        else:
-            # Add hour column by rounding time to nearest hour
-            input_data['hour'] = input_data['time'].dt.floor('h')
-
-            input_data['gl_next'] = input_data['gl'].shift(-1)
-
-            # Calculate AUC for each hour using trapezoidal rule (mg*min/dL)
-            hourly_auc = input_data.groupby("hour").apply(
-                lambda x: np.nansum(
-                    (dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
-                ),
-                include_groups=False
-            )
-            # 0 mean no data in this hour, replace with nan
-            hourly_auc = hourly_auc.replace(0, np.nan)
-
-            hourly_avg = hourly_auc.mean(skipna=True)
-            # Return mean of daily hourly averages
-            return hourly_avg
-
     # Process each subject
     result = []
     for subject in data["id"].unique():
         subject_data = data[data["id"] == subject]
-        hourly_auc = auc_single(subject_data)
+        hourly_auc = auc_single(subject_data,tz=tz)
         result.append({"id": subject, "hourly_auc": hourly_auc})
 
     # Convert to DataFrame
     return pd.DataFrame(result)
+
+def auc_single(subject_data: pd.DataFrame|pd.Series,tz:str = "") -> float:
+    """Calculate AUC for a single subject"""
+    # Get interpolated data using CGMS2DayByDay
+    gd2d, actual_dates, dt0 = CGMS2DayByDay(subject_data, tz=tz)
+
+    # Convert gd2d to DataFrame
+    input_data = gd2d_to_df(gd2d, actual_dates, dt0)
+    if is_iglu_r_compatible():
+        input_data['day'] = input_data['time'].dt.floor('d')
+        input_data['gl_next'] = input_data['gl'].shift(-1)
+        each_day_area = input_data.groupby("day").apply(
+            lambda x: np.nansum(
+                (dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
+            ),
+            include_groups=False
+        )
+        # calculate number of not nan trapezoids in total (number of not nan gl and gl_next)
+        n_trapezoids = (~np.isnan(input_data["gl"]) & ~np.isnan(input_data["gl_next"])).sum()
+        hours = dt0/60 * n_trapezoids
+        daily_area = each_day_area.sum()
+        hourly_avg = daily_area/hours
+        return hourly_avg
+    else:
+        # Add hour column by rounding time to nearest hour
+        input_data['hour'] = input_data['time'].dt.floor('h')
+
+        input_data['gl_next'] = input_data['gl'].shift(-1)
+
+        # Calculate AUC for each hour using trapezoidal rule (mg*min/dL)
+        hourly_auc = input_data.groupby("hour").apply(
+            lambda x: np.nansum(
+                (dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
+            ),
+            include_groups=False
+        )
+        # 0 means no data in this hour, replace with nan
+        hourly_auc = hourly_auc.replace(0, np.nan)
+
+        hourly_avg = hourly_auc.mean(skipna=True)
+        # Return mean of daily hourly averages
+        return hourly_avg
diff --git a/iglu_python/cogi.py b/iglu_python/cogi.py
index b42e346..b25d172 100644
--- a/iglu_python/cogi.py
+++ b/iglu_python/cogi.py
@@ -1,6 +1,7 @@
 from typing import List, Union
 
 import pandas as pd
+import numpy as np
 
 from .below_percent import below_percent
 from .in_range_percent import in_range_percent
@@ -9,10 +10,10 @@
 
 
 def cogi(
-    data: Union[pd.DataFrame, pd.Series, list],
+    data: Union[pd.DataFrame, pd.Series, list,np.ndarray],
     targets: List[int] = [70, 180],
     weights: List[float] = [0.5, 0.35, 0.15],
-) -> pd.DataFrame:
+) -> pd.DataFrame|float:
     """
     Calculate Coefficient of Glucose Irregularity (COGI).
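With this patch `cogi` also accepts a bare glucose vector in addition to a DataFrame. A usage sketch (illustrative values; DataFrame input still returns per-subject rows):

```python
import iglu_python as iglu

# DataFrame input: per-subject result with 'id' and 'COGI' columns
# Vector input (Series / list / ndarray): a single float
score = iglu.cogi([100, 120, 110, 90, 130, 95],
                  targets=[70, 180],
                  weights=[0.5, 0.35, 0.15])
```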
@@ -66,53 +67,30 @@ def cogi( 0 68.9 """ - def weight_features( - feature: Union[float, pd.Series, list], - scale_range: List[float], - weight: float = 1, - increasing: bool = False, - ) -> Union[float, pd.Series, list]: - """Helper function to weight and scale features. If feature is a Series (or a list), the output is a Series (or list) with the same number of rows (or length) as the input, with values clipped (or "inverse" clipped) so that they are between 0 and 1.""" - if isinstance(feature, pd.Series): - scaled = (feature - min(scale_range)) / ( - max(scale_range) - min(scale_range) - ) - if increasing: - out = scaled.clip(lower=0, upper=1) - else: - out = (1 - scaled).clip(lower=0, upper=1) - elif isinstance(feature, list): - scaled = [ - (x - min(scale_range)) / (max(scale_range) - min(scale_range)) - for x in feature - ] - if increasing: - out = [min(1, max(0, x)) for x in scaled] - else: - out = [min(1, max(0, 1 - x)) for x in scaled] - else: - scaled = (feature - min(scale_range)) / ( - max(scale_range) - min(scale_range) - ) - if increasing: - out = min(1, max(0, scaled)) - else: - out = min(1, max(0, 1 - scaled)) - return out * weight - # Check and prepare data - is_vector = isinstance(data, (pd.Series, list)) - if not is_vector: - data = check_data_columns(data) targets = sorted([float(t) for t in targets]) + if isinstance(data, (pd.Series, list, np.ndarray)): + if isinstance(data, (list, np.ndarray)): + data = pd.Series(data) + return cogi_single(data, targets, weights) + + data = check_data_columns(data) + + out = data.groupby("id").agg( + COGI=('gl', lambda x: cogi_single(x, targets, weights)) + ).reset_index() + + return out + +def cogi_single(data: pd.Series, targets: List[int] = [70, 180], weights: List[float] = [0.5, 0.35, 0.15]) -> float: + """Calculate COGI for a single subject""" # Calculate components ir_df = in_range_percent(data, [targets]) - ir = ir_df["in_range_" + "_".join(map(str, targets))] + ir = ir_df["in_range_" + "_".join(map(str, targets))].iloc[0] br_df = below_percent(data, targets_below=[targets[0]]) - br = br_df["below_" + str(int(targets[0]))] - stddev_df = sd_glu(data) - stddev = stddev_df["SD"] + br = br_df["below_" + str(int(targets[0]))].iloc[0] + stddev = sd_glu(data) # Calculate weighted features weighted_features = ( @@ -121,10 +99,40 @@ def weight_features( + weight_features(stddev, [18, 108], weight=weights[2]) ) - # Create output DataFrame - out = pd.DataFrame({"COGI": weighted_features * 100}) # Convert to percentage - if not is_vector: - out["id"] = stddev_df["id"] - out = out[["id", "COGI"]] + return weighted_features * 100 # Convert to percentage - return out + + +def weight_features( + feature: Union[float, pd.Series, list], + scale_range: List[float], + weight: float = 1, + increasing: bool = False, +) -> Union[float, pd.Series, list]: + """Helper function to weight and scale features. 
If feature is a Series (or a list), the output is a Series (or list) with the same number of rows (or length) as the input, with values clipped (or "inverse" clipped) so that they are between 0 and 1.""" + if isinstance(feature, pd.Series): + scaled = (feature - min(scale_range)) / ( + max(scale_range) - min(scale_range) + ) + if increasing: + out = scaled.clip(lower=0, upper=1) + else: + out = (1 - scaled).clip(lower=0, upper=1) + elif isinstance(feature, list): + scaled = [ + (x - min(scale_range)) / (max(scale_range) - min(scale_range)) + for x in feature + ] + if increasing: + out = [min(1, max(0, x)) for x in scaled] + else: + out = [min(1, max(0, 1 - x)) for x in scaled] + else: + scaled = (feature - min(scale_range)) / ( + max(scale_range) - min(scale_range) + ) + if increasing: + out = min(1, max(0, scaled)) + else: + out = min(1, max(0, 1 - scaled)) + return out * weight diff --git a/iglu_python/gmi.py b/iglu_python/gmi.py index 55e604b..57d01be 100644 --- a/iglu_python/gmi.py +++ b/iglu_python/gmi.py @@ -13,11 +13,12 @@ import pandas as pd import numpy as np +from typing import Union from iglu_python.utils import check_data_columns -def gmi(data): +def gmi(data: Union[pd.DataFrame, pd.Series, list]) -> float|pd.DataFrame: """Calculate GMI (Glucose Management Indicator). The function gmi produces GMI values in a pandas DataFrame object. @@ -40,10 +41,12 @@ def gmi(data): where G is the vector of Glucose Measurements (mg/dL). """ # Handle Series input - if isinstance(data, pd.Series): + if isinstance(data, (list, np.ndarray, pd.Series)): + if isinstance(data, (list, np.ndarray)): + data = pd.Series(data) # Calculate GMI for Series gmi_val = 3.31 + (0.02392 * data.mean()) - return pd.DataFrame({"GMI": [gmi_val]}) + return gmi_val # Check and prepare data data = check_data_columns(data) diff --git a/iglu_python/mage.py b/iglu_python/mage.py index cb2df85..16916f8 100644 --- a/iglu_python/mage.py +++ b/iglu_python/mage.py @@ -110,339 +110,28 @@ def mage( MAGE 0 45.0 """ - - def mage_naive(data: pd.DataFrame) -> float: - """Calculate MAGE using naive algorithm""" - # Calculate absolute differences from mean - mean_gl = data["gl"].mean() - abs_diff_mean = abs(data["gl"] - mean_gl) - - # Calculate standard deviation - std_gl = data["gl"].std() - - # Calculate MAGE as mean of differences greater than sd_multiplier * std - mage_val = abs_diff_mean[abs_diff_mean > (sd_multiplier * std_gl)].mean() - - return float(mage_val) if not pd.isna(mage_val) else np.nan - - def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, - direction:str ='avg', return_type:str = "num") -> pd.DataFrame: - """Calculate MAGE using moving average algorithm for a single subject""" - ## 1. 
Preprocessing - # 1.1 Interpolate over uniform grid - # Note: always interpolate to 5 minute grid - data_ip = CGMS2DayByDay(data, dt0=5, inter_gap=inter_gap, tz=tz) - dt0 = data_ip[2] # Time between measurements in minutes - # replace for 5 min to fix bug in CGMS2DayByDay - dt0 = 5 - day_one = data_ip[1][0] - ndays = len(data_ip[1]) - - # 1.2 Generate grid times by starting from day one and cumulatively summing - # note fix 5 min used in interpretation - gl = data_ip[0].flatten().tolist() - time_ip = [pd.Timedelta(i * 5, unit="m") + day_one for i in range(1,len(gl)+1)] - - # 1.3 Recalculate short_ma and long_ma because short and long are based on 5 minutes originally - # > Multiply by 5 to get length in min - # > Divide by dt0 to get rounded number of measurements that are roughly equal to original short/long ma definition - # short_ma = round(short_ma*5/dt0) - # long_ma = round(long_ma*5/dt0) - # Ensure short_ma and long_ma are appropriate - if short_ma >= long_ma: - short_ma, long_ma = long_ma, short_ma - - ## 2. Change to interpolated data (times and glucose) - # > change data into id, interpolated times, interpolated glucose (t to get rowwise) - # > drop NA rows before first glucose reading - # > then drop NA rows after last glucose reading - # > Label NA glucose as gap (gap = 1) - interpolated_data = pd.DataFrame({ - "id" : data['id'].iloc[0], - "time": pd.Series(time_ip, dtype='datetime64[ns]'), - "gl": pd.Series(gl, dtype='float64') - }) - # Drop NA rows before first glucose reading - first_valid_idx = interpolated_data['gl'].first_valid_index() - if first_valid_idx is not None: - interpolated_data = interpolated_data.iloc[first_valid_idx:] - # Drop NA rows after last glucose reading - last_valid_idx = interpolated_data['gl'].last_valid_index() - if last_valid_idx is not None: - interpolated_data = interpolated_data.iloc[:last_valid_idx+1] - # Add gap column to mark NA values as 1 - interpolated_data['gap'] = interpolated_data['gl'].isna().astype(int) - - # 4. Time Series Segmentation: split gaps > max_gap into separate segments - dfs = segment_time_series(interpolated_data,max_gap) # note: max_gap is in minutes - - # 5. Calculate MAGE on each identified segment - return_val = pd.DataFrame(columns=["start", "end", "mage", "plus_or_minus", "first_excursion"]) - for segment in dfs: - ret = mage_atomic(segment,short_ma,long_ma) - if return_val.empty: - return_val = ret - else: - return_val = pd.concat([return_val, ret], ignore_index=True) - - if return_type == 'df': - return return_val - - """Process MAGE results with filtering and weighting.""" - # Filter by direction (equivalent to the previous R filtering code) - if direction == 'plus': - res = return_val[return_val['plus_or_minus'] == 'PLUS'].copy() - elif direction == 'minus': - res = return_val[return_val['plus_or_minus'] == 'MINUS'].copy() - elif direction == 'avg': - res = return_val[return_val['MAGE'].notna()].copy() - elif direction == 'max': - # Group by start,end and keep max mage in each group - idx = return_val.groupby(['start', 'end'])['MAGE'].idxmax() - res = return_val.loc[idx].reset_index(drop=True) - else: # default: first excursions only - res = return_val[return_val['first_excursion'] == True].copy() - - # Calculate time-weighted MAGE - if res.empty: - return np.nan - - res['hours'] = res['end'] - res['start'] - res['weight'] = res['hours'] / res['hours'].sum() - weighted_mage = (res['MAGE'] * res['weight']).sum() - - return weighted_mage - - def mage_atomic(data, short_ma,long_ma): - """ 0. 
Calculates MAGE on 1 segment of CGM trace """ - - # 2c. Calculate the moving average values - data = data.copy() - data["MA_Short"] = data["gl"].rolling(window=short_ma, min_periods=1).mean() - data["MA_Long"] = data["gl"].rolling(window=long_ma, min_periods=1).mean() - # Fill leading NAs (forward fill first valid value) - if short_ma > len(data): - data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[-1] - else: - data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[short_ma-1] - if long_ma > len(data): - data.loc[data.index[:long_ma], 'MA_Long'] = data['MA_Long'].iloc[-1] - else: - data.loc[data.index[:long_ma], 'MA_Long'] = data['MA_Long'].iloc[long_ma-1] - # Calculate difference - data['DELTA_SHORT_LONG'] = data['MA_Short'] - data['MA_Long'] - data = data.reset_index(drop=True) - nmeasurements = len(data) - - # Sanity check - if ( - data['gl'].isnull().all() or - nmeasurements < 7 or - nmeasurements < short_ma or - np.std(data['gl'], ddof=1) < 1 - ): - return pd.DataFrame({ - 'start': [data['time'].iloc[0]], - 'end': [data['time'].iloc[-1]], - 'MAGE': [np.nan], - 'plus_or_minus': [np.nan], - 'first_excursion': [np.nan] - }) - - # 2d. Create a preallocated list of crossing point ids & type - # Find crossing points - # Detect trend reversal points in glucose data using DELTA signal. - # Initialize variables - idx = list(data.index) # R: idx = as.numeric(rownames(.data)) - types = {'REL_MIN': 0, 'REL_MAX': 1} # R: types = list2env(list(REL_MIN=0, REL_MAX=1)) - - # Create storage lists - R: list_cross <- list("id" = rep.int(NA, nmeasurements), "type" = rep.int(NA, nmeasurements)) - list_cross = { - 'id': [np.nan] * nmeasurements, - 'type': [np.nan] * nmeasurements - } - - # Always add 1st point - list_cross['id'][0] = idx[0] - list_cross['type'][0] = types['REL_MAX'] if data['DELTA_SHORT_LONG'].iloc[0] > 0 else types['REL_MIN'] - count = 1 # Python uses 0-based indexing, so count starts at 1 - - # treat DELTA_SHORT_LONG==0 as NaN ( so we can skip its multiplication) - data.loc[data['DELTA_SHORT_LONG'] == 0, 'DELTA_SHORT_LONG'] = np.nan - - # Main loop - R: for(i in 2:length(.data$DELTA_SHORT_LONG)) - for i in range(1, len(data['DELTA_SHORT_LONG'])): - # Check data validity - if (not pd.isna(data['gl'].iloc[i]) and - not pd.isna(data['gl'].iloc[i-1]) and - not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and - not pd.isna(data['DELTA_SHORT_LONG'].iloc[i-1])): - - # Primary crossover detection: crossing point if DELTA changes sign - if (data['DELTA_SHORT_LONG'].iloc[i] * data['DELTA_SHORT_LONG'].iloc[i-1] < 0): - list_cross['id'][count] = idx[i] - if data['DELTA_SHORT_LONG'].iloc[i] < data['DELTA_SHORT_LONG'].iloc[i-1]: - list_cross['type'][count] = types['REL_MIN'] - else: - list_cross['type'][count] = types['REL_MAX'] - count += 1 - - # Gap handling: needed for gaps, where DELTA_SHORT_LONG(i-1 | i-2) = NaN - elif (not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and - count >= 1): # Make sure we have a previous crossover - - # R: match(list_cross$id[count-1], idx) - find index of previous crossover - try: - prev_cross_idx = idx.index(list_cross['id'][count-1]) - prev_delta = data['DELTA_SHORT_LONG'].iloc[prev_cross_idx] - - if (data['DELTA_SHORT_LONG'].iloc[i] * prev_delta < 0): - list_cross['id'][count] = idx[i] - if data['DELTA_SHORT_LONG'].iloc[i] < prev_delta: - list_cross['type'][count] = types['REL_MIN'] - else: - list_cross['type'][count] = types['REL_MAX'] - count += 1 - except ValueError: - # Handle case where previous crossover id not found in idx - pass - 
- # Add last point to capture excursion at end - # R: utils::tail(idx, 1) - last_idx = idx[-1] - list_cross['id'][count] = last_idx - - if data['DELTA_SHORT_LONG'].iloc[-1] > 0: - list_cross['type'][count] = types['REL_MAX'] - else: - list_cross['type'][count] = types['REL_MIN'] - - # Filter out NaN values - R: list_cross$id[!is.na(list_cross$id)] - clean_ids = [x for x in list_cross['id'] if not pd.isna(x)] - clean_types = [x for x in list_cross['type'] if not pd.isna(x)] - - # Create DataFrame - R: do.call(cbind.data.frame, list_cross) - crosses = pd.DataFrame({ - "id":clean_ids, - "type":clean_types - }) - - # 2e. Calculate min and max glucose values from ids and types in crosses + store indexes for plotting later - # R: num_extrema = nrow(crosses)-1 - num_extrema = len(crosses) - 1 - - # R: minmax <- rep(NA_real_, num_extrema), indexes <- rep(NA_real_, num_extrema) - minmax = [np.nan] * num_extrema - indexes = [np.nan] * num_extrema - - # R: for(i in 1:num_extrema) - for i in range(num_extrema): - # Define search boundaries - # R: s1 <- ifelse(i == 1, crosses[i, 1], indexes[i-1]) - if i == 0: # First extrema - s1 = int(crosses.iloc[i]['id']) # crosses[i, 1] in R (1-indexed) - else: - s1 = int(indexes[i-1]) # last minmax index - - # R: s2 <- crosses[i+1,1] - s2 = int(crosses.iloc[i+1]['id']) # crosses[i+1, 1] in R - - # Extract glucose segment - R: .data[as.character(s1:s2), ]$gl - segment_start = s1 - segment_end = s2 - glucose_segment = data['gl'].iloc[segment_start:segment_end+1] # including next cross point - - # Find min or max based on crossover type - if crosses.iloc[i]['type'] == types['REL_MIN']: # crosses[i, "type"] in R - # R: min(.data[as.character(s1:s2), ]$gl, na.rm = TRUE) - minmax[i] = glucose_segment.min() - # R: which.min(.data[as.character(s1:s2), ]$gl)+s1-1 - indexes[i] = glucose_segment.idxmin() - else: - # R: max(.data[as.character(s1:s2), ]$gl, na.rm = TRUE) - minmax[i] = glucose_segment.max() - # R: which.max(.data[as.character(s1:s2), ]$gl)+s1-1 - indexes[i] = glucose_segment.idxmax() - - # excursion elimination - differences = np.subtract.outer(minmax, minmax).T - standardD = data['gl'].std() # pandas uses sample std dev by default - N = len(minmax) - - - # MAGE+ algorithm, which identifies and measures positive glycemic excursions - # (nadir-to-peak movements that exceed the standard deviation threshold). - mage_plus_heights, mage_plus_tp_pairs = calculate_mage_plus(differences, minmax, standardD) - - # MAGE- algorithm, which identifies and measures negative glycemic excursions - # (peak-to-nadir movements that exceed the standard deviation threshold). 
- mage_minus_heights, mage_minus_tp_pairs = calculate_mage_minus(differences, minmax, standardD) - - if len(mage_minus_heights) == 0 and len(mage_plus_heights) == 0: - return pd.DataFrame({ - 'start': [data['time'].iloc[0]], - 'end': [data['time'].iloc[-1]], - 'MAGE': [np.nan], - 'plus_or_minus': [np.nan], - 'first_excursion': [np.nan] - }, index=[0]) - - # Determine which excursion type occurs first - if (len(mage_plus_heights) > 0 and - (len(mage_minus_heights) == 0 or - mage_plus_tp_pairs[0][1] <= mage_minus_tp_pairs[0][0])): - is_plus_first = True - else: - is_plus_first = False - - # Create MAGE+ result dataframe - mage_plus = pd.DataFrame({ - 'start': [data['time'].iloc[0]], - 'end': [data['time'].iloc[-1]], - 'MAGE': [np.mean(mage_plus_heights) if len(mage_plus_heights) > 0 else np.nan], - 'plus_or_minus': ['PLUS'], - 'first_excursion': [is_plus_first] - }) - - # Create MAGE- result dataframe - mage_minus = pd.DataFrame({ - 'start': [data['time'].iloc[0]], - 'end': [data['time'].iloc[-1]], - 'MAGE': [abs(np.mean(mage_minus_heights)) if len(mage_minus_heights) > 0 else np.nan], - 'plus_or_minus': ['MINUS'], - 'first_excursion': [not is_plus_first] - }) - - # Determine which direction has maximum MAGE value - is_plus_max = ((mage_plus['MAGE'].iloc[0] >= mage_minus['MAGE'].iloc[0]) - if not pd.isna(mage_plus['MAGE'].iloc[0]) - and not pd.isna(mage_minus['MAGE'].iloc[0]) - else False - ) - - return pd.concat([mage_plus, mage_minus], ignore_index=True) - - # ------------------- # start mage() # Handle Series input if isinstance(data, pd.Series): - # Convert Series to DataFrame format + if not isinstance(data.index, pd.DatetimeIndex): + raise ValueError("Series must have a DatetimeIndex") data_df = pd.DataFrame( { - "id": ["subject1"] * len(data), - "time": pd.date_range( - start="2020-01-01", periods=len(data), freq="5min" - ), + "id": ["subject1"] * len(data.values), + "time": data.index, "gl": data.values, } ) if version == "ma": - mage_val = mage_ma_single(data_df, short_ma, long_ma, direction, return_type='num') - result = pd.DataFrame({"MAGE": [mage_val]}) + mage_val = mage_ma_single(data_df, short_ma, long_ma, direction, + return_type='num', + inter_gap=inter_gap, + max_gap=max_gap, + tz=tz) else: - result = pd.DataFrame({"MAGE": [mage_naive(data_df)]}) - return result + mage_val = mage_naive(data_df,sd_multiplier=sd_multiplier) + return mage_val # Handle DataFrame input data = check_data_columns(data) @@ -455,19 +144,333 @@ def mage_atomic(data, short_ma,long_ma): continue if version == "ma": - mage_val = mage_ma_single(subject_data, short_ma, long_ma, direction, return_type) + mage_val = mage_ma_single(subject_data, short_ma, long_ma, direction, return_type,inter_gap,max_gap,tz) if return_type == "df" : subject_result_dict = mage_val.to_dict() else: subject_result_dict = {"MAGE": mage_val} else: - mage_val = mage_naive(subject_data) + mage_val = mage_naive(subject_data,sd_multiplier=sd_multiplier) subject_result_dict = {"MAGE": mage_val} result.append({"id": subject, **subject_result_dict}) return pd.DataFrame(result) +def mage_naive(data: pd.DataFrame,sd_multiplier:float = 1.0) -> float: + """Calculate MAGE using naive algorithm""" + # Calculate absolute differences from mean + mean_gl = data["gl"].mean() + abs_diff_mean = abs(data["gl"] - mean_gl) + + # Calculate standard deviation + std_gl = data["gl"].std() + + # Calculate MAGE as mean of differences greater than sd_multiplier * std + mage_val = abs_diff_mean[abs_diff_mean > (sd_multiplier * std_gl)].mean() + + return 
float(mage_val) if not pd.isna(mage_val) else np.nan + +def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, + direction:str ='avg', return_type:str = "num",inter_gap:int = 45, max_gap:int = 180, tz:str = "" ) -> pd.DataFrame: + """Calculate MAGE using moving average algorithm for a single subject""" + ## 1. Preprocessing + # 1.1 Interpolate over uniform grid + # Note: always interpolate to 5 minute grid + data_ip = CGMS2DayByDay(data, dt0=5, inter_gap=inter_gap, tz=tz) + dt0 = data_ip[2] # Time between measurements in minutes + # replace for 5 min to fix bug in CGMS2DayByDay + dt0 = 5 + day_one = data_ip[1][0] + ndays = len(data_ip[1]) + + # 1.2 Generate grid times by starting from day one and cumulatively summing + # note fix 5 min used in interpretation + gl = data_ip[0].flatten().tolist() + time_ip = [pd.Timedelta(i * 5, unit="m") + day_one for i in range(1,len(gl)+1)] + + # 1.3 Recalculate short_ma and long_ma because short and long are based on 5 minutes originally + # > Multiply by 5 to get length in min + # > Divide by dt0 to get rounded number of measurements that are roughly equal to original short/long ma definition + # short_ma = round(short_ma*5/dt0) + # long_ma = round(long_ma*5/dt0) + # Ensure short_ma and long_ma are appropriate + if short_ma >= long_ma: + short_ma, long_ma = long_ma, short_ma + + ## 2. Change to interpolated data (times and glucose) + # > change data into id, interpolated times, interpolated glucose (t to get rowwise) + # > drop NA rows before first glucose reading + # > then drop NA rows after last glucose reading + # > Label NA glucose as gap (gap = 1) + interpolated_data = pd.DataFrame({ + "id" : data['id'].iloc[0], + "time": pd.Series(time_ip, dtype='datetime64[ns]'), + "gl": pd.Series(gl, dtype='float64') + }) + # Drop NA rows before first glucose reading + first_valid_idx = interpolated_data['gl'].first_valid_index() + if first_valid_idx is not None: + interpolated_data = interpolated_data.iloc[first_valid_idx:] + # Drop NA rows after last glucose reading + last_valid_idx = interpolated_data['gl'].last_valid_index() + if last_valid_idx is not None: + interpolated_data = interpolated_data.iloc[:last_valid_idx+1] + # Add gap column to mark NA values as 1 + interpolated_data['gap'] = interpolated_data['gl'].isna().astype(int) + + # 4. Time Series Segmentation: split gaps > max_gap into separate segments + dfs = segment_time_series(interpolated_data,max_gap) # note: max_gap is in minutes + + # 5. 
Calculate MAGE on each identified segment + return_val = pd.DataFrame(columns=["start", "end", "mage", "plus_or_minus", "first_excursion"]) + for segment in dfs: + ret = mage_atomic(segment,short_ma,long_ma) + if return_val.empty: + return_val = ret + else: + return_val = pd.concat([return_val, ret], ignore_index=True) + + if return_type == 'df': + return return_val + + """Process MAGE results with filtering and weighting.""" + # Filter by direction (equivalent to the previous R filtering code) + if direction == 'plus': + res = return_val[return_val['plus_or_minus'] == 'PLUS'].copy() + elif direction == 'minus': + res = return_val[return_val['plus_or_minus'] == 'MINUS'].copy() + elif direction == 'avg': + res = return_val[return_val['MAGE'].notna()].copy() + elif direction == 'max': + # Group by start,end and keep max mage in each group + idx = return_val.groupby(['start', 'end'])['MAGE'].idxmax() + res = return_val.loc[idx].reset_index(drop=True) + else: # default: first excursions only + res = return_val[return_val['first_excursion'] == True].copy() + + # Calculate time-weighted MAGE + if res.empty: + return np.nan + + res['hours'] = res['end'] - res['start'] + res['weight'] = res['hours'] / res['hours'].sum() + weighted_mage = (res['MAGE'] * res['weight']).sum() + + return weighted_mage + +def mage_atomic(data, short_ma,long_ma): + """ 0. Calculates MAGE on 1 segment of CGM trace """ + + # 2c. Calculate the moving average values + data = data.copy() + data["MA_Short"] = data["gl"].rolling(window=short_ma, min_periods=1).mean() + data["MA_Long"] = data["gl"].rolling(window=long_ma, min_periods=1).mean() + # Fill leading NAs (forward fill first valid value) + if short_ma > len(data): + data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[-1] + else: + data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[short_ma-1] + if long_ma > len(data): + data.loc[data.index[:long_ma], 'MA_Long'] = data['MA_Long'].iloc[-1] + else: + data.loc[data.index[:long_ma], 'MA_Long'] = data['MA_Long'].iloc[long_ma-1] + # Calculate difference + data['DELTA_SHORT_LONG'] = data['MA_Short'] - data['MA_Long'] + data = data.reset_index(drop=True) + nmeasurements = len(data) + + # Sanity check + if ( + data['gl'].isnull().all() or + nmeasurements < 7 or + nmeasurements < short_ma or + np.std(data['gl'], ddof=1) < 1 + ): + return pd.DataFrame({ + 'start': [data['time'].iloc[0]], + 'end': [data['time'].iloc[-1]], + 'MAGE': [np.nan], + 'plus_or_minus': [np.nan], + 'first_excursion': [np.nan] + }) + + # 2d. Create a preallocated list of crossing point ids & type + # Find crossing points + # Detect trend reversal points in glucose data using DELTA signal. 
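+    # A crossing is recorded where DELTA_SHORT_LONG changes sign between
+    # consecutive readings (the product of the two deltas is negative); a
+    # falling delta marks a relative minimum, a rising one a relative maximum.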
+ # Initialize variables + idx = list(data.index) # R: idx = as.numeric(rownames(.data)) + types = {'REL_MIN': 0, 'REL_MAX': 1} # R: types = list2env(list(REL_MIN=0, REL_MAX=1)) + + # Create storage lists - R: list_cross <- list("id" = rep.int(NA, nmeasurements), "type" = rep.int(NA, nmeasurements)) + list_cross = { + 'id': [np.nan] * nmeasurements, + 'type': [np.nan] * nmeasurements + } + + # Always add 1st point + list_cross['id'][0] = idx[0] + list_cross['type'][0] = types['REL_MAX'] if data['DELTA_SHORT_LONG'].iloc[0] > 0 else types['REL_MIN'] + count = 1 # Python uses 0-based indexing, so count starts at 1 + + # treat DELTA_SHORT_LONG==0 as NaN ( so we can skip its multiplication) + data.loc[data['DELTA_SHORT_LONG'] == 0, 'DELTA_SHORT_LONG'] = np.nan + + # Main loop - R: for(i in 2:length(.data$DELTA_SHORT_LONG)) + for i in range(1, len(data['DELTA_SHORT_LONG'])): + # Check data validity + if (not pd.isna(data['gl'].iloc[i]) and + not pd.isna(data['gl'].iloc[i-1]) and + not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and + not pd.isna(data['DELTA_SHORT_LONG'].iloc[i-1])): + + # Primary crossover detection: crossing point if DELTA changes sign + if (data['DELTA_SHORT_LONG'].iloc[i] * data['DELTA_SHORT_LONG'].iloc[i-1] < 0): + list_cross['id'][count] = idx[i] + if data['DELTA_SHORT_LONG'].iloc[i] < data['DELTA_SHORT_LONG'].iloc[i-1]: + list_cross['type'][count] = types['REL_MIN'] + else: + list_cross['type'][count] = types['REL_MAX'] + count += 1 + + # Gap handling: needed for gaps, where DELTA_SHORT_LONG(i-1 | i-2) = NaN + elif (not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and + count >= 1): # Make sure we have a previous crossover + + # R: match(list_cross$id[count-1], idx) - find index of previous crossover + try: + prev_cross_idx = idx.index(list_cross['id'][count-1]) + prev_delta = data['DELTA_SHORT_LONG'].iloc[prev_cross_idx] + + if (data['DELTA_SHORT_LONG'].iloc[i] * prev_delta < 0): + list_cross['id'][count] = idx[i] + if data['DELTA_SHORT_LONG'].iloc[i] < prev_delta: + list_cross['type'][count] = types['REL_MIN'] + else: + list_cross['type'][count] = types['REL_MAX'] + count += 1 + except ValueError: + # Handle case where previous crossover id not found in idx + pass + + # Add last point to capture excursion at end + # R: utils::tail(idx, 1) + last_idx = idx[-1] + list_cross['id'][count] = last_idx + + if data['DELTA_SHORT_LONG'].iloc[-1] > 0: + list_cross['type'][count] = types['REL_MAX'] + else: + list_cross['type'][count] = types['REL_MIN'] + + # Filter out NaN values - R: list_cross$id[!is.na(list_cross$id)] + clean_ids = [x for x in list_cross['id'] if not pd.isna(x)] + clean_types = [x for x in list_cross['type'] if not pd.isna(x)] + + # Create DataFrame - R: do.call(cbind.data.frame, list_cross) + crosses = pd.DataFrame({ + "id":clean_ids, + "type":clean_types + }) + + # 2e. 
Calculate min and max glucose values from ids and types in crosses + store indexes for plotting later + # R: num_extrema = nrow(crosses)-1 + num_extrema = len(crosses) - 1 + + # R: minmax <- rep(NA_real_, num_extrema), indexes <- rep(NA_real_, num_extrema) + minmax = [np.nan] * num_extrema + indexes = [np.nan] * num_extrema + + # R: for(i in 1:num_extrema) + for i in range(num_extrema): + # Define search boundaries + # R: s1 <- ifelse(i == 1, crosses[i, 1], indexes[i-1]) + if i == 0: # First extrema + s1 = int(crosses.iloc[i]['id']) # crosses[i, 1] in R (1-indexed) + else: + s1 = int(indexes[i-1]) # last minmax index + + # R: s2 <- crosses[i+1,1] + s2 = int(crosses.iloc[i+1]['id']) # crosses[i+1, 1] in R + + # Extract glucose segment - R: .data[as.character(s1:s2), ]$gl + segment_start = s1 + segment_end = s2 + glucose_segment = data['gl'].iloc[segment_start:segment_end+1] # including next cross point + + # Find min or max based on crossover type + if crosses.iloc[i]['type'] == types['REL_MIN']: # crosses[i, "type"] in R + # R: min(.data[as.character(s1:s2), ]$gl, na.rm = TRUE) + minmax[i] = glucose_segment.min() + # R: which.min(.data[as.character(s1:s2), ]$gl)+s1-1 + indexes[i] = glucose_segment.idxmin() + else: + # R: max(.data[as.character(s1:s2), ]$gl, na.rm = TRUE) + minmax[i] = glucose_segment.max() + # R: which.max(.data[as.character(s1:s2), ]$gl)+s1-1 + indexes[i] = glucose_segment.idxmax() + + # excursion elimination + differences = np.subtract.outer(minmax, minmax).T + standardD = data['gl'].std() # pandas uses sample std dev by default + N = len(minmax) + + + # MAGE+ algorithm, which identifies and measures positive glycemic excursions + # (nadir-to-peak movements that exceed the standard deviation threshold). + mage_plus_heights, mage_plus_tp_pairs = calculate_mage_plus(differences, minmax, standardD) + + # MAGE- algorithm, which identifies and measures negative glycemic excursions + # (peak-to-nadir movements that exceed the standard deviation threshold). 
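+    # The minus-excursion heights are peak-to-nadir drops and are expected
+    # to be negative, hence the abs() applied when they are averaged below.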
+ mage_minus_heights, mage_minus_tp_pairs = calculate_mage_minus(differences, minmax, standardD) + + if len(mage_minus_heights) == 0 and len(mage_plus_heights) == 0: + return pd.DataFrame({ + 'start': [data['time'].iloc[0]], + 'end': [data['time'].iloc[-1]], + 'MAGE': [np.nan], + 'plus_or_minus': [np.nan], + 'first_excursion': [np.nan] + }, index=[0]) + + # Determine which excursion type occurs first + if (len(mage_plus_heights) > 0 and + (len(mage_minus_heights) == 0 or + mage_plus_tp_pairs[0][1] <= mage_minus_tp_pairs[0][0])): + is_plus_first = True + else: + is_plus_first = False + + # Create MAGE+ result dataframe + mage_plus = pd.DataFrame({ + 'start': [data['time'].iloc[0]], + 'end': [data['time'].iloc[-1]], + 'MAGE': [np.mean(mage_plus_heights) if len(mage_plus_heights) > 0 else np.nan], + 'plus_or_minus': ['PLUS'], + 'first_excursion': [is_plus_first] + }) + + # Create MAGE- result dataframe + mage_minus = pd.DataFrame({ + 'start': [data['time'].iloc[0]], + 'end': [data['time'].iloc[-1]], + 'MAGE': [abs(np.mean(mage_minus_heights)) if len(mage_minus_heights) > 0 else np.nan], + 'plus_or_minus': ['MINUS'], + 'first_excursion': [not is_plus_first] + }) + + # Determine which direction has maximum MAGE value + is_plus_max = ((mage_plus['MAGE'].iloc[0] >= mage_minus['MAGE'].iloc[0]) + if not pd.isna(mage_plus['MAGE'].iloc[0]) + and not pd.isna(mage_minus['MAGE'].iloc[0]) + else False + ) + + return pd.concat([mage_plus, mage_minus], ignore_index=True) + + + + def calculate_mage_plus(differences, minmax, standardD): """ Calculate MAGE+ (positive glycemic excursions) diff --git a/iglu_python/mean_glu.py b/iglu_python/mean_glu.py index 8861bda..4757ccb 100644 --- a/iglu_python/mean_glu.py +++ b/iglu_python/mean_glu.py @@ -1,11 +1,12 @@ from typing import Union +import numpy as np import pandas as pd from .utils import check_data_columns -def mean_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: +def mean_glu(data: Union[pd.DataFrame, list, np.ndarray, pd.Series]) -> pd.DataFrame|float: """ Calculate mean glucose value for each subject. @@ -44,8 +45,10 @@ def mean_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: 0 157.5 """ # Handle Series input - if isinstance(data, pd.Series): - return pd.DataFrame({"mean": [data.mean()]}) + if isinstance(data, (list, np.ndarray, pd.Series)): + if isinstance(data, (list,np.ndarray)): + data = pd.Series(data) + return data.mean() # Handle DataFrame input data = check_data_columns(data) diff --git a/iglu_python/sd_glu.py b/iglu_python/sd_glu.py index e75ab95..42d4899 100644 --- a/iglu_python/sd_glu.py +++ b/iglu_python/sd_glu.py @@ -1,11 +1,12 @@ from typing import Union +import numpy as np import pandas as pd from .utils import check_data_columns -def sd_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: +def sd_glu(data: Union[pd.DataFrame, list, np.ndarray, pd.Series]) -> pd.DataFrame|float: """ Calculate standard deviation of glucose values. 
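Both mean_glu and sd_glu now short-circuit vector input to a bare float instead of a one-row DataFrame; a quick sketch (numbers invented):

    import numpy as np
    import iglu_python as iglu

    gl = np.array([120.0, 140.0, 160.0])
    iglu.mean_glu(gl)  # 140.0
    iglu.sd_glu(gl)    # 20.0, sample standard deviation (ddof=1)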
@@ -44,14 +45,16 @@ def sd_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: 0 38.89 """ # Handle Series input - if isinstance(data, pd.Series): - return pd.DataFrame({"SD": [data.std()]}) + if isinstance(data, (list, np.ndarray, pd.Series)): + if isinstance(data, (list,np.ndarray)): + data = pd.Series(data) + return data.std(ddof=1) # Handle DataFrame input data = check_data_columns(data) # Calculate standard deviation for each subject - out = data.groupby("id")["gl"].std().reset_index() + out = data.groupby("id")["gl"].std(ddof=1).reset_index() out.columns = ["id", "SD"] return out diff --git a/pyproject.toml b/pyproject.toml index 8c5b951..a7d5fd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "iglu_python" -version = "0.1.6" +version = "0.1.7" description = "Python implementation of the iglu package for continuous glucose monitoring data analysis" readme = "README.md" requires-python = ">=3.11" diff --git a/tests/test_cogi.py b/tests/test_cogi.py index 2b90c2a..7dde3cd 100644 --- a/tests/test_cogi.py +++ b/tests/test_cogi.py @@ -54,13 +54,10 @@ def test_cogi_series_input(): result = iglu.cogi(data) # Check output format - assert isinstance(result, pd.DataFrame) - assert "COGI" in result.columns - assert "id" not in result.columns - assert len(result) == 1 + assert isinstance(result, (float,np.float64)) # Check that COGI value is between 0 and 100 - assert (result["COGI"].iloc[0] >= 0) and (result["COGI"].iloc[0] <= 100) + np.testing.assert_allclose(result, 81.934259, rtol=0.001) def test_cogi_custom_parameters(): diff --git a/tests/test_gmi.py b/tests/test_gmi.py index 44dcf90..3e6047a 100644 --- a/tests/test_gmi.py +++ b/tests/test_gmi.py @@ -105,15 +105,13 @@ def test_gmi_series(): """Test GMI with Series input""" series_data = pd.Series([150, 160, 170, 180, 190, 200]) result = iglu.gmi(series_data) - assert isinstance(result, pd.DataFrame) - assert "GMI" in result.columns - assert len(result) == 1 + assert isinstance(result, (float,np.float64)) # Calculate expected GMI # Mean glucose = (150 + 160 + 170 + 180 + 190 + 200) / 6 = 175 # GMI = 3.31 + (0.02392 * 175) = 7.496 expected_gmi = 3.31 + (0.02392 * 175) - assert abs(result["GMI"].iloc[0] - expected_gmi) < 0.001 + np.testing.assert_allclose(result, expected_gmi, rtol=0.001) def test_gmi_empty(): diff --git a/tests/test_mage.py b/tests/test_mage.py index d0f3c62..c357675 100644 --- a/tests/test_mage.py +++ b/tests/test_mage.py @@ -143,15 +143,19 @@ def test_mage_naive_version(base_data): assert np.isnan(result.iloc[1]["MAGE"]) -def test_mage_series_input(): +def test_mage_series_without_datetime_index(): """Test MAGE calculation with Series input""" series_data = pd.Series([150, 200, 180, 160, 140, 190]) - result = iglu.mage(series_data) - assert isinstance(result, pd.DataFrame) - assert "MAGE" in result.columns - assert len(result) == 1 + with pytest.raises(ValueError): + iglu.mage(series_data) +def test_mage_series_with_datetime_index(): + """Test MAGE calculation with Series input""" + series_data = pd.Series([150, 200, 180, 160, 140, 190], index=pd.date_range(start="2020-01-01 00:00:00", periods=6, freq="5min")) + result = iglu.mage(series_data) + assert isinstance(result, (float,np.float64)) + def test_mage_empty_data(): """Test MAGE calculation with empty DataFrame""" empty_data = pd.DataFrame(columns=["id", "time", "gl"]) diff --git a/tests/test_mean_glu.py b/tests/test_mean_glu.py index 94ae5c0..a789a73 100644 --- a/tests/test_mean_glu.py +++ 
b/tests/test_mean_glu.py @@ -109,13 +109,10 @@ def test_mean_glu_series_input(): result = mean_glu(data) # Check output format - assert isinstance(result, pd.DataFrame) - assert "mean" in result.columns - assert "id" not in result.columns - assert len(result) == 1 + assert isinstance(result, (float,np.float64)) # Check that mean value is correct - assert abs(result["mean"].iloc[0] - 168.33) < 0.01 # 168.33 is the expected mean + np.testing.assert_allclose(result, 168.33, rtol=0.001) # 168.33 is the expected mean def test_mean_glu_empty_data(): diff --git a/tests/test_sd_glu.py b/tests/test_sd_glu.py index 6dfdfc0..d695f2a 100644 --- a/tests/test_sd_glu.py +++ b/tests/test_sd_glu.py @@ -109,13 +109,10 @@ def test_sd_glu_series_input(): result = iglu.sd_glu(data) # Check output format - assert isinstance(result, pd.DataFrame) - assert "SD" in result.columns - assert "id" not in result.columns - assert len(result) == 1 + assert isinstance(result, (float,np.float64)) # Check that SD value is non-negative - assert result["SD"].iloc[0] >= 0 + np.testing.assert_allclose(result, 26.394444, rtol=0.001) def test_sd_glu_empty_data(): From 9f6ad3a6c24a4ae4e3e8a24479d2a84aa717ebe7 Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Mon, 16 Jun 2025 17:06:07 +0300 Subject: [PATCH 05/16] support for Series, list and ndarray -> return float or dict --- README.md | 8 ++-- iglu_python/above_percent.py | 63 ++++++++++++------------ iglu_python/below_percent.py | 64 +++++++++++++------------ iglu_python/cogi.py | 8 ++-- iglu_python/gri.py | 85 ++++++++++++++------------------- iglu_python/in_range_percent.py | 74 +++++++++++++--------------- tests/test_above_percent.py | 14 +++++- tests/test_below_percent.py | 13 +++-- tests/test_gri.py | 17 +++---- tests/test_in_range_percent.py | 17 +++---- 10 files changed, 175 insertions(+), 188 deletions(-) diff --git a/README.md b/README.md index 090b0d6..5aae055 100644 --- a/README.md +++ b/README.md @@ -25,11 +25,11 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves | Function | Description | IGLU-R test compatibility | list /ndarray /Series input | TZ | Comments | |----------|-------------|-------------|-------------------|----|----------| -| above_percent | percentage of values above target thresholds| ✅ | ||| +| above_percent | percentage of values above target thresholds| ✅ |✅ returns dict ||| | active_percent | percentage of time CGM was active | ✅ | | adrr | average daily risk range | ✅ | | auc| Area Under Curve | 🟡 (0.01 precision) |✅ only Series(DatetimeIndex) returns float || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)| -| below_percent| percentage of values below target thresholds| ✅ | +| below_percent| percentage of values below target thresholds| ✅ | ✅ returns dict | cogi |Coefficient of Glucose Irregularity | ✅ | ✅ returns float | conga | Continuous Overall Net Glycemic Action |✅ | | cv_glu | Coefficient of Variation | ✅| ✅ returns float | @@ -41,13 +41,13 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves | grade_hyper |percentage of GRADE score attributable to hyperglycemia| ✅ | | grade_hypo |percentage of GRADE score attributable to hypoglycemia| ✅ | | grade |mean GRADE score| ✅ | -| gri |Glycemia Risk Index | ✅ | +| gri |Glycemia Risk Index | ✅ | ✅ returns float | gvp |Glucose Variability Percentage| ✅ | | hbgi |High Blood Glucose Index| ✅ | | hyper_index |Hyperglycemia Index| ✅ | | hypo_index |Hypoglycemia Index| ✅ | | igc 
|Index of Glycemic Control| ✅ | -| in_range_percent |percentage of values within target ranges| ✅ | +| in_range_percent |percentage of values within target ranges| ✅ | ✅ returns dict | iqr_glu |glucose level interquartile range|✅ | | j_index |J-Index score for glucose measurements| ✅ | | lbgi | Low Blood Glucose Index| ✅ | diff --git a/iglu_python/above_percent.py b/iglu_python/above_percent.py index cc1c0eb..703952e 100644 --- a/iglu_python/above_percent.py +++ b/iglu_python/above_percent.py @@ -1,14 +1,15 @@ from typing import List, Union import pandas as pd +import numpy as np from .utils import check_data_columns def above_percent( - data: Union[pd.DataFrame, pd.Series, list], + data: Union[pd.DataFrame, pd.Series, list,np.ndarray], targets_above: List[int] = [140, 180, 250], -) -> pd.DataFrame: +) -> pd.DataFrame|dict[str:float]: """ Calculate percentage of values above target thresholds. @@ -58,22 +59,11 @@ def above_percent( 0 75.0 25.0 """ # Handle Series input - if isinstance(data, (pd.Series, list)): - # Convert targets to float - targets_above = [int(t) for t in targets_above] - - # Calculate total non-NA readings - total_readings = len(data.dropna()) - if total_readings == 0: - return pd.DataFrame(columns=[f"above_{t}" for t in targets_above]) - - # Calculate percentages for each target - percentages = {} - for target in targets_above: - above_count = len(data[data > target]) - percentages[f"above_{target}"] = (above_count / total_readings) * 100 - - return pd.DataFrame([percentages]) + if isinstance(data, (pd.Series, list,np.ndarray)): + if isinstance(data, (list, np.ndarray)): + data = pd.Series(data) + return above_percent_single(data, targets_above) + # Handle DataFrame input data = check_data_columns(data) @@ -85,19 +75,32 @@ def above_percent( # Process each subject for subject in data["id"].unique(): subject_data = data[data["id"] == subject] - total_readings = len(subject_data.dropna(subset=["gl"])) - - if total_readings == 0: - continue - - # Calculate percentages for each target - percentages = {} - for target in targets_above: - above_count = len(subject_data[subject_data["gl"] > target]) - percentages[f"above_{target}"] = (above_count / total_readings) * 100 - + percentages = above_percent_single(subject_data["gl"], targets_above) percentages["id"] = subject result.append(percentages) # Convert to DataFrame - return pd.DataFrame(result) + df = pd.DataFrame(result) + df = df[['id'] + [col for col in df.columns if col != 'id']] + return df + +def above_percent_single(data: pd.Series, targets_above: List[int] = [140, 180, 250]) -> dict[str:float]: + """ + Calculate percentage of values above target thresholds for a single series/subject. 
+ """ + # Convert targets to float + targets_above = [int(t) for t in targets_above] + + # Calculate total non-NA readings + total_readings = len(data.dropna()) + if total_readings == 0: + return {f"above_{t}": 0 for t in targets_above} + + # Calculate percentages for each target + percentages = {} + for target in targets_above: + above_count = len(data[data > target]) + percentages[f"above_{target}"] = (above_count / total_readings) * 100 + + return percentages + \ No newline at end of file diff --git a/iglu_python/below_percent.py b/iglu_python/below_percent.py index c264bbc..99b3421 100644 --- a/iglu_python/below_percent.py +++ b/iglu_python/below_percent.py @@ -1,13 +1,14 @@ from typing import List, Union import pandas as pd +import numpy as np from .utils import check_data_columns def below_percent( - data: Union[pd.DataFrame, pd.Series, list], targets_below: List[int] = [54, 70] -) -> pd.DataFrame: + data: Union[pd.DataFrame, pd.Series, list,np.ndarray], targets_below: List[int] = [54, 70] +) -> pd.DataFrame|dict[str:float]: """ Calculate percentage of values below target thresholds. @@ -57,26 +58,14 @@ def below_percent( 0 25.0 50.0 """ # Handle Series input - if isinstance(data, (pd.Series, list)): - # Convert targets to float - targets_below = [int(t) for t in targets_below] - - # Calculate total non-NA readings - total_readings = len(data.dropna()) - if total_readings == 0: - return pd.DataFrame(columns=[f"below_{t}" for t in targets_below]) - - # Calculate percentages for each target - percentages = {} - for target in targets_below: - below_count = len(data[data < target]) - percentages[f"below_{target}"] = (below_count / total_readings) * 100 - - return pd.DataFrame([percentages]) + if isinstance(data, (pd.Series, list,np.ndarray)): + if isinstance(data, (list, np.ndarray)): + data = pd.Series(data) + return below_percent_single(data, targets_below) + # Handle DataFrame input data = check_data_columns(data) - targets_below = [int(t) for t in targets_below] # Initialize result list result = [] @@ -84,19 +73,34 @@ def below_percent( # Process each subject for subject in data["id"].unique(): subject_data = data[data["id"] == subject] - total_readings = len(subject_data.dropna(subset=["gl"])) - - if total_readings == 0: - continue - - # Calculate percentages for each target - percentages = {} - for target in targets_below: - below_count = len(subject_data[subject_data["gl"] < target]) - percentages[f"below_{target}"] = (below_count / total_readings) * 100 + percentages = below_percent_single(subject_data["gl"], targets_below) percentages["id"] = subject result.append(percentages) # Convert to DataFrame - return pd.DataFrame(result) + df = pd.DataFrame(result) + df = df[['id'] + [col for col in df.columns if col != 'id']] + return df + +def below_percent_single(data: pd.Series, targets_below: List[int] = [54, 70]) -> dict[str:float]: + """ + Calculate percentage of values below target thresholds for a single series/subject. 
+ """ + # Convert targets to float + targets_below = [int(t) for t in targets_below] + + # Calculate total non-NA readings + total_readings = len(data.dropna()) + if total_readings == 0: + return {f"below_{t}": 0 for t in targets_below} + + # Calculate percentages for each target + percentages = {} + for target in targets_below: + below_count = len(data[data < target]) + percentages[f"below_{target}"] = (below_count / total_readings) * 100 + + return percentages + + \ No newline at end of file diff --git a/iglu_python/cogi.py b/iglu_python/cogi.py index b25d172..ab2b08e 100644 --- a/iglu_python/cogi.py +++ b/iglu_python/cogi.py @@ -86,10 +86,10 @@ def cogi( def cogi_single(data: pd.Series, targets: List[int] = [70, 180], weights: List[float] = [0.5, 0.35, 0.15]) -> float: """Calculate COGI for a single subject""" # Calculate components - ir_df = in_range_percent(data, [targets]) - ir = ir_df["in_range_" + "_".join(map(str, targets))].iloc[0] - br_df = below_percent(data, targets_below=[targets[0]]) - br = br_df["below_" + str(int(targets[0]))].iloc[0] + ir_dict = in_range_percent(data, [targets]) + ir = ir_dict["in_range_" + "_".join(map(str, targets))] + br_dict = below_percent(data, targets_below=[targets[0]]) + br = br_dict["below_" + str(int(targets[0]))] stddev = sd_glu(data) # Calculate weighted features diff --git a/iglu_python/gri.py b/iglu_python/gri.py index 4c3eb5d..dc62c70 100644 --- a/iglu_python/gri.py +++ b/iglu_python/gri.py @@ -8,7 +8,7 @@ from .utils import check_data_columns -def gri(data: Union[pd.DataFrame, pd.Series], tz: str = "") -> pd.DataFrame: +def gri(data: Union[pd.DataFrame, pd.Series,list,np.ndarray], tz: str = "") -> pd.DataFrame|float: """ Calculate Glycemia Risk Index (GRI). @@ -55,61 +55,48 @@ def gri(data: Union[pd.DataFrame, pd.Series], tz: str = "") -> pd.DataFrame: 0 35.43 """ # Handle Series input - if isinstance(data, pd.Series): - data = data.dropna() - if len(data) == 0: - return pd.DataFrame({"GRI": [np.nan]}) - - # Get percentages in each range - below_54 = below_percent(data, targets_below=[54])["below_54"].iloc[0] - below_70 = below_percent(data, targets_below=[70])["below_70"].iloc[0] - above_180 = above_percent(data, targets_above=[180])["above_180"].iloc[0] - above_250 = above_percent(data, targets_above=[250])["above_250"].iloc[0] - - # Calculate GRI - gri_value = ( - 3.0 * below_54 - + 2.4 * (below_70 - below_54) - + 1.6 * above_250 - + 0.8 * (above_180 - above_250) - ) - - # Threshold at 100% - gri_value = min(gri_value, 100) - - return pd.DataFrame({"GRI": [gri_value]}) - + if isinstance(data, (pd.Series, list,np.ndarray)): + if isinstance(data, (list, np.ndarray)): + data = pd.Series(data) + return gri_single(data, tz) + # Handle DataFrame input data = check_data_columns(data, tz=tz) # Calculate GRI for each subject result = [] + for subject in data["id"].unique(): subject_data = data[data["id"] == subject].dropna(subset=["gl"]) - if len(subject_data) == 0: - continue - - # Get percentages in each range - below_54 = below_percent(subject_data, targets_below=[54])["below_54"].iloc[0] - below_70 = below_percent(subject_data, targets_below=[70])["below_70"].iloc[0] - above_180 = above_percent(subject_data, targets_above=[180])["above_180"].iloc[ - 0 - ] - above_250 = above_percent(subject_data, targets_above=[250])["above_250"].iloc[ - 0 - ] - - # Calculate GRI - gri_value = ( - 3.0 * below_54 - + 2.4 * (below_70 - below_54) - + 1.6 * above_250 - + 0.8 * (above_180 - above_250) - ) - - # Threshold at 100% - gri_value = min(gri_value, 100) 
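A worked check of this weighting, with hypothetical percentages (below_54 = 5,
below_70 = 12, above_180 = 40, above_250 = 10):

    GRI = 3.0*5 + 2.4*(12 - 5) + 1.6*10 + 0.8*(40 - 10)
        = 15.0 + 16.8 + 16.0 + 24.0
        = 71.8

which stays under the cap, so it is returned unchanged.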
- + gri_value = gri_single(subject_data["gl"], tz) result.append({"id": subject, "GRI": gri_value}) return pd.DataFrame(result) + +def gri_single(data: pd.Series, tz: str = "") -> float: + """ + Calculate Glycemia Risk Index (GRI) for a single series/subject. + """ + data = data.dropna() + if len(data) == 0: + return np.nan + + + # Get percentages in each range + below_54 = below_percent(data, targets_below=[54])["below_54"] + below_70 = below_percent(data, targets_below=[70])["below_70"] + above_180 = above_percent(data, targets_above=[180])["above_180"] + above_250 = above_percent(data, targets_above=[250])["above_250"] + + # Calculate GRI + gri_value = ( + 3.0 * below_54 + + 2.4 * (below_70 - below_54) + + 1.6 * above_250 + + 0.8 * (above_180 - above_250) + ) + + # Threshold at 100% + gri_value = min(gri_value, 100.0) + + return gri_value diff --git a/iglu_python/in_range_percent.py b/iglu_python/in_range_percent.py index 967fdd4..591b3ab 100644 --- a/iglu_python/in_range_percent.py +++ b/iglu_python/in_range_percent.py @@ -1,14 +1,15 @@ from typing import List, Union import pandas as pd +import numpy as np from .utils import check_data_columns def in_range_percent( - data: Union[pd.DataFrame, pd.Series, list], + data: Union[pd.DataFrame, pd.Series, list,np.ndarray], target_ranges: List[List[int]] = [[70, 180], [63, 140]], -) -> pd.DataFrame: +) -> pd.DataFrame|float: """ Calculate percentage of values within target ranges. @@ -31,8 +32,8 @@ def in_range_percent( ------- pd.DataFrame DataFrame with 1 row for each subject, a column for subject id and a column - for each target range. If a list of glucose values is passed, then a DataFrame - without the subject id is returned. + for each target range. If a list of glucose values is passed, then a dictionary + with the percentage is returned. 
References ---------- @@ -64,24 +65,10 @@ def in_range_percent( 0 75.0 """ # Handle Series input - if isinstance(data, (pd.Series, list)): - # Calculate total non-NA readings - total_readings = len(data.dropna()) - if total_readings == 0: - return pd.DataFrame( - columns=[f"in_range_{min(r)}_{max(r)}" for r in target_ranges] - ) - - # Calculate percentages for each range - percentages = {} - for range_vals in target_ranges: - min_val, max_val = sorted(range_vals) - in_range_count = len(data[(data >= min_val) & (data <= max_val)]) - percentages[f"in_range_{min_val}_{max_val}"] = ( - in_range_count / total_readings - ) * 100 - - return pd.DataFrame([percentages]) + if isinstance(data, (pd.Series, list,np.ndarray)): + if isinstance(data, (list, np.ndarray)): + data = pd.Series(data) + return in_range_percent_single(data, target_ranges) data = check_data_columns(data) @@ -91,26 +78,33 @@ def in_range_percent( # Process each subject for subject in data["id"].unique(): subject_data = data[data["id"] == subject] - total_readings = len(subject_data.dropna(subset=["gl"])) - - if total_readings == 0: - continue - - # Calculate percentages for each range - percentages = {} - for range_vals in target_ranges: - min_val, max_val = sorted(range_vals) - in_range_count = len( - subject_data[ - (subject_data["gl"] >= min_val) & (subject_data["gl"] <= max_val) - ] - ) - percentages[f"in_range_{min_val}_{max_val}"] = ( - in_range_count / total_readings - ) * 100 + percentages = in_range_percent_single(subject_data["gl"], target_ranges) percentages["id"] = subject result.append(percentages) # Convert to DataFrame - return pd.DataFrame(result) + df = pd.DataFrame(result) + df = df[['id'] + [col for col in df.columns if col != 'id']] + return df + +def in_range_percent_single(data: pd.Series, target_ranges: List[List[int]] = [[70, 180], [63, 140]]) -> float: + """ + Calculate percentage of values within target ranges for a single series/subject. 
+ """ + # Calculate total non-NA readings + total_readings = len(data.dropna()) + if total_readings == 0: + return {f"in_range_{min(range_vals)}_{max(range_vals)}": 0 + for range_vals in target_ranges} + + # Calculate percentages for each range + percentages = {} + for range_vals in target_ranges: + min_val, max_val = sorted(range_vals) + in_range_count = len(data[(data >= min_val) & (data <= max_val)]) + percentages[f"in_range_{min_val}_{max_val}"] = ( + in_range_count / total_readings + ) * 100 + + return percentages diff --git a/tests/test_above_percent.py b/tests/test_above_percent.py index 3dd6d84..3f0b60f 100644 --- a/tests/test_above_percent.py +++ b/tests/test_above_percent.py @@ -112,5 +112,15 @@ def test_above_percent_output_format(): # Test with Series input result_series = iglu.above_percent(data["gl"], targets_above=custom_targets) - assert "id" not in result_series.columns - assert len(result_series) == 1 # Single row for Series input + assert isinstance(result_series, dict) + assert all(f"above_{t}" in result_series for t in custom_targets) + + # Test with list input + result_list = iglu.above_percent(data["gl"].tolist(), targets_above=custom_targets) + assert isinstance(result_list, dict) + assert all(f"above_{t}" in result_list for t in custom_targets) + + # Test with numpy array input + result_array = iglu.above_percent(data["gl"].values, targets_above=custom_targets) + assert isinstance(result_array, dict) + assert all(f"above_{t}" in result_array for t in custom_targets) \ No newline at end of file diff --git a/tests/test_below_percent.py b/tests/test_below_percent.py index a5d493c..1f374d0 100644 --- a/tests/test_below_percent.py +++ b/tests/test_below_percent.py @@ -117,15 +117,14 @@ def test_below_percent_series_input(): result = iglu.below_percent(data) # Check output format - assert isinstance(result, pd.DataFrame) - assert "below_54" in result.columns - assert "below_70" in result.columns - assert "id" not in result.columns - assert len(result) == 1 + assert isinstance(result, dict) + assert "below_54" in result + assert "below_70" in result + assert len(result) == 2 # Check that percentages are between 0 and 100 - assert (result["below_54"].iloc[0] >= 0) and (result["below_54"].iloc[0] <= 100) - assert (result["below_70"].iloc[0] >= 0) and (result["below_70"].iloc[0] <= 100) + assert (result["below_54"] >= 0) and (result["below_54"] <= 100) + assert (result["below_70"] >= 0) and (result["below_70"] <= 100) def test_below_percent_custom_targets(): diff --git a/tests/test_gri.py b/tests/test_gri.py index 771ce58..df7687b 100644 --- a/tests/test_gri.py +++ b/tests/test_gri.py @@ -102,11 +102,8 @@ def test_gri_series(): [150, 50, 160, 260, 140, 85] ) # Include values in all GRI ranges result = iglu.gri(series_data) - assert isinstance(result, pd.DataFrame) - assert "GRI" in result.columns - assert len(result) == 1 - assert result["GRI"].iloc[0] >= 0 - assert result["GRI"].iloc[0] <= 100 + assert isinstance(result, float) + np.testing.assert_allclose(result, 76.66, rtol=0.001) def test_gri_empty(): @@ -121,16 +118,14 @@ def test_gri_constant_glucose(): # Test with constant glucose in target range series_data = pd.Series([150] * 10) result = iglu.gri(series_data) - assert len(result) == 1 - assert ( - result["GRI"].iloc[0] == 0 - ) # Should be 0 for constant glucose in target range + assert isinstance(result, (np.int64, float,int)) + np.testing.assert_allclose(result, 0, rtol=0.001) # Test with constant glucose in severe hypoglycemia range series_data = pd.Series([40] 
* 10) result = iglu.gri(series_data) - assert len(result) == 1 - assert result["GRI"].iloc[0] > 0 # Should be positive for constant glucose below 54 + assert isinstance(result, (np.int64, float,int)) + np.testing.assert_allclose(result, 100, rtol=0.001) def test_gri_missing_values(): diff --git a/tests/test_in_range_percent.py b/tests/test_in_range_percent.py index 32f5091..9e20ab1 100644 --- a/tests/test_in_range_percent.py +++ b/tests/test_in_range_percent.py @@ -113,19 +113,14 @@ def test_in_range_percent_series_input(): result = iglu.in_range_percent(data) # Check output format - assert isinstance(result, pd.DataFrame) - assert "in_range_70_180" in result.columns - assert "in_range_63_140" in result.columns - assert "id" not in result.columns - assert len(result) == 1 + assert isinstance(result, dict) + assert "in_range_70_180" in result + assert "in_range_63_140" in result + assert len(result) == 2 # Check that percentages are between 0 and 100 - assert (result["in_range_70_180"].iloc[0] >= 0) and ( - result["in_range_70_180"].iloc[0] <= 100 - ) - assert (result["in_range_63_140"].iloc[0] >= 0) and ( - result["in_range_63_140"].iloc[0] <= 100 - ) + assert (result["in_range_70_180"] >= 0) and (result["in_range_70_180"] <= 100) + assert (result["in_range_63_140"] >= 0) and (result["in_range_63_140"] <= 100) def test_in_range_percent_custom_targets(): From a9f984142b8b4e1bc5751103721508ff38c9d8b6 Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Mon, 16 Jun 2025 17:43:08 +0300 Subject: [PATCH 06/16] support for Series, list and ndarray -> return float or dict --- README.md | 2 +- iglu_python/active_percent.py | 158 +++++++++++++++++++--------------- tests/test_active_percent.py | 72 +++++++++++++++- 3 files changed, 160 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index 5aae055..48eeffd 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves | Function | Description | IGLU-R test compatibility | list /ndarray /Series input | TZ | Comments | |----------|-------------|-------------|-------------------|----|----------| | above_percent | percentage of values above target thresholds| ✅ |✅ returns dict ||| -| active_percent | percentage of time CGM was active | ✅ | +| active_percent | percentage of time CGM was active | ✅ | ✅ only Series(DatetimeIndex) returns dict[str:float] | adrr | average daily risk range | ✅ | | auc| Area Under Curve | 🟡 (0.01 precision) |✅ only Series(DatetimeIndex) returns float || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)| | below_percent| percentage of values below target thresholds| ✅ | ✅ returns dict diff --git a/iglu_python/active_percent.py b/iglu_python/active_percent.py index b4c2f00..0b3d3e1 100644 --- a/iglu_python/active_percent.py +++ b/iglu_python/active_percent.py @@ -2,18 +2,19 @@ from typing import Optional, Union import pandas as pd +import numpy as np from .utils import check_data_columns, localize_naive_timestamp def active_percent( - data: pd.DataFrame, + data: Union[pd.DataFrame, pd.Series], dt0: Optional[int] = None, tz: str = "", range_type: str = "automatic", ndays: int = 14, consistent_end_date: Optional[Union[str, datetime]] = None, -) -> pd.DataFrame: +) -> pd.DataFrame|dict[str:float]: """ Calculate percentage of time CGM was active. 
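A minimal sketch of the new Series entry point (timestamps and values invented; with no gaps the automatic range yields 100% active time):

    import pandas as pd
    import iglu_python as iglu

    ts = pd.Series(
        [110, 115, 120],
        index=pd.date_range("2020-01-01 08:00", periods=3, freq="5min"),
    )
    iglu.active_percent(ts)
    # {'active_percent': 100.0, 'ndays': 0.0,
    #  'start_date': Timestamp('2020-01-01 08:00:00'),
    #  'end_date': Timestamp('2020-01-01 08:10:00')}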
@@ -70,6 +71,12 @@ def active_percent( 0 subject1 66.67 0.0 2020-01-01 00:00:00 2020-01-01 00:10:00 1 subject2 100.00 0.0 2020-01-01 00:00:00 2020-01-01 00:05:00 """ + + if isinstance(data, pd.Series): + if not isinstance(data.index, pd.DatetimeIndex): + raise ValueError("Series must have a DatetimeIndex") + return active_percent_single(data, dt0, tz, range_type, ndays, consistent_end_date) + # Check data format and convert time to datetime data = check_data_columns(data, tz) @@ -85,75 +92,86 @@ def active_percent( .sort_values("time") ) - if len(sub_data) == 0: - continue - - # Calculate time differences between consecutive measurements - time_diffs = ( - sub_data["time"].diff().dt.total_seconds() / 60 - ) # Convert to minutes - - # Automatically determine dt0 if not provided - if dt0 is None: - dt0 = round(time_diffs.median()) - - if range_type == "automatic": - # Determine range of observed data - min_time = sub_data["time"].min() - max_time = sub_data["time"].max() - - # Calculate theoretical number of measurements - total_minutes = (max_time - min_time).total_seconds() / 60 - theoretical_gl_vals = round(total_minutes / dt0) + 1 - - # Calculate missing values due to gaps - gaps = time_diffs[time_diffs > dt0] - gap_minutes = gaps.sum() - n_gaps = len(gaps) - missing_gl_vals = round((gap_minutes - n_gaps * dt0) / dt0) - - # Calculate number of days - ndays = (max_time - min_time).total_seconds() / (24 * 3600) - - # Calculate active percentage - active_percent = ( - (theoretical_gl_vals - missing_gl_vals) / theoretical_gl_vals - ) * 100 - elif range_type == "manual": - # Handle consistent end date if provided - if consistent_end_date is not None: - end_date = localize_naive_timestamp(pd.to_datetime(consistent_end_date)) - else: - end_date = sub_data["time"].max() - start_date = end_date - pd.Timedelta(days=int(ndays)) - - # Filter data to the specified date range - mask = (sub_data["time"] >= start_date) & (sub_data["time"] <= end_date) - sub_data = sub_data[mask] - - # Recalculate active percentage for the specified range - active_percent = (len(sub_data) / (ndays * (24 * (60 / dt0)))) * 100 - min_time = start_date - max_time = end_date - ndays = (end_date - start_date).total_seconds() / (24 * 3600) - else: - raise ValueError(f"Invalid range_type: {range_type}") - - active_perc_data.append( - { - "id": subject, - "active_percent": active_percent, - "ndays": round(ndays, 1), - "start_date": min_time, - "end_date": max_time, - } - ) + timeseries = sub_data.set_index("time")["gl"] + active_percent_dict = active_percent_single(timeseries, dt0, tz, range_type, ndays, consistent_end_date) + active_percent_dict["id"] = subject + active_perc_data.append(active_percent_dict) + # Convert to DataFrame - result = pd.DataFrame(active_perc_data) + df = pd.DataFrame(active_perc_data) + df = df[['id'] + [col for col in df.columns if col != 'id']] + return df - # If input was a Series (glucose values only), remove id column - if hasattr(data, "is_vector") and data.is_vector: - result = result.drop("id", axis=1) - return result +def active_percent_single(data: pd.Series, dt0: Optional[int] = None, tz: str = "", range_type: str = "automatic", ndays: int = 14, consistent_end_date: Optional[Union[str, datetime]] = None) -> dict[str:float]: + """ + Calculate percentage of time CGM was active for a single series/subject. 
+    """
+
+    if not isinstance(data, pd.Series):
+        raise ValueError("Input must be a Series")
+
+    if not isinstance(data.index, pd.DatetimeIndex):
+        raise ValueError("Series must have a DatetimeIndex")
+
+    data = data.dropna()
+    if len(data) == 0:
+        return {"active_percent": 0, "ndays": 0, "start_date": None, "end_date": None}
+
+    # Calculate time differences between consecutive measurements
+    time_diffs = np.array(
+        data.index.diff().total_seconds() / 60
+    )  # Convert to minutes
+
+    # Automatically determine dt0 if not provided
+    if dt0 is None:
+        dt0 = round(np.nanmedian(time_diffs))
+
+    if range_type == "automatic":
+        # Determine range of observed data
+        min_time = data.index.min()
+        max_time = data.index.max()
+
+        # Calculate theoretical number of measurements
+        total_minutes = (max_time - min_time).total_seconds() / 60
+        theoretical_gl_vals = round(total_minutes / dt0) + 1
+
+        # Calculate missing values due to gaps
+        gaps = time_diffs[time_diffs > dt0]
+        gap_minutes = gaps.sum()
+        n_gaps = len(gaps)
+        missing_gl_vals = round((gap_minutes - n_gaps * dt0) / dt0)
+
+        # Calculate number of days
+        ndays = (max_time - min_time).total_seconds() / (24 * 3600)
+
+        # Calculate active percentage
+        active_percent = (
+            (theoretical_gl_vals - missing_gl_vals) / theoretical_gl_vals
+        ) * 100
+    elif range_type == "manual":
+        # Handle consistent end date if provided
+        if consistent_end_date is not None:
+            end_date = localize_naive_timestamp(pd.to_datetime(consistent_end_date))
+        else:
+            end_date = data.index.max()
+        start_date = end_date - pd.Timedelta(days=int(ndays))
+
+        # Filter data to the specified date range
+        mask = (data.index >= start_date) & (data.index <= end_date)
+        data = data[mask]
+
+        # Recalculate active percentage for the specified range
+        active_percent = (len(data) / (ndays * (24 * (60 / dt0)))) * 100
+        min_time = start_date
+        max_time = end_date
+        ndays = (end_date - start_date).total_seconds() / (24 * 3600)
+    else:
+        raise ValueError(f"Invalid range_type: {range_type}")
+
+    return {"active_percent": active_percent, "ndays": round(ndays, 1), "start_date": min_time, "end_date": max_time}
+
+
+
+
\ No newline at end of file
diff --git a/tests/test_active_percent.py b/tests/test_active_percent.py
index 6f4fad3..cb61899 100644
--- a/tests/test_active_percent.py
+++ b/tests/test_active_percent.py
@@ -191,4 +191,74 @@ def test_active_percent_single_subject_no_gaps():
     result = iglu.active_percent(single_subject, dt0=5)
 
     assert len(result) == 1
-    assert result["active_percent"].iloc[0] == 100.0  # Should be 100% active with no gaps
\ No newline at end of file
+    assert result["active_percent"].iloc[0] == 100.0  # Should be 100% active with no gaps
+
+def test_active_percent_series_with_datetime_index():
+    """Test active_percent calculation with Series input that has DatetimeIndex."""
+    # Create test data with DatetimeIndex
+    time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00',
+                           '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00'])
+    data = pd.Series(
+        [100, 120, 110,  # Day 1: 3 measurements
+         90, 130, 95],   # Day 2: 3 measurements
+        index=time
+    )
+
+    # Calculate active_percent
+    result = iglu.active_percent(data)
+
+    assert isinstance(result, dict)
+    assert "active_percent" in result
+    assert "ndays" in result
+    assert "start_date" in result
+    assert "end_date" in result
+
+    # Expected results (automatic range, dt0 = median time diff = 5 min):
+    # Theoretical measurements over the 1450-minute span: round(1450 / 5) + 1 = 291
+    # Measurements lost to the 1430-minute overnight gap: round((1430 - 5) / 5) = 285
+    # active_percent = (291 - 285) / 291 * 100 = 2.062
+    expected = 2.061856
+
+    # Compare results
+    np.testing.assert_allclose(result["active_percent"], expected, rtol=0.001)
+
+def test_active_percent_series_without_datetime_index():
+    """Test active_percent calculation with Series input that doesn't have DatetimeIndex."""
+    # Create test data with regular index
+    data = pd.Series(
+        [100, 120, 110, 90, 130, 95],
+        index=range(6)  # Regular integer index instead of DatetimeIndex
+    )
+
+    # Attempt to calculate active_percent - should raise ValueError
+    with pytest.raises(ValueError, match="Series must have a DatetimeIndex"):
+        iglu.active_percent(data)
+
+def test_active_percent_series_with_missing_values():
+    """Test active_percent calculation with Series input containing missing values."""
+    # Create test data with DatetimeIndex and missing values
+    time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00',
+                           '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00'])
+    data = pd.Series(
+        [100, np.nan, 110,  # Day 1: 2 measurements
+         90, 130, np.nan],  # Day 2: 2 measurements
+        index=time
+    )
+
+    # Calculate active_percent
+    result = iglu.active_percent(data)
+    assert isinstance(result, dict)
+    assert "active_percent" in result
+    assert "ndays" in result
+    assert "start_date" in result
+    assert "end_date" in result
+
+    # Expected results (automatic range, dt0 = median diff = 10 min after dropping NaNs):
+    # Theoretical measurements over the 1445-minute span: round(1445 / 10) + 1 = 145
+    # Measurements lost to the 1430-minute overnight gap: round((1430 - 10) / 10) = 142
+    # active_percent = (145 - 142) / 145 * 100 = 2.069
+    expected = 2.068966
+
+    # Compare results
+    np.testing.assert_allclose(result["active_percent"], expected, rtol=0.001)
+

From 3f108c2285290eda3046a7c695452af156a6c072 Mon Sep 17 00:00:00 2001
From: Stas Khirman
Date: Mon, 16 Jun 2025 17:43:56 +0300
Subject: [PATCH 07/16] ruff formatting

---
 iglu_python/__init__.py            |   4 +-
 iglu_python/above_percent.py       |   5 +-
 iglu_python/active_percent.py      |  13 ++-
 iglu_python/auc.py                 |   5 +-
 iglu_python/below_percent.py       |   5 +-
 iglu_python/cogi.py                |   4 +-
 iglu_python/conga.py               |   2 +-
 iglu_python/cv_glu.py              |  13 +--
 iglu_python/cv_measures.py         |  20 +++--
 iglu_python/episode_calculation.py |   4 +-
 iglu_python/gmi.py                 |   9 +-
 iglu_python/grade.py               |   1 +
 iglu_python/gri.py                 |   2 +-
 iglu_python/gvp.py                 |   1 +
 iglu_python/in_range_percent.py    |   4 +-
 iglu_python/lbgi.py                |   1 +
 iglu_python/mag.py                 |   5 +-
 iglu_python/mage.py                | 128 ++++++++++++++----------
 iglu_python/pgs.py                 |   2 +-
 iglu_python/process_data.py        |  96 +++++++++++-----------
 iglu_python/sd_measures.py         |  90 ++++++++++----------
 iglu_python/summary_glu.py         |  32 ++++----
 iglu_python/utils.py               |   3 +-
 23 files changed, 227 insertions(+), 222 deletions(-)

diff --git a/iglu_python/__init__.py b/iglu_python/__init__.py
index 52356da..fbc7b95 100644
--- a/iglu_python/__init__.py
+++ b/iglu_python/__init__.py
@@ -9,11 +9,11 @@
 from .cv_measures import cv_measures
 from .ea1c import ea1c
 from .episode_calculation import episode_calculation
+from .gmi import gmi
 from .grade import grade
 from .grade_eugly import grade_eugly
 from .grade_hyper import grade_hyper
 from .grade_hypo import grade_hypo
-from .gmi import gmi
 from .gri import gri
 from .gvp import gvp
 from .hbgi import hbgi
@@ -40,7 +40,7 @@
 from .sd_measures import sd_measures
 from .sd_roc import sd_roc
 from .summary_glu import summary_glu
-from .utils import set_iglu_r_compatible, is_iglu_r_compatible, CGMS2DayByDay, check_data_columns, gd2d_to_df
+from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, is_iglu_r_compatible, set_iglu_r_compatible
 
 __all__ = [
"above_percent", diff --git a/iglu_python/above_percent.py b/iglu_python/above_percent.py index 703952e..65d5ebe 100644 --- a/iglu_python/above_percent.py +++ b/iglu_python/above_percent.py @@ -1,7 +1,7 @@ from typing import List, Union -import pandas as pd import numpy as np +import pandas as pd from .utils import check_data_columns @@ -63,7 +63,7 @@ def above_percent( if isinstance(data, (list, np.ndarray)): data = pd.Series(data) return above_percent_single(data, targets_above) - + # Handle DataFrame input data = check_data_columns(data) @@ -103,4 +103,3 @@ def above_percent_single(data: pd.Series, targets_above: List[int] = [140, 180, percentages[f"above_{target}"] = (above_count / total_readings) * 100 return percentages - \ No newline at end of file diff --git a/iglu_python/active_percent.py b/iglu_python/active_percent.py index 0b3d3e1..d137f51 100644 --- a/iglu_python/active_percent.py +++ b/iglu_python/active_percent.py @@ -1,8 +1,8 @@ from datetime import datetime from typing import Optional, Union -import pandas as pd import numpy as np +import pandas as pd from .utils import check_data_columns, localize_naive_timestamp @@ -76,7 +76,7 @@ def active_percent( if not isinstance(data.index, pd.DatetimeIndex): raise ValueError("Series must have a DatetimeIndex") return active_percent_single(data, dt0, tz, range_type, ndays, consistent_end_date) - + # Check data format and convert time to datetime data = check_data_columns(data, tz) @@ -111,14 +111,14 @@ def active_percent_single(data: pd.Series, dt0: Optional[int] = None, tz: str = if not isinstance(data, pd.Series): raise ValueError("Input must be a Series") - + if not isinstance(data.index, pd.DatetimeIndex): raise ValueError("Series must have a DatetimeIndex") data = data.dropna() if len(data) == 0: return {"active_percent": 0, "ndays": 0, "start_date": None, "end_date": None} - + # Calculate time differences between consecutive measurements time_diffs = np.array( data.index.diff().total_seconds() / 60 @@ -172,6 +172,5 @@ def active_percent_single(data: pd.Series, dt0: Optional[int] = None, tz: str = return {"active_percent": active_percent, "ndays": round(ndays, 1), "start_date": min_time, "end_date": max_time} - - - \ No newline at end of file + + diff --git a/iglu_python/auc.py b/iglu_python/auc.py index 55ede21..b1ef01b 100644 --- a/iglu_python/auc.py +++ b/iglu_python/auc.py @@ -5,7 +5,6 @@ from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, is_iglu_r_compatible - def auc(data: pd.DataFrame, tz: str = "") -> pd.DataFrame: """ Calculate Area Under Curve (AUC) for glucose measurements. 
@@ -60,10 +59,10 @@ def auc(data: pd.DataFrame, tz: str = "") -> pd.DataFrame: if isinstance(data, pd.Series): if not isinstance(data.index, pd.DatetimeIndex): raise ValueError("Series must have a DatetimeIndex") - + auc = auc_single(data,tz=tz) return auc - + # Check data format and convert time to datetime data = check_data_columns(data) diff --git a/iglu_python/below_percent.py b/iglu_python/below_percent.py index 99b3421..6c3017a 100644 --- a/iglu_python/below_percent.py +++ b/iglu_python/below_percent.py @@ -1,7 +1,7 @@ from typing import List, Union -import pandas as pd import numpy as np +import pandas as pd from .utils import check_data_columns @@ -62,7 +62,7 @@ def below_percent( if isinstance(data, (list, np.ndarray)): data = pd.Series(data) return below_percent_single(data, targets_below) - + # Handle DataFrame input data = check_data_columns(data) @@ -103,4 +103,3 @@ def below_percent_single(data: pd.Series, targets_below: List[int] = [54, 70]) - return percentages - \ No newline at end of file diff --git a/iglu_python/cogi.py b/iglu_python/cogi.py index ab2b08e..5131451 100644 --- a/iglu_python/cogi.py +++ b/iglu_python/cogi.py @@ -1,7 +1,7 @@ from typing import List, Union -import pandas as pd import numpy as np +import pandas as pd from .below_percent import below_percent from .in_range_percent import in_range_percent @@ -74,7 +74,7 @@ def cogi( if isinstance(data, (list, np.ndarray)): data = pd.Series(data) return cogi_single(data, targets, weights) - + data = check_data_columns(data) out = data.groupby("id").agg( diff --git a/iglu_python/conga.py b/iglu_python/conga.py index 8917119..2b249cd 100644 --- a/iglu_python/conga.py +++ b/iglu_python/conga.py @@ -82,7 +82,7 @@ def conga_single(data: pd.DataFrame, hours: int = 1, tz: str = "") -> float: valid_diffs = diffs[~np.isnan(diffs)] if len(valid_diffs) < 2: return np.nan - + return float(np.nanstd(diffs, ddof=1)) # Handle Series input diff --git a/iglu_python/cv_glu.py b/iglu_python/cv_glu.py index b2e28b3..9e04e58 100644 --- a/iglu_python/cv_glu.py +++ b/iglu_python/cv_glu.py @@ -11,10 +11,13 @@ """ from typing import Union -import pandas as pd + import numpy as np +import pandas as pd + from .utils import check_data_columns + def cv_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> Union[pd.DataFrame, float]: """Calculate Coefficient of Variation (CV) of glucose levels. @@ -47,14 +50,14 @@ def cv_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> Union[pd.D # Calculate CV for Series cv_val = 100 * data.std() / data.mean() return cv_val - + # Check and prepare data data = check_data_columns(data) - + data = data.dropna() # Calculate CV for each subject out = data.groupby('id').agg( CV=('gl', lambda x: 100 * x.std() / x.mean()) ).reset_index() - - return out \ No newline at end of file + + return out diff --git a/iglu_python/cv_measures.py b/iglu_python/cv_measures.py index 6f926ac..73552cc 100644 --- a/iglu_python/cv_measures.py +++ b/iglu_python/cv_measures.py @@ -11,9 +11,11 @@ doi:10.1016/j.amjms.2018.09.010. """ -import pandas as pd import numpy as np -from .utils import check_data_columns, CGMS2DayByDay, is_iglu_r_compatible +import pandas as pd + +from .utils import CGMS2DayByDay, check_data_columns + def cv_measures(data, dt0=None, inter_gap=45, tz="")->pd.DataFrame|dict[str:float]: """Calculate Coefficient of Variation subtypes (CVmean and CVsd). 
@@ -51,28 +53,28 @@ def cv_measures(data, dt0=None, inter_gap=45, tz="")->pd.DataFrame|dict[str:floa if isinstance(data, pd.Series): if not isinstance(data.index, pd.DatetimeIndex): raise ValueError("Series must have a DatetimeIndex") - + results_dict = _calculate_series_cv(data, dt0=dt0, inter_gap=inter_gap, tz=tz) return results_dict # Check and prepare data data = check_data_columns(data) - - + + # Process each subject results = [] for subject_id in data['id'].unique(): subject_data = data[data['id'] == subject_id] - + results_dict = _calculate_series_cv(subject_data, dt0=dt0, inter_gap=inter_gap, tz=tz) - + results.append({ 'id': subject_id, 'CVmean': results_dict['CVmean'], 'CVsd': results_dict['CVsd'] }) - - return pd.DataFrame(results) + + return pd.DataFrame(results) def _calculate_series_cv(subject_data: pd.DataFrame|pd.Series, dt0=None, inter_gap=45, tz="") -> dict[str:float]: """Calculate CV for series/single subject input""" diff --git a/iglu_python/episode_calculation.py b/iglu_python/episode_calculation.py index c5b759c..dd01412 100644 --- a/iglu_python/episode_calculation.py +++ b/iglu_python/episode_calculation.py @@ -172,7 +172,7 @@ def episode_calculation( episode_data_df = subject_episode_data else: episode_data_df = pd.concat([episode_data_df, subject_episode_data], ignore_index=True) - + if episode_summary_df.empty: episode_summary_df = subject_summary else: @@ -320,7 +320,7 @@ def calculate_exclusion(df, lv1_col, lv2_col): df['group_id'] = df.groupby(['segment', lv1_col]).ngroup() group_has_lv2 = df.groupby('group_id')[lv2_col].transform(lambda x: (x > 0).any()) return df[lv1_col].where(~group_has_lv2, 0) - + ep_per_seg['lv1_hypo_excl'] = calculate_exclusion(ep_per_seg, 'lv1_hypo', 'lv2_hypo') ep_per_seg['lv1_hyper_excl'] = calculate_exclusion(ep_per_seg, 'lv1_hyper', 'lv2_hyper') diff --git a/iglu_python/gmi.py b/iglu_python/gmi.py index 57d01be..cbf5df9 100644 --- a/iglu_python/gmi.py +++ b/iglu_python/gmi.py @@ -11,10 +11,11 @@ doi:10.2337/dc18-1581. """ -import pandas as pd -import numpy as np from typing import Union +import numpy as np +import pandas as pd + from iglu_python.utils import check_data_columns @@ -47,7 +48,7 @@ def gmi(data: Union[pd.DataFrame, pd.Series, list]) -> float|pd.DataFrame: # Calculate GMI for Series gmi_val = 3.31 + (0.02392 * data.mean()) return gmi_val - + # Check and prepare data data = check_data_columns(data) is_vector = getattr(data, "is_vector", False) @@ -57,4 +58,4 @@ def gmi(data: Union[pd.DataFrame, pd.Series, list]) -> float|pd.DataFrame: GMI=("gl", lambda x: 3.31 + (0.02392 * x.mean())) ).reset_index() - return out \ No newline at end of file + return out diff --git a/iglu_python/grade.py b/iglu_python/grade.py index dddb8ba..0268484 100644 --- a/iglu_python/grade.py +++ b/iglu_python/grade.py @@ -5,6 +5,7 @@ from .utils import check_data_columns + def grade(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: """ Calculate mean GRADE score for each subject. 
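A minimal sketch of the Series fast path shown in the gmi hunk above, assuming the package is importable as iglu_python and using made-up glucose readings:

    import pandas as pd
    import iglu_python as iglu

    gl = pd.Series([100, 120, 150, 180, 140])  # mg/dL, illustrative values only

    # With a bare Series, gmi() returns a scalar:
    # GMI = 3.31 + 0.02392 * mean(gl) = 3.31 + 0.02392 * 138 ≈ 6.61
    print(iglu.gmi(gl))
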
diff --git a/iglu_python/gri.py b/iglu_python/gri.py index dc62c70..5a023b4 100644 --- a/iglu_python/gri.py +++ b/iglu_python/gri.py @@ -59,7 +59,7 @@ def gri(data: Union[pd.DataFrame, pd.Series,list,np.ndarray], tz: str = "") -> p if isinstance(data, (list, np.ndarray)): data = pd.Series(data) return gri_single(data, tz) - + # Handle DataFrame input data = check_data_columns(data, tz=tz) diff --git a/iglu_python/gvp.py b/iglu_python/gvp.py index 0bf3689..0d923ef 100644 --- a/iglu_python/gvp.py +++ b/iglu_python/gvp.py @@ -5,6 +5,7 @@ from .utils import CGMS2DayByDay, check_data_columns + def gvp(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.DataFrame: r""" Calculate Glucose Variability Percentage (GVP). diff --git a/iglu_python/in_range_percent.py b/iglu_python/in_range_percent.py index 591b3ab..35e7c66 100644 --- a/iglu_python/in_range_percent.py +++ b/iglu_python/in_range_percent.py @@ -1,7 +1,7 @@ from typing import List, Union -import pandas as pd import numpy as np +import pandas as pd from .utils import check_data_columns @@ -95,7 +95,7 @@ def in_range_percent_single(data: pd.Series, target_ranges: List[List[int]] = [[ # Calculate total non-NA readings total_readings = len(data.dropna()) if total_readings == 0: - return {f"in_range_{min(range_vals)}_{max(range_vals)}": 0 + return {f"in_range_{min(range_vals)}_{max(range_vals)}": 0 for range_vals in target_ranges} # Calculate percentages for each range diff --git a/iglu_python/lbgi.py b/iglu_python/lbgi.py index 55ae880..61bf883 100644 --- a/iglu_python/lbgi.py +++ b/iglu_python/lbgi.py @@ -5,6 +5,7 @@ from .utils import check_data_columns + def lbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: r""" Calculate the Low Blood Glucose Index (LBGI) for each subject. diff --git a/iglu_python/mag.py b/iglu_python/mag.py index 70fe644..9e91c55 100644 --- a/iglu_python/mag.py +++ b/iglu_python/mag.py @@ -6,7 +6,6 @@ from .utils import CGMS2DayByDay, check_data_columns, is_iglu_r_compatible - def mag( data: Union[pd.DataFrame, pd.Series], n: int = 60, @@ -101,7 +100,7 @@ def mag_single(data: pd.DataFrame, n: int) -> float: diffs = np.abs(diffs) diffs = diffs[~np.isnan(diffs)] # to be IGLU-R test compatible, imho they made error. 
- # has to be total_time_hours = ((len(diffs)) * n) / 60 + # has to be total_time_hours = ((len(diffs)) * n) / 60 total_time_hours = ((len(gl_values_idx[~np.isnan(gl_values_idx)])) * n) / 60 if total_time_hours == 0: return 0.0 @@ -112,7 +111,7 @@ def mag_single(data: pd.DataFrame, n: int) -> float: diffs = diffs[~np.isnan(diffs)] # Calculate MAG: sum of absolute differences divided by total time in hours - total_time_hours = ((len(diffs)) * n) / 60 + total_time_hours = ((len(diffs)) * n) / 60 if total_time_hours == 0: return 0.0 mag = float(np.sum(diffs) / total_time_hours) diff --git a/iglu_python/mage.py b/iglu_python/mage.py index 16916f8..4c48e57 100644 --- a/iglu_python/mage.py +++ b/iglu_python/mage.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -from .utils import CGMS2DayByDay, check_data_columns,gd2d_to_df +from .utils import CGMS2DayByDay, check_data_columns def mage( @@ -124,7 +124,7 @@ def mage( } ) if version == "ma": - mage_val = mage_ma_single(data_df, short_ma, long_ma, direction, + mage_val = mage_ma_single(data_df, short_ma, long_ma, direction, return_type='num', inter_gap=inter_gap, max_gap=max_gap, @@ -171,7 +171,7 @@ def mage_naive(data: pd.DataFrame,sd_multiplier:float = 1.0) -> float: return float(mage_val) if not pd.isna(mage_val) else np.nan -def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, +def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, direction:str ='avg', return_type:str = "num",inter_gap:int = 45, max_gap:int = 180, tz:str = "" ) -> pd.DataFrame: """Calculate MAGE using moving average algorithm for a single subject""" ## 1. Preprocessing @@ -185,7 +185,7 @@ def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, ndays = len(data_ip[1]) # 1.2 Generate grid times by starting from day one and cumulatively summing - # note fix 5 min used in interpretation + # note fix 5 min used in interpretation gl = data_ip[0].flatten().tolist() time_ip = [pd.Timedelta(i * 5, unit="m") + day_one for i in range(1,len(gl)+1)] @@ -218,7 +218,7 @@ def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, interpolated_data = interpolated_data.iloc[:last_valid_idx+1] # Add gap column to mark NA values as 1 interpolated_data['gap'] = interpolated_data['gl'].isna().astype(int) - + # 4. Time Series Segmentation: split gaps > max_gap into separate segments dfs = segment_time_series(interpolated_data,max_gap) # note: max_gap is in minutes @@ -233,8 +233,8 @@ def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, if return_type == 'df': return return_val - - """Process MAGE results with filtering and weighting.""" + + """Process MAGE results with filtering and weighting.""" # Filter by direction (equivalent to the previous R filtering code) if direction == 'plus': res = return_val[return_val['plus_or_minus'] == 'PLUS'].copy() @@ -248,16 +248,16 @@ def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, res = return_val.loc[idx].reset_index(drop=True) else: # default: first excursions only res = return_val[return_val['first_excursion'] == True].copy() - + # Calculate time-weighted MAGE if res.empty: return np.nan - + res['hours'] = res['end'] - res['start'] res['weight'] = res['hours'] / res['hours'].sum() weighted_mage = (res['MAGE'] * res['weight']).sum() - - return weighted_mage + + return weighted_mage def mage_atomic(data, short_ma,long_ma): """ 0. 
Calculates MAGE on 1 segment of CGM trace """ @@ -267,7 +267,7 @@ def mage_atomic(data, short_ma,long_ma): data["MA_Short"] = data["gl"].rolling(window=short_ma, min_periods=1).mean() data["MA_Long"] = data["gl"].rolling(window=long_ma, min_periods=1).mean() # Fill leading NAs (forward fill first valid value) - if short_ma > len(data): + if short_ma > len(data): data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[-1] else: data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[short_ma-1] @@ -280,7 +280,7 @@ def mage_atomic(data, short_ma,long_ma): data = data.reset_index(drop=True) nmeasurements = len(data) - # Sanity check + # Sanity check if ( data['gl'].isnull().all() or nmeasurements < 7 or @@ -289,7 +289,7 @@ def mage_atomic(data, short_ma,long_ma): ): return pd.DataFrame({ 'start': [data['time'].iloc[0]], - 'end': [data['time'].iloc[-1]], + 'end': [data['time'].iloc[-1]], 'MAGE': [np.nan], 'plus_or_minus': [np.nan], 'first_excursion': [np.nan] @@ -301,29 +301,29 @@ def mage_atomic(data, short_ma,long_ma): # Initialize variables idx = list(data.index) # R: idx = as.numeric(rownames(.data)) types = {'REL_MIN': 0, 'REL_MAX': 1} # R: types = list2env(list(REL_MIN=0, REL_MAX=1)) - + # Create storage lists - R: list_cross <- list("id" = rep.int(NA, nmeasurements), "type" = rep.int(NA, nmeasurements)) list_cross = { 'id': [np.nan] * nmeasurements, 'type': [np.nan] * nmeasurements } - + # Always add 1st point list_cross['id'][0] = idx[0] list_cross['type'][0] = types['REL_MAX'] if data['DELTA_SHORT_LONG'].iloc[0] > 0 else types['REL_MIN'] count = 1 # Python uses 0-based indexing, so count starts at 1 - + # treat DELTA_SHORT_LONG==0 as NaN ( so we can skip its multiplication) data.loc[data['DELTA_SHORT_LONG'] == 0, 'DELTA_SHORT_LONG'] = np.nan # Main loop - R: for(i in 2:length(.data$DELTA_SHORT_LONG)) for i in range(1, len(data['DELTA_SHORT_LONG'])): # Check data validity - if (not pd.isna(data['gl'].iloc[i]) and + if (not pd.isna(data['gl'].iloc[i]) and not pd.isna(data['gl'].iloc[i-1]) and - not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and + not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and not pd.isna(data['DELTA_SHORT_LONG'].iloc[i-1])): - + # Primary crossover detection: crossing point if DELTA changes sign if (data['DELTA_SHORT_LONG'].iloc[i] * data['DELTA_SHORT_LONG'].iloc[i-1] < 0): list_cross['id'][count] = idx[i] @@ -332,16 +332,16 @@ def mage_atomic(data, short_ma,long_ma): else: list_cross['type'][count] = types['REL_MAX'] count += 1 - + # Gap handling: needed for gaps, where DELTA_SHORT_LONG(i-1 | i-2) = NaN - elif (not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and + elif (not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and count >= 1): # Make sure we have a previous crossover - + # R: match(list_cross$id[count-1], idx) - find index of previous crossover try: prev_cross_idx = idx.index(list_cross['id'][count-1]) prev_delta = data['DELTA_SHORT_LONG'].iloc[prev_cross_idx] - + if (data['DELTA_SHORT_LONG'].iloc[i] * prev_delta < 0): list_cross['id'][count] = idx[i] if data['DELTA_SHORT_LONG'].iloc[i] < prev_delta: @@ -352,12 +352,12 @@ def mage_atomic(data, short_ma,long_ma): except ValueError: # Handle case where previous crossover id not found in idx pass - + # Add last point to capture excursion at end # R: utils::tail(idx, 1) last_idx = idx[-1] list_cross['id'][count] = last_idx - + if data['DELTA_SHORT_LONG'].iloc[-1] > 0: list_cross['type'][count] = types['REL_MAX'] else: @@ -376,11 +376,11 @@ def mage_atomic(data, short_ma,long_ma): # 2e. 
Calculate min and max glucose values from ids and types in crosses + store indexes for plotting later # R: num_extrema = nrow(crosses)-1 num_extrema = len(crosses) - 1 - + # R: minmax <- rep(NA_real_, num_extrema), indexes <- rep(NA_real_, num_extrema) minmax = [np.nan] * num_extrema indexes = [np.nan] * num_extrema - + # R: for(i in 1:num_extrema) for i in range(num_extrema): # Define search boundaries @@ -389,15 +389,15 @@ def mage_atomic(data, short_ma,long_ma): s1 = int(crosses.iloc[i]['id']) # crosses[i, 1] in R (1-indexed) else: s1 = int(indexes[i-1]) # last minmax index - + # R: s2 <- crosses[i+1,1] s2 = int(crosses.iloc[i+1]['id']) # crosses[i+1, 1] in R - + # Extract glucose segment - R: .data[as.character(s1:s2), ]$gl - segment_start = s1 + segment_start = s1 segment_end = s2 glucose_segment = data['gl'].iloc[segment_start:segment_end+1] # including next cross point - + # Find min or max based on crossover type if crosses.iloc[i]['type'] == types['REL_MIN']: # crosses[i, "type"] in R # R: min(.data[as.character(s1:s2), ]$gl, na.rm = TRUE) @@ -416,11 +416,11 @@ def mage_atomic(data, short_ma,long_ma): N = len(minmax) - # MAGE+ algorithm, which identifies and measures positive glycemic excursions + # MAGE+ algorithm, which identifies and measures positive glycemic excursions # (nadir-to-peak movements that exceed the standard deviation threshold). mage_plus_heights, mage_plus_tp_pairs = calculate_mage_plus(differences, minmax, standardD) - # MAGE- algorithm, which identifies and measures negative glycemic excursions + # MAGE- algorithm, which identifies and measures negative glycemic excursions # (peak-to-nadir movements that exceed the standard deviation threshold). mage_minus_heights, mage_minus_tp_pairs = calculate_mage_minus(differences, minmax, standardD) @@ -432,10 +432,10 @@ def mage_atomic(data, short_ma,long_ma): 'plus_or_minus': [np.nan], 'first_excursion': [np.nan] }, index=[0]) - + # Determine which excursion type occurs first - if (len(mage_plus_heights) > 0 and - (len(mage_minus_heights) == 0 or + if (len(mage_plus_heights) > 0 and + (len(mage_minus_heights) == 0 or mage_plus_tp_pairs[0][1] <= mage_minus_tp_pairs[0][0])): is_plus_first = True else: @@ -444,26 +444,26 @@ def mage_atomic(data, short_ma,long_ma): # Create MAGE+ result dataframe mage_plus = pd.DataFrame({ 'start': [data['time'].iloc[0]], - 'end': [data['time'].iloc[-1]], + 'end': [data['time'].iloc[-1]], 'MAGE': [np.mean(mage_plus_heights) if len(mage_plus_heights) > 0 else np.nan], 'plus_or_minus': ['PLUS'], 'first_excursion': [is_plus_first] }) - # Create MAGE- result dataframe + # Create MAGE- result dataframe mage_minus = pd.DataFrame({ 'start': [data['time'].iloc[0]], 'end': [data['time'].iloc[-1]], 'MAGE': [abs(np.mean(mage_minus_heights)) if len(mage_minus_heights) > 0 else np.nan], - 'plus_or_minus': ['MINUS'], + 'plus_or_minus': ['MINUS'], 'first_excursion': [not is_plus_first] }) # Determine which direction has maximum MAGE value - is_plus_max = ((mage_plus['MAGE'].iloc[0] >= mage_minus['MAGE'].iloc[0]) - if not pd.isna(mage_plus['MAGE'].iloc[0]) - and not pd.isna(mage_minus['MAGE'].iloc[0]) - else False + is_plus_max = ((mage_plus['MAGE'].iloc[0] >= mage_minus['MAGE'].iloc[0]) + if not pd.isna(mage_plus['MAGE'].iloc[0]) + and not pd.isna(mage_minus['MAGE'].iloc[0]) + else False ) return pd.concat([mage_plus, mage_minus], ignore_index=True) @@ -487,39 +487,39 @@ def calculate_mage_plus(differences, minmax, standardD): mage_plus_heights = [] mage_plus_tp_pairs = [] j = prev_j = 0 # Python uses 
0-based indexing - + while j < N: # Get differences from previous extrema to current point j delta = differences[prev_j:j+1, j] # j+1 because Python slicing is exclusive - + if len(delta) == 0: j += 1 continue - + max_v = np.max(delta) # Find maximum upward movement i = int(np.argmax(delta) + prev_j) # Index of extrema creating maximum - + if max_v > standardD: # Found significant upward excursion (nadir to peak > SD) k = j while k < N: if minmax[k] >= minmax[j]: j = k # Continue riding the peak upward - + # Check if excursion ends (significant drop or end of data) if differences[j, k] < -standardD or k == N - 1: max_v = minmax[j] - minmax[i] # Record the excursion mage_plus_heights.append(max_v) mage_plus_tp_pairs.append((i, j)) # (nadir_index, peak_index) - + prev_j = k j = k break k += 1 else: j += 1 - + return mage_plus_heights, mage_plus_tp_pairs def calculate_mage_minus(differences, minmax, standardD): @@ -538,38 +538,38 @@ def calculate_mage_minus(differences, minmax, standardD): mage_minus_heights = [] mage_minus_tp_pairs = [] j = prev_j = 0 # Python uses 0-based indexing - + while j < N: - # Get differences from previous extrema to current point j + # Get differences from previous extrema to current point j delta = differences[prev_j:j+1, j] # j+1 because Python slicing is exclusive - + if len(delta) == 0: j += 1 continue - + min_v = np.min(delta) # Find maximum downward movement (most negative) i = np.argmin(delta) + prev_j # Index of extrema creating minimum - + if min_v < -standardD: # Found significant downward excursion k = j while k < N: if minmax[k] <= minmax[j]: j = k # Continue riding the nadir downward - + # Check if excursion ends (significant rise or end of data) if differences[j, k] > standardD or k == N - 1: min_v = minmax[j] - minmax[i] # Calculate final excursion magnitude # Record the excursion (note: min_v will be negative) mage_minus_heights.append(min_v) mage_minus_tp_pairs.append((i, j, k)) # (peak_index, nadir_index, end_index) - + prev_j = j j = k break k += 1 else: j += 1 - + return mage_minus_heights, mage_minus_tp_pairs def segment_time_series(data, max_gap_minutes): @@ -578,7 +578,7 @@ def segment_time_series(data, max_gap_minutes): Simpler approach using time differences """ # Calculate time differences - + # Calculate time differences between consecutive non-NA glucose readings data['time_diff'] = np.nan valid_indices = data['gl'].notna() @@ -589,14 +589,14 @@ def segment_time_series(data, max_gap_minutes): time_diffs = valid_times.diff().dt.total_seconds() / 60 # Convert to minutes # Assign differences back to original dataframe at valid indices data.loc[valid_indices, 'time_diff'] = time_diffs - + # Identify where gaps exceed threshold large_gaps = data['time_diff'] > max_gap_minutes - + # Create segment labels by cumulatively summing large gaps # This creates a new segment ID each time we encounter a large gap data['segment_id'] = large_gaps.cumsum() - + # Group by segment and return list of DataFrames segments = [] for segment_id, group in data.groupby('segment_id'): @@ -606,6 +606,6 @@ def segment_time_series(data, max_gap_minutes): while len(group) > 0 and pd.isna(group['gl'].iloc[-1]): group = group.iloc[:-1] segments.append(group.reset_index(drop=True)) - + return segments # Identify where gaps exceed threshold diff --git a/iglu_python/pgs.py b/iglu_python/pgs.py index 2536a43..df7816c 100644 --- a/iglu_python/pgs.py +++ b/iglu_python/pgs.py @@ -134,5 +134,5 @@ def pgs_single(subj_data: pd.DataFrame) -> float: subject_data = data[data["id"] == 
subject_id].copy() pgs_value = pgs_single(subject_data) results.append({"id": subject_id, "PGS": pgs_value}) - + return pd.DataFrame(results) diff --git a/iglu_python/process_data.py b/iglu_python/process_data.py index 0e86df4..b703348 100644 --- a/iglu_python/process_data.py +++ b/iglu_python/process_data.py @@ -1,12 +1,12 @@ -from typing import Optional, Callable, Union import warnings -import re +from typing import Callable, Optional, Union -import pandas as pd import numpy as np +import pandas as pd from .utils import localize_naive_timestamp + def process_data( data: Union[pd.DataFrame, pd.Series, list, np.ndarray], id: Optional[str] = None, @@ -82,11 +82,11 @@ def process_data( # Default time parser if time_parser is None: time_parser = pd.to_datetime - + # Validate input data type if not isinstance(data, (pd.DataFrame, pd.Series, list, np.ndarray)): raise TypeError("Invalid data type, please use DataFrame, Series, list, or numpy array.") - + # Convert to DataFrame if necessary if isinstance(data, (list, np.ndarray)): if glu is None and timestamp is None and id is None: @@ -94,27 +94,27 @@ def process_data( data = pd.DataFrame({'gl': data}) else: raise ValueError("Cannot process list/array data with column specifications. Please provide a DataFrame.") - + if isinstance(data, pd.Series): if data.index.dtype.kind == 'M': # datetime index data = pd.DataFrame({'time': data.index, 'gl': data.values}) else: data = pd.DataFrame({'gl': data.values}) - + # Ensure we have a DataFrame if not isinstance(data, pd.DataFrame): raise TypeError("Could not convert data to DataFrame") - + # Drop NAs data = data.dropna() - + if data.empty: raise ValueError("No data remaining after removing NAs") - + # Make column names lowercase for matching original_columns = data.columns.tolist() data.columns = [col.lower() if isinstance(col, str) else str(col).lower() for col in data.columns] - + # Process id column if id is None: print("No 'id' parameter passed, defaulting id to 1") @@ -122,27 +122,27 @@ def process_data( else: if not isinstance(id, str): raise ValueError("User-defined id name must be string.") - + id_lower = id.lower() if id_lower not in data.columns: warning_msg = (f"Could not find user-defined id argument name '{id}' in dataset. " f"Available columns: {original_columns}") warnings.warn(warning_msg) - + # Check if there's a column named 'id' if 'id' in data.columns: - raise ValueError(f"Fix user-defined argument name for id. " - f"Note: A column in the dataset DOES match the name 'id': " - f"If this is the correct column, indicate as such in function argument. " - f"i.e. id = 'id'") + raise ValueError("Fix user-defined argument name for id. " + "Note: A column in the dataset DOES match the name 'id': " + "If this is the correct column, indicate as such in function argument. " + "i.e. id = 'id'") else: raise ValueError(f"Column '{id}' not found in data") - + # Move id column to first position and rename id_col = data[id_lower] data = data.drop(columns=[id_lower]) data.insert(0, 'id', id_col.astype('string')) - + # Process timestamp column if timestamp is None: if 'time' not in data.columns: @@ -151,37 +151,37 @@ def process_data( else: if not isinstance(timestamp, str): raise ValueError("User-defined timestamp name must be string.") - + timestamp_lower = timestamp.lower() if timestamp_lower not in data.columns: warning_msg = (f"Could not find user-defined timestamp argument name '{timestamp}' in dataset. 
" f"Available columns: {original_columns}") warnings.warn(warning_msg) - + # Check if there's a column named 'time' if 'time' in data.columns: - raise ValueError(f"Fix user-defined argument name for timestamp. " - f"Note: A column in the dataset DOES match the name 'time': " - f"If this is the correct column, indicate as such in function argument. " - f"i.e. timestamp = 'time'") + raise ValueError("Fix user-defined argument name for timestamp. " + "Note: A column in the dataset DOES match the name 'time': " + "If this is the correct column, indicate as such in function argument. " + "i.e. timestamp = 'time'") else: raise ValueError(f"Column '{timestamp}' not found in data") - + timestamp_col = timestamp_lower - + # Move timestamp column to second position and rename if 'time' not in data.columns or timestamp_col != 'time': time_data = data[timestamp_col] if timestamp_col != 'time': data = data.drop(columns=[timestamp_col]) - + # Parse time try: time_data = time_parser(time_data) except Exception as e: raise ValueError(f"Failed to parse times, ensure times are in parsable format. " f"Original error: {str(e)}") - + # Insert at position 1 (after id) data.insert(1, 'time', time_data) @@ -196,62 +196,62 @@ def process_data( else: if not isinstance(glu, str): raise ValueError("User-defined glucose name must be string.") - + glu_lower = glu.lower() if glu_lower not in data.columns: warning_msg = (f"Could not find user-defined glucose argument name '{glu}' in dataset. " f"Available columns: {original_columns}") warnings.warn(warning_msg) - + # Check if there's a column named 'gl' if 'gl' in data.columns: - raise ValueError(f"Fix user-defined argument name for glucose. " - f"Note: A column in the dataset DOES match the name 'gl': " - f"If this is the correct column, indicate as such in function argument. " - f"i.e. glu = 'gl'") + raise ValueError("Fix user-defined argument name for glucose. " + "Note: A column in the dataset DOES match the name 'gl': " + "If this is the correct column, indicate as such in function argument. " + "i.e. glu = 'gl'") else: raise ValueError(f"Column '{glu}' not found in data") - + glu_col = glu_lower - + # Check if glucose values are in mmol/L mmol_conversion = False if glu and 'mmol/l' in glu.lower(): mmol_conversion = True - + # Move glucose column to third position and rename if 'gl' not in data.columns or glu_col != 'gl': gl_data = data[glu_col] if glu_col != 'gl': data = data.drop(columns=[glu_col]) - + # Convert to numeric try: gl_data = pd.to_numeric(gl_data, errors='coerce') except Exception as e: raise ValueError(f"Failed to convert glucose values to numeric: {str(e)}") - + # Convert mmol/L to mg/dL if needed if mmol_conversion: gl_data = gl_data * 18 - + # Insert at position 2 (after id and time) data.insert(2, 'gl', gl_data) - + # Validation warnings if data['gl'].min() < 20: warnings.warn("Minimum glucose reading below 20. Data may not be cleaned.") - + if data['gl'].max() > 500: warnings.warn("Maximum glucose reading above 500. 
Data may not be cleaned.") - + # Keep only the three required columns in correct order data = data[['id', 'time', 'gl']] - + # Drop rows with NaN glucose values data = data.dropna(subset=['gl']) - + if data.empty: raise ValueError("No valid data remaining after processing") - - return data \ No newline at end of file + + return data diff --git a/iglu_python/sd_measures.py b/iglu_python/sd_measures.py index 0c785b0..317322c 100644 --- a/iglu_python/sd_measures.py +++ b/iglu_python/sd_measures.py @@ -1,13 +1,15 @@ -import pandas as pd -import numpy as np -from typing import Optional, List, Dict, Any import warnings +from typing import Any, Dict, Optional + +import numpy as np +import pandas as pd -from .utils import check_data_columns,CGMS2DayByDay +from .utils import CGMS2DayByDay, check_data_columns -def sd_measures(data: pd.DataFrame, - dt0: Optional[int] = None, - inter_gap: int = 45, + +def sd_measures(data: pd.DataFrame, + dt0: Optional[int] = None, + inter_gap: int = 45, tz: str = "") -> pd.DataFrame: """ Calculate SD subtypes for glucose variability analysis @@ -74,41 +76,41 @@ def sd_measures(data: pd.DataFrame, >>> result = sd_measures(glucose_data) >>> print(result) """ - + # Data validation (placeholder - implement check_data_columns equivalent) data = check_data_columns(data, time_check=True, tz=tz) - + subjects = data['id'].unique() n_subjects = len(subjects) - + # Calculate uniform grid for all subjects gdall = [] current_dt0 = dt0 - + for i, subject_id in enumerate(subjects): subject_data = data[data['id'] == subject_id].copy() - + # Convert to day-by-day format (placeholder - implement CGMS2DayByDay equivalent) gd2d, actual_dates, gd2d_dt0 = CGMS2DayByDay(subject_data, tz=tz, dt0=current_dt0, inter_gap=inter_gap) gdall.append(gd2d) - + # Use the dt0 from first subject for consistency if i == 0: current_dt0 = gd2d_dt0 - + dt0 = current_dt0 - + # Calculate SD measures for each subject results = [] - + for i, gd2d in enumerate(gdall): subject_id = subjects[i] result = _calculate_sd_subtypes(gd2d, dt0, subject_id) results.append(result) - + # Combine results final_results = pd.DataFrame(results) - + return final_results @@ -130,45 +132,45 @@ def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[ dict Dictionary containing all SD measures """ - + result = {'id': subject_id} - + # 1. SDw - vertical within days # Standard deviation within each day, then mean across days daily_sds = _safe_nanstd(gd2d, axis=1, ddof=1) # ddof=1 for sample std result['SDw'] = _safe_nanmean(daily_sds) - + # 2. SDhhmm - between time points # Mean at each time point across days, then SD of those means timepoint_means = _safe_nanmean(gd2d, axis=0) result['SDhhmm'] = _safe_nanstd(timepoint_means, ddof=1) - + # 3. SDwsh - within series (1-hour windows) # Rolling standard deviation over 1-hour windows win = round(60 / dt0) # Number of measurements in 1 hour gs = gd2d.flatten() # Vectorize by columns (time-first order) - + # Calculate rolling standard deviation rolling_sds = _rolling_std(gs, window=win) result['SDwsh'] = _safe_nanmean(rolling_sds) - + # 4. SDdm - horizontal sd (between daily means) # Standard deviation of daily mean glucose values daily_means = _safe_nanmean(gd2d, axis=1) result['SDdm'] = _safe_nanstd(daily_means, ddof=1) - + # 5. SDb - between days, within timepoints # SD across days for each time point, then mean of those SDs timepoint_sds = _safe_nanstd(gd2d, axis=0, ddof=1) result['SDb'] = _safe_nanmean(timepoint_sds) - + # 6. 
SDbdm - between days, within timepoints, corrected for daily means # Subtract daily mean from each value, then calculate SDb on corrected values daily_means_matrix = daily_means[:, np.newaxis] # Convert to column vector corrected_gd2d = gd2d - daily_means_matrix corrected_timepoint_sds = _safe_nanstd(corrected_gd2d, axis=0, ddof=1) result['SDbdm'] = _safe_nanmean(corrected_timepoint_sds) - + return result @@ -191,17 +193,17 @@ def _rolling_std(data: np.ndarray, window: int) -> np.ndarray: #valid_data = data[~np.isnan(data)] valid_data = np.concatenate([data, np.full(window, np.nan)]) # add nan tail to match R n = len(valid_data) - + if n < window: return np.array([np.nan]) - + rolling_stds = [] - + for i in range(n - window + 1): window_data = valid_data[i:i + window] if len(window_data) == window: # Full window rolling_stds.append(_safe_nanstd(window_data, ddof=1)) - + return np.array(rolling_stds) if rolling_stds else np.array([np.nan]) def _safe_nanstd(data: np.ndarray, axis: Optional[int] = None, ddof: int = 1) -> float: @@ -224,7 +226,7 @@ def _safe_nanstd(data: np.ndarray, axis: Optional[int] = None, ddof: int = 1) -> """ with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) - + if axis is None: # Check if we have enough non-NaN values valid_data = data[~np.isnan(data)] @@ -234,7 +236,7 @@ def _safe_nanstd(data: np.ndarray, axis: Optional[int] = None, ddof: int = 1) -> # For axis operations, we need to check each slice # This is more complex, so we'll just suppress warnings pass - + return np.nanstd(data, axis=axis, ddof=ddof) @@ -256,7 +258,7 @@ def _safe_nanmean(data: np.ndarray, axis: Optional[int] = None) -> float: """ with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) - + if axis is None: # Check if we have any non-NaN values if np.isnan(data).all(): @@ -264,29 +266,29 @@ def _safe_nanmean(data: np.ndarray, axis: Optional[int] = None) -> float: else: # For axis operations, suppress warnings and let numpy handle it pass - + return np.nanmean(data, axis=axis) # Alternative vectorized implementation for better performance -def sd_measures_vectorized(data: pd.DataFrame, - dt0: Optional[int] = None, - inter_gap: int = 45, +def sd_measures_vectorized(data: pd.DataFrame, + dt0: Optional[int] = None, + inter_gap: int = 45, tz: str = "") -> pd.DataFrame: """ Vectorized version of sd_measures for better performance with large datasets """ data = check_data_columns(data, time_check=True, tz=tz) - + results = [] - + for subject_id in data['id'].unique(): subject_data = data[data['id'] == subject_id].copy() gd2d, actual_dates, gd2d_dt0 = CGMS2DayByDay(subject_data, tz=tz, dt0=current_dt0, inter_gap=inter_gap) - + result = _calculate_sd_subtypes_vectorized(gd2d, gd2d_dt0, subject_id) results.append(result) - + return pd.DataFrame(results) @@ -297,7 +299,7 @@ def _calculate_sd_subtypes_vectorized(gd2d: np.ndarray, dt0: int, subject_id: An # Use numpy's built-in functions for better performance with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) - + return { 'id': subject_id, 'SDw': _safe_nanmean(np.nanstd(gd2d, axis=1, ddof=1)), @@ -305,6 +307,6 @@ def _calculate_sd_subtypes_vectorized(gd2d: np.ndarray, dt0: int, subject_id: An 'SDwsh': _safe_nanmean(_rolling_std(gd2d.T.flatten(), round(60/dt0))), 'SDdm': np.nanstd(_safe_nanmean(gd2d, axis=1), ddof=1), 'SDb': _safe_nanmean(np.nanstd(gd2d, axis=0, ddof=1)), - 'SDbdm': _safe_nanmean(np.nanstd(gd2d - _safe_nanmean(gd2d, axis=1, 
keepdims=True), + 'SDbdm': _safe_nanmean(np.nanstd(gd2d - _safe_nanmean(gd2d, axis=1, keepdims=True), axis=0, ddof=1)) } diff --git a/iglu_python/summary_glu.py b/iglu_python/summary_glu.py index d15711d..f607a4a 100644 --- a/iglu_python/summary_glu.py +++ b/iglu_python/summary_glu.py @@ -1,8 +1,8 @@ -from typing import Union import warnings +from typing import Union -import pandas as pd import numpy as np +import pandas as pd from .utils import check_data_columns @@ -49,10 +49,10 @@ def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.Da """ # Handle vector input (Series, list, or numpy array) is_vector = False - + if isinstance(data, (pd.Series, list, np.ndarray)): is_vector = True - + # Convert to numpy array for consistent handling if isinstance(data, pd.Series): glucose_values = data.values @@ -60,31 +60,31 @@ def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.Da glucose_values = np.array(data) else: # numpy array glucose_values = data - + # Remove NaN values glucose_values = glucose_values[~np.isnan(glucose_values)] - + if len(glucose_values) == 0: raise ValueError("No valid glucose values found") - + # Calculate summary statistics summary_stats = _calculate_summary_stats(glucose_values) - + # Return DataFrame without id column return pd.DataFrame([summary_stats]) - + # Handle DataFrame input else: # Check data format data = check_data_columns(data) - + # Filter out missing glucose values and group by id result_rows = [] - + for subject_id in data['id'].unique(): subject_data = data[data['id'] == subject_id] glucose_values = subject_data['gl'].dropna().values - + if len(glucose_values) == 0: warnings.warn(f"No valid glucose values found for subject {subject_id}") # Still include the subject with NaN values @@ -98,14 +98,14 @@ def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.Da } else: summary_stats = _calculate_summary_stats(glucose_values) - + # Add subject id to the summary summary_stats['id'] = subject_id result_rows.append(summary_stats) - + # Create result DataFrame with id column first result_df = pd.DataFrame(result_rows) - + # Reorder columns to match R output (id first, then summary stats) column_order = ['id', 'Min.', '1st Qu.', 'Median', 'Mean', '3rd Qu.', 'Max.'] return result_df[column_order] @@ -134,4 +134,4 @@ def _calculate_summary_stats(glucose_values: np.ndarray) -> dict: 'Mean': np.mean(glucose_values), '3rd Qu.': np.percentile(glucose_values, 75), 'Max.': np.max(glucose_values) - } \ No newline at end of file + } diff --git a/iglu_python/utils.py b/iglu_python/utils.py index bc1e354..ac20ab3 100644 --- a/iglu_python/utils.py +++ b/iglu_python/utils.py @@ -1,4 +1,3 @@ -import warnings from datetime import datetime from typing import Optional, Tuple from zoneinfo import ZoneInfo @@ -256,7 +255,7 @@ def CGMS2DayByDay( if is_iglu_r_compatible(): # convert start_time into naive datetime start_time = start_time.tz_localize(None) - + actual_dates = [start_time + pd.Timedelta(days=i) for i in range(n_days)] return interp_data, actual_dates, dt0 From 32377810a4328816ab9ed6281f39aa5dec58c59e Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Mon, 16 Jun 2025 17:46:02 +0300 Subject: [PATCH 08/16] ruff formatting --- iglu_python/above_percent.py | 8 ++++-- iglu_python/adrr.py | 4 +-- iglu_python/auc.py | 2 +- iglu_python/below_percent.py | 8 ++++-- iglu_python/cogi.py | 14 +++++++--- iglu_python/cv_glu.py | 10 +++---- iglu_python/cv_measures.py | 14 +++++----- iglu_python/gmi.py | 2 +- 
iglu_python/in_range_percent.py | 8 ++++-- iglu_python/mage.py | 37 +++++++++++++------------- iglu_python/process_data.py | 38 +++++++++++++-------------- iglu_python/quantile_glu.py | 4 ++- iglu_python/sd_measures.py | 46 ++++++++++++++++----------------- iglu_python/sd_roc.py | 2 +- iglu_python/summary_glu.py | 18 ++++++------- 15 files changed, 117 insertions(+), 98 deletions(-) diff --git a/iglu_python/above_percent.py b/iglu_python/above_percent.py index 65d5ebe..bbdb9d6 100644 --- a/iglu_python/above_percent.py +++ b/iglu_python/above_percent.py @@ -8,7 +8,7 @@ def above_percent( data: Union[pd.DataFrame, pd.Series, list,np.ndarray], - targets_above: List[int] = [140, 180, 250], + targets_above: List[int] = None, ) -> pd.DataFrame|dict[str:float]: """ Calculate percentage of values above target thresholds. @@ -59,6 +59,8 @@ def above_percent( 0 75.0 25.0 """ # Handle Series input + if targets_above is None: + targets_above = [140, 180, 250] if isinstance(data, (pd.Series, list,np.ndarray)): if isinstance(data, (list, np.ndarray)): data = pd.Series(data) @@ -84,11 +86,13 @@ def above_percent( df = df[['id'] + [col for col in df.columns if col != 'id']] return df -def above_percent_single(data: pd.Series, targets_above: List[int] = [140, 180, 250]) -> dict[str:float]: +def above_percent_single(data: pd.Series, targets_above: List[int] = None) -> dict[str:float]: """ Calculate percentage of values above target thresholds for a single series/subject. """ # Convert targets to float + if targets_above is None: + targets_above = [140, 180, 250] targets_above = [int(t) for t in targets_above] # Calculate total non-NA readings diff --git a/iglu_python/adrr.py b/iglu_python/adrr.py index 6093954..00a13a9 100644 --- a/iglu_python/adrr.py +++ b/iglu_python/adrr.py @@ -68,7 +68,7 @@ def adrr_multi(data: pd.DataFrame) -> pd.DataFrame: data_filtered = data.dropna(subset=["gl"]) if len(data_filtered) == 0: - warnings.warn("All glucose values are NaN. Returning empty DataFrame.") + warnings.warn("All glucose values are NaN. Returning empty DataFrame.", stacklevel=2) return pd.DataFrame(columns=["id", "ADRR"]) # Group by id and date, then calculate BGI and daily risk range @@ -117,7 +117,7 @@ def _calculate_daily_risk(group: pd.DataFrame) -> pd.Series: ) if len(data) == 0: - warnings.warn("Input DataFrame is empty. Returning empty DataFrame.") + warnings.warn("Input DataFrame is empty. Returning empty DataFrame.", stacklevel=2) return pd.DataFrame(columns=["id", "ADRR"]) # Calculate ADRR diff --git a/iglu_python/auc.py b/iglu_python/auc.py index b1ef01b..e0cbdb4 100644 --- a/iglu_python/auc.py +++ b/iglu_python/auc.py @@ -17,7 +17,7 @@ def auc(data: pd.DataFrame, tz: str = "") -> pd.DataFrame: AUC is calculated using the formula: (dt0/60) * ((gl[2:length(gl)] + gl[1:(length(gl)-1)])/2), where dt0/60 is the frequency of the cgm measurements in hours and gl are the glucose values. - This formula is based off the Trapezoidal Rule: + This formula is based off the Trapezoidal Rule: (time[2]-time[1] * ((glucose[1]+glucose[2])/2)). diff --git a/iglu_python/below_percent.py b/iglu_python/below_percent.py index 6c3017a..2632bc1 100644 --- a/iglu_python/below_percent.py +++ b/iglu_python/below_percent.py @@ -7,7 +7,7 @@ def below_percent( - data: Union[pd.DataFrame, pd.Series, list,np.ndarray], targets_below: List[int] = [54, 70] + data: Union[pd.DataFrame, pd.Series, list,np.ndarray], targets_below: List[int] = None ) -> pd.DataFrame|dict[str:float]: """ Calculate percentage of values below target thresholds. 
@@ -58,6 +58,8 @@ def below_percent( 0 25.0 50.0 """ # Handle Series input + if targets_below is None: + targets_below = [54, 70] if isinstance(data, (pd.Series, list,np.ndarray)): if isinstance(data, (list, np.ndarray)): data = pd.Series(data) @@ -83,11 +85,13 @@ def below_percent( df = df[['id'] + [col for col in df.columns if col != 'id']] return df -def below_percent_single(data: pd.Series, targets_below: List[int] = [54, 70]) -> dict[str:float]: +def below_percent_single(data: pd.Series, targets_below: List[int] = None) -> dict[str:float]: """ Calculate percentage of values below target thresholds for a single series/subject. """ # Convert targets to float + if targets_below is None: + targets_below = [54, 70] targets_below = [int(t) for t in targets_below] # Calculate total non-NA readings diff --git a/iglu_python/cogi.py b/iglu_python/cogi.py index 5131451..3e3610f 100644 --- a/iglu_python/cogi.py +++ b/iglu_python/cogi.py @@ -11,8 +11,8 @@ def cogi( data: Union[pd.DataFrame, pd.Series, list,np.ndarray], - targets: List[int] = [70, 180], - weights: List[float] = [0.5, 0.35, 0.15], + targets: List[int] = None, + weights: List[float] = None, ) -> pd.DataFrame|float: """ Calculate Coefficient of Glucose Irregularity (COGI). @@ -68,6 +68,10 @@ def cogi( """ # Check and prepare data + if weights is None: + weights = [0.5, 0.35, 0.15] + if targets is None: + targets = [70, 180] targets = sorted([float(t) for t in targets]) if isinstance(data, (pd.Series, list, np.ndarray)): @@ -83,9 +87,13 @@ def cogi( return out -def cogi_single(data: pd.Series, targets: List[int] = [70, 180], weights: List[float] = [0.5, 0.35, 0.15]) -> float: +def cogi_single(data: pd.Series, targets: List[int] = None, weights: List[float] = None) -> float: """Calculate COGI for a single subject""" # Calculate components + if weights is None: + weights = [0.5, 0.35, 0.15] + if targets is None: + targets = [70, 180] ir_dict = in_range_percent(data, [targets]) ir = ir_dict["in_range_" + "_".join(map(str, targets))] br_dict = below_percent(data, targets_below=[targets[0]]) diff --git a/iglu_python/cv_glu.py b/iglu_python/cv_glu.py index 9e04e58..9a20a31 100644 --- a/iglu_python/cv_glu.py +++ b/iglu_python/cv_glu.py @@ -20,23 +20,23 @@ def cv_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> Union[pd.DataFrame, float]: """Calculate Coefficient of Variation (CV) of glucose levels. - + The function cv_glu produces CV values in a pandas DataFrame object. - + Args: data: DataFrame object with column names "id", "time", and "gl", or pandas Series of glucose values. - + Returns: If a DataFrame object is passed, then a DataFrame with two columns: subject id and corresponding CV value is returned. If a Series of glucose values is passed, then a DataFrame with just the CV value is returned. - + Details: A DataFrame with 1 row for each subject, a column for subject id and a column for CV values is returned. NA glucose values are omitted from the calculation of the CV. - + CV (Coefficient of Variation) is calculated by 100 * sd(G) / mean(G) Where G is the list of all Glucose measurements for a subject. """ diff --git a/iglu_python/cv_measures.py b/iglu_python/cv_measures.py index 73552cc..7300a7b 100644 --- a/iglu_python/cv_measures.py +++ b/iglu_python/cv_measures.py @@ -19,30 +19,30 @@ def cv_measures(data, dt0=None, inter_gap=45, tz="")->pd.DataFrame|dict[str:float]: """Calculate Coefficient of Variation subtypes (CVmean and CVsd). - + The function cv_measures produces CV subtype values in a pandas DataFrame object. 
- + Args: data: DataFrame object with column names "id", "time", and "gl" dt0: The time frequency for interpolation in minutes. If None, will match the CGM meter's frequency inter_gap: The maximum allowable gap (in minutes) for interpolation. Default is 45 tz: String name of timezone. Default is "" - + Returns: A DataFrame with three columns: subject id and corresponding CV subtype values (CVmean and CVsd) - + Details: A DataFrame with 1 row for each subject, a column for subject id and a column for each CV subtype value is returned. - + Missing values will be linearly interpolated when close enough to non-missing values. - + 1. CVmean: Calculated by first taking the coefficient of variation of each day's glucose measurements, then taking the mean of all the coefficient of variations. That is, for x days we compute cv_1 ... cv_x daily coefficient of variations and calculate 1/x * sum(cv_i) - + 2. CVsd: Calculated by first taking the coefficient of variation of each day's glucose measurements, then taking the standard deviation of all the coefficient of variations. That is, for d diff --git a/iglu_python/gmi.py b/iglu_python/gmi.py index cbf5df9..312b7d4 100644 --- a/iglu_python/gmi.py +++ b/iglu_python/gmi.py @@ -51,7 +51,7 @@ def gmi(data: Union[pd.DataFrame, pd.Series, list]) -> float|pd.DataFrame: # Check and prepare data data = check_data_columns(data) - is_vector = getattr(data, "is_vector", False) + getattr(data, "is_vector", False) # Calculate GMI for each subject out = data.groupby("id").agg( diff --git a/iglu_python/in_range_percent.py b/iglu_python/in_range_percent.py index 35e7c66..76909e9 100644 --- a/iglu_python/in_range_percent.py +++ b/iglu_python/in_range_percent.py @@ -8,7 +8,7 @@ def in_range_percent( data: Union[pd.DataFrame, pd.Series, list,np.ndarray], - target_ranges: List[List[int]] = [[70, 180], [63, 140]], + target_ranges: List[List[int]] = None, ) -> pd.DataFrame|float: """ Calculate percentage of values within target ranges. @@ -65,6 +65,8 @@ def in_range_percent( 0 75.0 """ # Handle Series input + if target_ranges is None: + target_ranges = [[70, 180], [63, 140]] if isinstance(data, (pd.Series, list,np.ndarray)): if isinstance(data, (list, np.ndarray)): data = pd.Series(data) @@ -88,11 +90,13 @@ def in_range_percent( df = df[['id'] + [col for col in df.columns if col != 'id']] return df -def in_range_percent_single(data: pd.Series, target_ranges: List[List[int]] = [[70, 180], [63, 140]]) -> float: +def in_range_percent_single(data: pd.Series, target_ranges: List[List[int]] = None) -> float: """ Calculate percentage of values within target ranges for a single series/subject. """ # Calculate total non-NA readings + if target_ranges is None: + target_ranges = [[70, 180], [63, 140]] total_readings = len(data.dropna()) if total_readings == 0: return {f"in_range_{min(range_vals)}_{max(range_vals)}": 0 diff --git a/iglu_python/mage.py b/iglu_python/mage.py index 4c48e57..2c43522 100644 --- a/iglu_python/mage.py +++ b/iglu_python/mage.py @@ -27,18 +27,18 @@ def mage( one peak/nadir to the next nadir/peak from the original glucose values. If version 'ma' is selected, the function computationally emulates the manual method for calculating - the mean amplitude of glycemic excursions (MAGE) first suggested in - "Mean Amplitude of Glycemic Excursions, a Measure of Diabetic Instability", (Service, 1970). + the mean amplitude of glycemic excursions (MAGE) first suggested in + "Mean Amplitude of Glycemic Excursions, a Measure of Diabetic Instability", (Service, 1970). 
For this version, glucose values will be interpolated over a uniform time grid prior to calculation. - 'ma' is a more accurate algorithm that uses the crosses of a short and long moving average - to identify intervals where a peak/nadir might exist. Then, the height from one peak/nadir - to the next nadir/peak is calculated from the _original_ (not moving average) glucose values. - (Note: this function internally uses CGMS2DayByDay with dt0 = 5. + 'ma' is a more accurate algorithm that uses the crosses of a short and long moving average + to identify intervals where a peak/nadir might exist. Then, the height from one peak/nadir + to the next nadir/peak is calculated from the _original_ (not moving average) glucose values. + (Note: this function internally uses CGMS2DayByDay with dt0 = 5. Thus, all CGM data is linearly interpolated to 5 minute intervals. See the MAGE vignette for more details.) - 'naive' algorithm calculates MAGE by taking the mean of absolute glucose differences - (between each value and the mean) that are greater than the standard deviation. A multiplier can be added + 'naive' algorithm calculates MAGE by taking the mean of absolute glucose differences + (between each value and the mean) that are greater than the standard deviation. A multiplier can be added to the standard deviation using the `sd_multiplier` argument. @@ -178,11 +178,10 @@ def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, # 1.1 Interpolate over uniform grid # Note: always interpolate to 5 minute grid data_ip = CGMS2DayByDay(data, dt0=5, inter_gap=inter_gap, tz=tz) - dt0 = data_ip[2] # Time between measurements in minutes + data_ip[2] # Time between measurements in minutes # replace for 5 min to fix bug in CGMS2DayByDay - dt0 = 5 day_one = data_ip[1][0] - ndays = len(data_ip[1]) + len(data_ip[1]) # 1.2 Generate grid times by starting from day one and cumulatively summing # note fix 5 min used in interpretation @@ -247,7 +246,7 @@ def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, idx = return_val.groupby(['start', 'end'])['MAGE'].idxmax() res = return_val.loc[idx].reset_index(drop=True) else: # default: first excursions only - res = return_val[return_val['first_excursion'] == True].copy() + res = return_val[return_val['first_excursion']].copy() # Calculate time-weighted MAGE if res.empty: @@ -413,7 +412,7 @@ def mage_atomic(data, short_ma,long_ma): # excursion elimination differences = np.subtract.outer(minmax, minmax).T standardD = data['gl'].std() # pandas uses sample std dev by default - N = len(minmax) + len(minmax) # MAGE+ algorithm, which identifies and measures positive glycemic excursions @@ -460,7 +459,7 @@ def mage_atomic(data, short_ma,long_ma): }) # Determine which direction has maximum MAGE value - is_plus_max = ((mage_plus['MAGE'].iloc[0] >= mage_minus['MAGE'].iloc[0]) + ((mage_plus['MAGE'].iloc[0] >= mage_minus['MAGE'].iloc[0]) if not pd.isna(mage_plus['MAGE'].iloc[0]) and not pd.isna(mage_minus['MAGE'].iloc[0]) else False @@ -474,12 +473,12 @@ def mage_atomic(data, short_ma,long_ma): def calculate_mage_plus(differences, minmax, standardD): """ Calculate MAGE+ (positive glycemic excursions) - + Args: differences: NxN matrix of pairwise differences between extrema minmax: Array of extrema values (peaks and nadirs) standardD: Standard deviation threshold - + Returns: tuple: (mage_plus_heights, mage_plus_tp_pairs) """ @@ -525,12 +524,12 @@ def calculate_mage_plus(differences, minmax, standardD): def calculate_mage_minus(differences, minmax, standardD): """ 
Calculate MAGE- (negative glycemic excursions) - + Args: differences: NxN matrix of pairwise differences between extrema minmax: Array of extrema values (peaks and nadirs) standardD: Standard deviation threshold - + Returns: tuple: (mage_minus_heights, mage_minus_tp_pairs) """ @@ -599,7 +598,7 @@ def segment_time_series(data, max_gap_minutes): # Group by segment and return list of DataFrames segments = [] - for segment_id, group in data.groupby('segment_id'): + for _segment_id, group in data.groupby('segment_id'): # Drop the temporary columns we added group = group.drop(['time_diff', 'segment_id'], axis=1) # Drop rows with NA glucose values at the end of the segment diff --git a/iglu_python/process_data.py b/iglu_python/process_data.py index b703348..9ee62a3 100644 --- a/iglu_python/process_data.py +++ b/iglu_python/process_data.py @@ -17,41 +17,41 @@ def process_data( """ Data Pre-Processor - A helper function to assist in pre-processing the user-supplied input data - for use with other functions. This function ensures that the returned data - will be compatible with every function within the iglu package. All NAs + A helper function to assist in pre-processing the user-supplied input data + for use with other functions. This function ensures that the returned data + will be compatible with every function within the iglu package. All NAs will be removed. Parameters ---------- data : pd.DataFrame, pd.Series, list, or np.ndarray User-supplied dataset containing continuous glucose monitor data. Must - contain data for time and glucose readings at a minimum. Accepted + contain data for time and glucose readings at a minimum. Accepted formats are DataFrame, Series, list, or numpy array. id : str, optional Column name (string) corresponding to subject id column. If no value is passed, an id of 1 will be assigned to the data. timestamp : str, optional - Column name (string) corresponding to time values in data. The dates - can be in any format parsable by pd.to_datetime, or any format accepted + Column name (string) corresponding to time values in data. The dates + can be in any format parsable by pd.to_datetime, or any format accepted by the parser passed to time_parser. glu : str, optional Column name (string) corresponding to glucose values, mg/dL time_parser : callable, optional - Function used to convert datetime strings to time objects. Defaults to - pd.to_datetime. If your times are in a format not parsable by + Function used to convert datetime strings to time objects. Defaults to + pd.to_datetime. If your times are in a format not parsable by pd.to_datetime, you can pass a custom parsing function. Returns ------- pd.DataFrame - A processed DataFrame object with columns "id", "time", and "gl" that - cooperates with every other function within the iglu package. All NAs + A processed DataFrame object with columns "id", "time", and "gl" that + cooperates with every other function within the iglu package. All NAs will be removed. Details ------- - If "mmol/l" appears in the glucose column name, the glucose values will be + If "mmol/l" appears in the glucose column name, the glucose values will be multiplied by 18 to convert to mg/dL. Raises @@ -60,10 +60,10 @@ def process_data( If data is not in a supported format ValueError If required columns are not found or cannot be processed - + Notes ----- - Based on John Schwenck's data_process for his bp package and + Based on John Schwenck's data_process for his bp package and David Buchanan's R implementation. 
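A minimal sketch of the mmol/l handling stated in the Details section above; the column name here is hypothetical, and only the factor of 18 comes from the docstring.

import pandas as pd

raw = pd.DataFrame({"glucose (mmol/l)": [5.5, 7.2, 10.1]})
col = raw.columns[0]
# multiply by 18 to convert mmol/L to mg/dL when the column name flags the unit
gl = raw[col] * 18 if "mmol/l" in col.lower() else raw[col]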
Examples @@ -71,7 +71,7 @@ def process_data( >>> import pandas as pd >>> data = pd.DataFrame({ ... 'subject_id': ['A', 'A', 'B', 'B'], - ... 'datetime': ['2020-01-01 10:00:00', '2020-01-01 10:05:00', + ... 'datetime': ['2020-01-01 10:00:00', '2020-01-01 10:05:00', ... '2020-01-01 10:00:00', '2020-01-01 10:05:00'], ... 'glucose': [120, 130, 110, 125] ... }) @@ -127,7 +127,7 @@ def process_data( if id_lower not in data.columns: warning_msg = (f"Could not find user-defined id argument name '{id}' in dataset. " f"Available columns: {original_columns}") - warnings.warn(warning_msg) + warnings.warn(warning_msg, stacklevel=2) # Check if there's a column named 'id' if 'id' in data.columns: @@ -156,7 +156,7 @@ def process_data( if timestamp_lower not in data.columns: warning_msg = (f"Could not find user-defined timestamp argument name '{timestamp}' in dataset. " f"Available columns: {original_columns}") - warnings.warn(warning_msg) + warnings.warn(warning_msg, stacklevel=2) # Check if there's a column named 'time' if 'time' in data.columns: @@ -201,7 +201,7 @@ def process_data( if glu_lower not in data.columns: warning_msg = (f"Could not find user-defined glucose argument name '{glu}' in dataset. " f"Available columns: {original_columns}") - warnings.warn(warning_msg) + warnings.warn(warning_msg, stacklevel=2) # Check if there's a column named 'gl' if 'gl' in data.columns: @@ -240,10 +240,10 @@ def process_data( # Validation warnings if data['gl'].min() < 20: - warnings.warn("Minimum glucose reading below 20. Data may not be cleaned.") + warnings.warn("Minimum glucose reading below 20. Data may not be cleaned.", stacklevel=2) if data['gl'].max() > 500: - warnings.warn("Maximum glucose reading above 500. Data may not be cleaned.") + warnings.warn("Maximum glucose reading above 500. Data may not be cleaned.", stacklevel=2) # Keep only the three required columns in correct order data = data[['id', 'time', 'gl']] diff --git a/iglu_python/quantile_glu.py b/iglu_python/quantile_glu.py index b4a33d1..225da66 100644 --- a/iglu_python/quantile_glu.py +++ b/iglu_python/quantile_glu.py @@ -7,7 +7,7 @@ def quantile_glu( - data: Union[pd.DataFrame, pd.Series], quantiles: List[float] = [0, 25, 50, 75, 100] + data: Union[pd.DataFrame, pd.Series], quantiles: List[float] = None ) -> pd.DataFrame: """ Calculate glucose level quantiles. @@ -56,6 +56,8 @@ def quantile_glu( 0 130.0 145.0 182.5 200.0 """ # Handle Series input + if quantiles is None: + quantiles = [0, 25, 50, 75, 100] if isinstance(data, pd.Series): # Calculate quantiles for Series quantile_vals = np.quantile(data.dropna(), np.array(quantiles) / 100) diff --git a/iglu_python/sd_measures.py b/iglu_python/sd_measures.py index 317322c..91a7dc1 100644 --- a/iglu_python/sd_measures.py +++ b/iglu_python/sd_measures.py @@ -13,11 +13,11 @@ def sd_measures(data: pd.DataFrame, tz: str = "") -> pd.DataFrame: """ Calculate SD subtypes for glucose variability analysis - + This function produces SD subtype values in a DataFrame object with a row for each subject and columns corresponding to id followed by each SD subtype. 
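A minimal sketch of the quantile step from the quantile_glu diff above: percent-scale quantiles are divided by 100 and applied to the non-missing values; the series is invented.

import numpy as np
import pandas as pd

gl = pd.Series([130.0, 145.0, 182.5, 200.0, np.nan])
quantiles = [0, 25, 50, 75, 100]  # default, now restored from None inside the function
vals = np.quantile(gl.dropna(), np.array(quantiles) / 100)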
- + Parameters ---------- data : pd.DataFrame @@ -28,47 +28,47 @@ def sd_measures(data: pd.DataFrame, The maximum allowable gap (in minutes) for interpolation tz : str, default "" Timezone specification - + Returns ------- pd.DataFrame A DataFrame with columns for id and each of the six SD subtypes: - SDw: vertical within days - - SDhhmm: between time points + - SDhhmm: between time points - SDwsh: within series (1-hour windows) - SDdm: horizontal sd (between daily means) - SDb: between days, within timepoints - SDbdm: between days, within timepoints, corrected for daily means - + Details ------- Missing values will be linearly interpolated when close enough to non-missing values. - + SD Subtypes: - + 1. SDw - vertical within days: Standard deviation of each day's glucose measurements, then mean of all SDs - + 2. SDhhmm - between time points: Standard deviation of mean glucose values at each time point across days - + 3. SDwsh - within series: Mean of standard deviations computed over hour-long sliding windows - + 4. SDdm - horizontal sd: Standard deviation of daily mean glucose values - + 5. SDb - between days, within timepoints: Mean of standard deviations of glucose values across days for each time point - + 6. SDbdm - between days, within timepoints, corrected for changes in daily means: Like SDb but after subtracting daily mean from each glucose value - + References ---------- Rodbard (2009) New and Improved Methods to Characterize Glycemic Variability Using Continuous Glucose Monitoring. Diabetes Technology and Therapeutics 11, 551-565. - + Examples -------- >>> import pandas as pd @@ -81,7 +81,7 @@ def sd_measures(data: pd.DataFrame, data = check_data_columns(data, time_check=True, tz=tz) subjects = data['id'].unique() - n_subjects = len(subjects) + len(subjects) # Calculate uniform grid for all subjects gdall = [] @@ -117,7 +117,7 @@ def sd_measures(data: pd.DataFrame, def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[str, Any]: """ Calculate all SD subtypes for a single subject's glucose data matrix - + Parameters ---------- gd2d : np.ndarray @@ -126,7 +126,7 @@ def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[ Time interval in minutes subject_id : Any Subject identifier - + Returns ------- dict @@ -177,14 +177,14 @@ def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[ def _rolling_std(data: np.ndarray, window: int) -> np.ndarray: """ Calculate rolling standard deviation with non-trimmed ends - + Parameters ---------- data : np.ndarray Input data array window : int Window size for rolling calculation - + Returns ------- np.ndarray @@ -209,7 +209,7 @@ def _rolling_std(data: np.ndarray, window: int) -> np.ndarray: def _safe_nanstd(data: np.ndarray, axis: Optional[int] = None, ddof: int = 1) -> float: """ Safe version of np.nanstd that handles insufficient data gracefully - + Parameters ---------- data : np.ndarray @@ -218,7 +218,7 @@ def _safe_nanstd(data: np.ndarray, axis: Optional[int] = None, ddof: int = 1) -> Axis along which the standard deviation is computed ddof : int Delta degrees of freedom - + Returns ------- float @@ -243,14 +243,14 @@ def _safe_nanstd(data: np.ndarray, axis: Optional[int] = None, ddof: int = 1) -> def _safe_nanmean(data: np.ndarray, axis: Optional[int] = None) -> float: """ Safe version of np.nanmean that handles empty slices gracefully - + Parameters ---------- data : np.ndarray Input data axis : int, optional Axis along which the mean is computed - + Returns 
------- float diff --git a/iglu_python/sd_roc.py b/iglu_python/sd_roc.py index 777a0b4..9c31578 100644 --- a/iglu_python/sd_roc.py +++ b/iglu_python/sd_roc.py @@ -40,7 +40,7 @@ def sd_roc( Returns ------- pd.DataFrame - DataFrame with two columns: subject id and standard deviation of the rate of change + DataFrame with two columns: subject id and standard deviation of the rate of change values for each subject. Notes diff --git a/iglu_python/summary_glu.py b/iglu_python/summary_glu.py index f607a4a..407c2e3 100644 --- a/iglu_python/summary_glu.py +++ b/iglu_python/summary_glu.py @@ -10,9 +10,9 @@ def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.DataFrame: """ Calculate summary glucose level - + The function summary_glu is a wrapper that produces summary statistics - for glucose data. Output is a DataFrame object with subject id and the + for glucose data. Output is a DataFrame object with subject id and the summary values: Minimum, 1st Quartile, Median, Mean, 3rd Quartile and Max. Parameters @@ -25,8 +25,8 @@ def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.Da ------- pd.DataFrame If a DataFrame object is passed, then a DataFrame object with - a column for subject id and then a column for each summary value is returned. - If a vector of glucose values is passed, then a DataFrame object without + a column for subject id and then a column for each summary value is returned. + If a vector of glucose values is passed, then a DataFrame object without the subject id is returned. Details @@ -48,10 +48,8 @@ def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.Da ['id', 'Min.', '1st Qu.', 'Median', 'Mean', '3rd Qu.', 'Max.'] """ # Handle vector input (Series, list, or numpy array) - is_vector = False if isinstance(data, (pd.Series, list, np.ndarray)): - is_vector = True # Convert to numpy array for consistent handling if isinstance(data, pd.Series): @@ -86,7 +84,7 @@ def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.Da glucose_values = subject_data['gl'].dropna().values if len(glucose_values) == 0: - warnings.warn(f"No valid glucose values found for subject {subject_id}") + warnings.warn(f"No valid glucose values found for subject {subject_id}", stacklevel=2) # Still include the subject with NaN values summary_stats = { 'Min.': np.nan, @@ -114,14 +112,14 @@ def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.Da def _calculate_summary_stats(glucose_values: np.ndarray) -> dict: """ Calculate summary statistics for glucose values. - + This mimics R's summary() function output for numeric vectors. 
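A minimal sketch of the R-style summary described above, assuming numpy's default "linear" quantile method (equivalent to R's type-7 quantiles); the readings are invented.

import numpy as np

gl = np.array([90.0, 100.0, 110.0, 120.0, 150.0])
summary = {
    "Min.": gl.min(),
    "1st Qu.": np.quantile(gl, 0.25),
    "Median": np.median(gl),
    "Mean": gl.mean(),
    "3rd Qu.": np.quantile(gl, 0.75),
    "Max.": gl.max(),
}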
- + Parameters ---------- glucose_values : np.ndarray Array of glucose values (without NaN) - + Returns ------- dict From b06dd9217540de45f0ae75ce4da5842b3fccc84a Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Mon, 16 Jun 2025 17:55:53 +0300 Subject: [PATCH 09/16] ruff formatting --- iglu_python/active_percent.py | 9 ++++++++- iglu_python/adrr.py | 2 +- iglu_python/cogi.py | 5 ++++- iglu_python/episode_calculation.py | 3 ++- iglu_python/mage.py | 9 +++++++-- iglu_python/process_data.py | 4 ++-- iglu_python/roc.py | 6 +++++- iglu_python/sd_measures.py | 6 ++++-- iglu_python/utils.py | 12 ++++++------ 9 files changed, 39 insertions(+), 17 deletions(-) diff --git a/iglu_python/active_percent.py b/iglu_python/active_percent.py index d137f51..b1cfbfa 100644 --- a/iglu_python/active_percent.py +++ b/iglu_python/active_percent.py @@ -104,7 +104,14 @@ def active_percent( return df -def active_percent_single(data: pd.Series, dt0: Optional[int] = None, tz: str = "", range_type: str = "automatic", ndays: int = 14, consistent_end_date: Optional[Union[str, datetime]] = None) -> dict[str:float]: +def active_percent_single( + data: pd.Series, + dt0: Optional[int] = None, + tz: str = "", + range_type: str = "automatic", + ndays: int = 14, + consistent_end_date: Optional[Union[str, datetime]] = None, +) -> dict[str:float]: """ Calculate percentage of time CGM was active for a single series/subject. """ diff --git a/iglu_python/adrr.py b/iglu_python/adrr.py index 00a13a9..1a84917 100644 --- a/iglu_python/adrr.py +++ b/iglu_python/adrr.py @@ -58,7 +58,7 @@ def adrr_multi(data: pd.DataFrame) -> pd.DataFrame: try: data["time"] = pd.to_datetime(data["time"]) except Exception as e: - raise ValueError(f"Could not convert 'time' column to datetime: {e}") + raise ValueError(f"Could not convert 'time' column to datetime: {e}") from e # Extract date from time data = data.copy() diff --git a/iglu_python/cogi.py b/iglu_python/cogi.py index 3e3610f..ab56908 100644 --- a/iglu_python/cogi.py +++ b/iglu_python/cogi.py @@ -117,7 +117,10 @@ def weight_features( weight: float = 1, increasing: bool = False, ) -> Union[float, pd.Series, list]: - """Helper function to weight and scale features. If feature is a Series (or a list), the output is a Series (or list) with the same number of rows (or length) as the input, with values clipped (or "inverse" clipped) so that they are between 0 and 1.""" + """Helper function to weight and scale features. 
+ If feature is a Series (or a list), the output is a Series (or list) + with the same number of rows (or length) as the input, with values clipped + (or "inverse" clipped) so that they are between 0 and 1.""" if isinstance(feature, pd.Series): scaled = (feature - min(scale_range)) / ( max(scale_range) - min(scale_range) diff --git a/iglu_python/episode_calculation.py b/iglu_python/episode_calculation.py index dd01412..927047d 100644 --- a/iglu_python/episode_calculation.py +++ b/iglu_python/episode_calculation.py @@ -245,7 +245,8 @@ def episode_single( day_one = day_one.tz_convert(local_tz) ndays = len(gd2d_tuple[1]) # generate grid times by starting from day one and cumulatively summing - time_ip = pd.date_range(start=day_one + pd.Timedelta(minutes=dt0), periods=int(ndays * 24 * 60 /dt0), freq=f"{dt0}min") + time_ip = pd.date_range(start=day_one + pd.Timedelta(minutes=dt0), periods=int(ndays * 24 * 60 /dt0), + freq=f"{dt0}min") data_ip = gd2d_tuple[0].flatten().tolist() new_data = pd.DataFrame({ "time": time_ip, diff --git a/iglu_python/mage.py b/iglu_python/mage.py index 2c43522..f97fa54 100644 --- a/iglu_python/mage.py +++ b/iglu_python/mage.py @@ -172,7 +172,11 @@ def mage_naive(data: pd.DataFrame,sd_multiplier:float = 1.0) -> float: return float(mage_val) if not pd.isna(mage_val) else np.nan def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, - direction:str ='avg', return_type:str = "num",inter_gap:int = 45, max_gap:int = 180, tz:str = "" ) -> pd.DataFrame: + direction:str ='avg', + return_type:str = "num", + inter_gap:int = 45, + max_gap:int = 180, + tz:str = "" ) -> pd.DataFrame|float: """Calculate MAGE using moving average algorithm for a single subject""" ## 1. Preprocessing # 1.1 Interpolate over uniform grid @@ -301,7 +305,8 @@ def mage_atomic(data, short_ma,long_ma): idx = list(data.index) # R: idx = as.numeric(rownames(.data)) types = {'REL_MIN': 0, 'REL_MAX': 1} # R: types = list2env(list(REL_MIN=0, REL_MAX=1)) - # Create storage lists - R: list_cross <- list("id" = rep.int(NA, nmeasurements), "type" = rep.int(NA, nmeasurements)) + # Create storage lists - R: list_cross <- list("id" = rep.int(NA, nmeasurements), + # "type" = rep.int(NA, nmeasurements)) list_cross = { 'id': [np.nan] * nmeasurements, 'type': [np.nan] * nmeasurements diff --git a/iglu_python/process_data.py b/iglu_python/process_data.py index 9ee62a3..d5ef53b 100644 --- a/iglu_python/process_data.py +++ b/iglu_python/process_data.py @@ -180,7 +180,7 @@ def process_data( time_data = time_parser(time_data) except Exception as e: raise ValueError(f"Failed to parse times, ensure times are in parsable format. 
" - f"Original error: {str(e)}") + f"Original error: {str(e)}") from e # Insert at position 1 (after id) data.insert(1, 'time', time_data) @@ -229,7 +229,7 @@ def process_data( try: gl_data = pd.to_numeric(gl_data, errors='coerce') except Exception as e: - raise ValueError(f"Failed to convert glucose values to numeric: {str(e)}") + raise ValueError(f"Failed to convert glucose values to numeric: {str(e)}") from e # Convert mmol/L to mg/dL if needed if mmol_conversion: diff --git a/iglu_python/roc.py b/iglu_python/roc.py index bb0482c..5a2f0f6 100644 --- a/iglu_python/roc.py +++ b/iglu_python/roc.py @@ -87,7 +87,11 @@ def roc( 3 NaN """ - def roc_single(data: pd.DataFrame, timelag: int, dt0: int = None , inter_gap: int = 45, tz: str = "") -> np.ndarray: + def roc_single(data: pd.DataFrame, + timelag: int, + dt0: int = None , + inter_gap: int = 45, + tz: str = "") -> np.ndarray: """Calculate ROC for a single subject's data""" data_ip = CGMS2DayByDay(data, dt0=dt0, inter_gap=inter_gap, tz=tz) gl_ip_vec = data_ip[0].flatten() # Flatten the interpolated glucose matrix diff --git a/iglu_python/sd_measures.py b/iglu_python/sd_measures.py index 91a7dc1..6e801de 100644 --- a/iglu_python/sd_measures.py +++ b/iglu_python/sd_measures.py @@ -282,10 +282,12 @@ def sd_measures_vectorized(data: pd.DataFrame, results = [] - for subject_id in data['id'].unique(): + current_dt0 = dt0 + for i, subject_id in enumerate(data['id'].unique()): subject_data = data[data['id'] == subject_id].copy() gd2d, actual_dates, gd2d_dt0 = CGMS2DayByDay(subject_data, tz=tz, dt0=current_dt0, inter_gap=inter_gap) - + if i == 0: + current_dt0 = gd2d_dt0 result = _calculate_sd_subtypes_vectorized(gd2d, gd2d_dt0, subject_id) results.append(result) diff --git a/iglu_python/utils.py b/iglu_python/utils.py index ac20ab3..e487f73 100644 --- a/iglu_python/utils.py +++ b/iglu_python/utils.py @@ -77,14 +77,14 @@ def check_data_columns(data: pd.DataFrame, time_check=False, tz="") -> pd.DataFr if not pd.api.types.is_numeric_dtype(data["gl"]): try: data["gl"] = pd.to_numeric(data["gl"]) - except: - raise ValueError("Column 'gl' must be numeric") + except Exception as e: + raise ValueError("Column 'gl' must be numeric") from e if not pd.api.types.is_datetime64_any_dtype(data["time"]): try: data["time"] = pd.to_datetime(data["time"]) - except: - raise ValueError("Column 'time' must be datetime") + except Exception as e: + raise ValueError("Column 'time' must be datetime") from e if not pd.api.types.is_string_dtype(data["id"]): data["id"] = data["id"].astype(str) @@ -234,8 +234,8 @@ def CGMS2DayByDay( gap_end_time = data["time"].iloc[gap_end_idx] # find the index of the gap end in the time grid gap_end_idx_in_time_grid = int( - np.floor(((gap_end_time - start_time).total_seconds() -1 ) / (60 * dt0)) # -1sec to indicate time before measurement - ) + # -1sec to indicate time before measurement + np.floor(((gap_end_time - start_time).total_seconds() -1 ) / (60 * dt0))) # put nan in the gap interp_data[gap_start_idx_in_time_grid:gap_end_idx_in_time_grid] = np.nan From 0c16450aee724c3b4b26a483b94d3e3215ad0d34 Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Mon, 16 Jun 2025 21:23:09 +0300 Subject: [PATCH 10/16] support for Series, list and ndarray -> return float or dict --- README.md | 18 +++--- iglu_python/adrr.py | 112 ++++++++++++++++-------------------- iglu_python/conga.py | 89 +++++++++++++---------------- iglu_python/ea1c.py | 45 ++++++++------- iglu_python/grade.py | 6 +- iglu_python/grade_eugly.py | 71 +++++++++++------------ 
iglu_python/grade_hyper.py | 74 +++++++++++------------- iglu_python/grade_hypo.py | 76 +++++++++++-------------- tests/test_adrr.py | 67 +++++++++++++++++++--- tests/test_conga.py | 32 ++++++----- tests/test_ea1c.py | 92 ++++++++++++++++++++++++++++-- tests/test_grade.py | 23 ++++++-- tests/test_grade_eugly.py | 114 +++++++++++++++++++++++++++++++++++-- tests/test_grade_hyper.py | 23 ++++++-- tests/test_grade_hypo.py | 34 ++++++----- 15 files changed, 554 insertions(+), 322 deletions(-) diff --git a/README.md b/README.md index 48eeffd..2d4fb63 100644 --- a/README.md +++ b/README.md @@ -27,20 +27,20 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves |----------|-------------|-------------|-------------------|----|----------| | above_percent | percentage of values above target thresholds| ✅ |✅ returns dict ||| | active_percent | percentage of time CGM was active | ✅ | ✅ only Series(DatetimeIndex) returns dict[str:float] -| adrr | average daily risk range | ✅ | +| adrr | average daily risk range | ✅ |✅ only Series(DatetimeIndex) returns float | | auc| Area Under Curve | 🟡 (0.01 precision) |✅ only Series(DatetimeIndex) returns float || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)| | below_percent| percentage of values below target thresholds| ✅ | ✅ returns dict | cogi |Coefficient of Glucose Irregularity | ✅ | ✅ returns float -| conga | Continuous Overall Net Glycemic Action |✅ | +| conga | Continuous Overall Net Glycemic Action |✅ | ✅ only Series(DatetimeIndex) returns float | cv_glu | Coefficient of Variation | ✅| ✅ returns float | -| cv_measures |Coefficient of Variation subtypes (CVmean and CVsd) |✅ |✅ only Series(DatetimeIndex) returns dict[str:float]| | -| ea1c |estimated A1C (eA1C) values| ✅ | -| episode_calculation | Hypo/Hyperglycemic episodes with summary statistics| ✅| || | +| cv_measures |Coefficient of Variation subtypes (CVmean and CVsd) |✅ |✅ only Series(DatetimeIndex) returns dict| | +| ea1c |estimated A1C (eA1C) values| ✅ | ✅ returns float | +| episode_calculation | Hypo/Hyperglycemic episodes with summary statistics| ✅| 🟡 always returns DataFrame(s)|| | | gmi | Glucose Management Indicator | ✅ | ✅ returns float | -| grade_eugly |percentage of GRADE score attributable to target range| ✅ | -| grade_hyper |percentage of GRADE score attributable to hyperglycemia| ✅ | -| grade_hypo |percentage of GRADE score attributable to hypoglycemia| ✅ | -| grade |mean GRADE score| ✅ | +| grade_eugly |percentage of GRADE score attributable to target range| ✅ | ✅ returns float +| grade_hyper |percentage of GRADE score attributable to hyperglycemia| ✅ |✅ returns float +| grade_hypo |percentage of GRADE score attributable to hypoglycemia| ✅ |✅ returns float +| grade |mean GRADE score| ✅ | ✅ returns float | gri |Glycemia Risk Index | ✅ | ✅ returns float | gvp |Glucose Variability Percentage| ✅ | | hbgi |High Blood Glucose Index| ✅ | diff --git a/iglu_python/adrr.py b/iglu_python/adrr.py index 1a84917..5e7a419 100644 --- a/iglu_python/adrr.py +++ b/iglu_python/adrr.py @@ -3,8 +3,9 @@ import numpy as np import pandas as pd +from .utils import check_data_columns -def adrr(data: pd.DataFrame) -> pd.DataFrame: +def adrr(data: pd.DataFrame|pd.Series) -> pd.DataFrame|float: """ Calculate average daily risk range (ADRR) @@ -12,14 +13,15 @@ def adrr(data: pd.DataFrame) -> pd.DataFrame: Parameters ---------- - data : pd.DataFrame + data : pd.DataFrame|pd.Series DataFrame object with column names "id", "time", and 
"gl". + or a Timeseries of glucose values. Returns ------- - pd.DataFrame + pd.DataFrame|float A DataFrame object with two columns: subject id and corresponding - ADRR value. + ADRR value. or a float value for a Timeseries of glucose values. Details ------- @@ -50,77 +52,59 @@ def adrr(data: pd.DataFrame) -> pd.DataFrame: >>> iglu.adrr(data) """ - def adrr_multi(data: pd.DataFrame) -> pd.DataFrame: - """Internal function to calculate ADRR for multiple subjects""" - # Ensure time column is datetime - if not pd.api.types.is_datetime64_any_dtype(data["time"]): - try: - data["time"] = pd.to_datetime(data["time"]) - except Exception as e: - raise ValueError(f"Could not convert 'time' column to datetime: {e}") from e - - # Extract date from time - data = data.copy() - data["date"] = data["time"].dt.date - - # Filter out NaN glucose values - data_filtered = data.dropna(subset=["gl"]) - - if len(data_filtered) == 0: - warnings.warn("All glucose values are NaN. Returning empty DataFrame.", stacklevel=2) - return pd.DataFrame(columns=["id", "ADRR"]) - - # Group by id and date, then calculate BGI and daily risk range - result = ( - data_filtered.groupby(["id", "date"]) - .apply(lambda group: _calculate_daily_risk(group), include_groups=False) - .reset_index() - .groupby("id")["drr"] - .mean() - .reset_index() - .rename(columns={"drr": "ADRR"}) - ) + # Validate input + if isinstance(data, pd.Series): + if not isinstance(data.index, pd.DatetimeIndex): + raise ValueError("Series must have a DatetimeIndex") + return adrr_single(data) - return result + data = check_data_columns(data) - def _calculate_daily_risk(group: pd.DataFrame) -> pd.Series: - """Calculate daily risk range for a single day and subject""" + data.set_index("time", inplace=True,drop=True) + out = data.groupby("id").agg( + ADRR = ("gl", lambda x: adrr_single(x)) + ).reset_index() - # Calculate BGI (Blood Glucose Index) - bgi = (np.log(group["gl"]) ** 1.084) - 5.381 + return out - # Calculate max and min BGI values for the day - max_bgi = np.maximum(bgi.max(), 0) - min_bgi = np.minimum(bgi.min(), 0) - # Calculate risk components - max_risk = 22.77 * (max_bgi**2) - min_risk = 22.77 * (min_bgi**2) +def adrr_single(data: pd.DataFrame|pd.Series) -> float: + """Internal function to calculate ADRR for a single subject or timeseries of glucose values""" - # Daily risk range is the sum of max and min risks - drr = min_risk + max_risk + if isinstance(data, pd.Series): + if not isinstance(data.index, pd.DatetimeIndex): + raise ValueError("Series must have a DatetimeIndex") + elif isinstance(data, pd.DataFrame): + data = data.set_index("time")["gl"] + else: + raise ValueError("Data must be a pandas DataFrame or Series") - return pd.Series({"drr": drr}) + data_filtered = data.dropna() + if len(data_filtered) == 0: + return np.nan + + # Group by date and calculate daily risk for each day + daily_risks = data_filtered.groupby(data_filtered.index.date).apply( + lambda x: _calculate_daily_risk(x) + ) + return daily_risks.mean() - # Validate input - if not isinstance(data, pd.DataFrame): - raise TypeError("Data must be a pandas DataFrame") +def _calculate_daily_risk(gl: pd.Series) -> float: + """Calculate daily risk range for a single day and subject""" - required_columns = ["id", "time", "gl"] - missing_columns = [col for col in required_columns if col not in data.columns] + # Calculate BGI (Blood Glucose Index) + bgi = (np.log(gl) ** 1.084) - 5.381 - if missing_columns: - raise ValueError( - f"Data must contain columns: {required_columns}. 
" - f"Missing columns: {missing_columns}" - ) + # Calculate max and min BGI values for the day + max_bgi = np.maximum(bgi.max(), 0) + min_bgi = np.minimum(bgi.min(), 0) - if len(data) == 0: - warnings.warn("Input DataFrame is empty. Returning empty DataFrame.", stacklevel=2) - return pd.DataFrame(columns=["id", "ADRR"]) + # Calculate risk components + max_risk = 22.77 * (max_bgi**2) + min_risk = 22.77 * (min_bgi**2) - # Calculate ADRR - result = adrr_multi(data) + # Daily risk range is the sum of max and min risks + drr = min_risk + max_risk - return result + return drr diff --git a/iglu_python/conga.py b/iglu_python/conga.py index 2b249cd..4beda95 100644 --- a/iglu_python/conga.py +++ b/iglu_python/conga.py @@ -7,8 +7,8 @@ def conga( - data: Union[pd.DataFrame, pd.Series, list], n: int = 24, tz: str = "" -) -> pd.DataFrame: + data: Union[pd.DataFrame, pd.Series], n: int = 24, tz: str = "" +) -> pd.DataFrame|float: """ Calculate Continuous Overall Net Glycemic Action (CONGA). @@ -59,58 +59,45 @@ def conga( 0 35.355 """ - def conga_single(data: pd.DataFrame, hours: int = 1, tz: str = "") -> float: - """Calculate CONGA for a single subject""" - # Convert data to day-by-day format - # Missing values will be linearly interpolated when close enough to non-missing values. - gl_by_id_ip, _, dt0 = CGMS2DayByDay(data, tz=tz) - - # Calculate number of readings per hour - hourly_readings = round(60 / dt0) - - # Calculate differences between measurements n hours apart - # Flatten the matrix and calculate differences with lag - gl_vector = gl_by_id_ip.flatten() - - # Calculate differences between measurements n hours apart - # Flatten the matrix and calculate differences with lag - lag = hourly_readings * hours - diffs = gl_vector[lag:] - gl_vector[:-lag] - - # Check if we have sufficient data for std calculation - # Need at least 2 non-NaN values for ddof=1 - valid_diffs = diffs[~np.isnan(diffs)] - if len(valid_diffs) < 2: - return np.nan - - return float(np.nanstd(diffs, ddof=1)) - # Handle Series input - if isinstance(data, (pd.Series, list)): - # Convert Series to DataFrame format (assuming that the data is collected with 5-minute intervals) - data_df = pd.DataFrame( - { - "id": ["subject1"] * len(data), - "time": pd.date_range( - start="2020-01-01", periods=len(data), freq="5min" - ), - "gl": data.values, - } - ) - conga_val = conga_single(data_df, hours=n, tz=tz) - return pd.DataFrame({"CONGA": [conga_val]}) + if isinstance(data, (pd.Series)): + if not isinstance(data.index, pd.DatetimeIndex): + raise ValueError("Series must have a DatetimeIndex") + return conga_single(data, hours=n, tz=tz) # Handle DataFrame input data = check_data_columns(data) # Calculate CONGA for each subject - result = [] - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject].copy() - if len(subject_data.dropna(subset=["gl"])) == 0: - continue - - conga_val = conga_single(subject_data, hours=n, tz=tz) - result.append({"id": subject, "CONGA": conga_val}) - - return pd.DataFrame(result) + data.set_index("time", inplace=True,drop=True) + out = data.groupby('id').agg( + CONGA = ("gl", lambda x: conga_single(x, hours=n, tz=tz)) + ).reset_index() + + return out + +def conga_single(data: pd.DataFrame|pd.Series, hours: int = 1, tz: str = "") -> float: + """Calculate CONGA for a single subject""" + # Convert data to day-by-day format + # Missing values will be linearly interpolated when close enough to non-missing values. 
+ gl_by_id_ip, _, dt0 = CGMS2DayByDay(data, tz=tz) + + # Calculate number of readings per hour + hourly_readings = round(60 / dt0) + + # Calculate differences between measurements n hours apart + # Flatten the matrix and calculate differences with lag + gl_vector = gl_by_id_ip.flatten() + + # Calculate differences between measurements n hours apart + # Flatten the matrix and calculate differences with lag + lag = hourly_readings * hours + diffs = gl_vector[lag:] - gl_vector[:-lag] + + # Check if we have sufficient data for std calculation + # Need at least 2 non-NaN values for ddof=1 + valid_diffs = diffs[~np.isnan(diffs)] + if len(valid_diffs) < 2: + return np.nan + + return float(np.nanstd(diffs, ddof=1)) diff --git a/iglu_python/ea1c.py b/iglu_python/ea1c.py index d628659..730ea69 100644 --- a/iglu_python/ea1c.py +++ b/iglu_python/ea1c.py @@ -6,7 +6,7 @@ from .utils import check_data_columns -def ea1c(data: Union[pd.DataFrame, pd.Series, list]) -> pd.DataFrame: +def ea1c(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.DataFrame|float: """ Calculate estimated A1C (eA1C) values. @@ -16,15 +16,15 @@ def ea1c(data: Union[pd.DataFrame, pd.Series, list]) -> pd.DataFrame: Parameters ---------- - data : Union[pd.DataFrame, pd.Series] + data : Union[pd.DataFrame, pd.Series, list, np.ndarray] DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column - for eA1C values. If a Series of glucose values is passed, then a DataFrame - without the subject id is returned. + for eA1C values. If a Series of glucose values is passed, then a float value + is returned. References ---------- @@ -51,27 +51,30 @@ def ea1c(data: Union[pd.DataFrame, pd.Series, list]) -> pd.DataFrame: 0 7.67 """ # Handle Series input - if isinstance(data, pd.Series): - data = data.dropna() - if len(data) == 0: - return pd.DataFrame({"eA1C": [np.nan]}) + if isinstance(data, (pd.Series, np.ndarray, list)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return ea1c_single(data) - mean_glucose = data.mean() - ea1c_value = (46.7 + mean_glucose) / 28.7 - return pd.DataFrame({"eA1C": [ea1c_value]}) # Handle DataFrame input data = check_data_columns(data) # Calculate eA1C for each subject - result = [] - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject].dropna(subset=["gl"]) - if len(subject_data) == 0: - continue + out = data.groupby('id').agg( + eA1C = ("gl", lambda x: ea1c_single(x)) + ).reset_index() - mean_glucose = subject_data["gl"].mean() - ea1c_value = (46.7 + mean_glucose) / 28.7 - result.append({"id": subject, "eA1C": ea1c_value}) + return out - return pd.DataFrame(result) + +def ea1c_single(data: pd.Series) -> float: + """Calculate eA1C for a single subject""" + if not isinstance(data, pd.Series): + raise ValueError("Data must be a pandas Series") + + data = data.dropna() + if len(data) == 0: + return np.nan + + return (46.7 + data.mean()) / 28.7 \ No newline at end of file diff --git a/iglu_python/grade.py b/iglu_python/grade.py index 0268484..3ebe98f 100644 --- a/iglu_python/grade.py +++ b/iglu_python/grade.py @@ -52,8 +52,10 @@ def grade(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: 0 22.34 """ # Handle Series input - if isinstance(data, pd.Series): - return pd.DataFrame({"GRADE": [np.mean(_grade_formula(data.dropna()))]}) + if isinstance(data, (pd.Series, np.ndarray, list)): + if isinstance(data, 
(np.ndarray, list)): + data = pd.Series(data) + return np.mean(_grade_formula(data.dropna())) # Handle DataFrame input data = check_data_columns(data) diff --git a/iglu_python/grade_eugly.py b/iglu_python/grade_eugly.py index ede7e86..6577caa 100644 --- a/iglu_python/grade_eugly.py +++ b/iglu_python/grade_eugly.py @@ -8,8 +8,8 @@ def grade_eugly( - data: Union[pd.DataFrame, pd.Series], lower: int = 70, upper: int = 140 -) -> pd.DataFrame: + data: Union[pd.DataFrame, pd.Series, np.ndarray, list], lower: int = 70, upper: int = 140 +) -> pd.DataFrame|float: """ Calculate percentage of GRADE score attributable to target range. @@ -19,8 +19,8 @@ def grade_eugly( Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, or a numpy array or list of glucose values lower : int, default=70 Lower bound used for hypoglycemia cutoff, in mg/dL upper : int, default=140 @@ -28,10 +28,10 @@ def grade_eugly( Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column - for GRADE euglycemia value. If a Series of glucose values is passed, then a DataFrame - without the subject id is returned. + for GRADE euglycemia value. If a Series of glucose values is passed, then a float + value is returned. References ---------- @@ -59,43 +59,36 @@ def grade_eugly( 0 65.43 """ # Handle Series input - if isinstance(data, pd.Series): - data = data.dropna() - if len(data) == 0: - return pd.DataFrame({"GRADE_eugly": [np.nan]}) - - # Calculate GRADE scores - grade_scores = _grade_formula(data) - - # Calculate percentage in target range - in_range = (data >= lower) & (data <= upper) - total_grade = np.sum(grade_scores) - if total_grade == 0: - return pd.DataFrame({"GRADE_eugly": [np.nan]}) - - eugly_percent = (np.sum(grade_scores[in_range]) / total_grade) * 100 - return pd.DataFrame({"GRADE_eugly": [eugly_percent]}) - + if isinstance(data, (pd.Series, np.ndarray, list)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return grade_eugly_single(data, lower, upper) + # Handle DataFrame input data = check_data_columns(data) # Calculate GRADE euglycemia for each subject - result = [] - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject].dropna(subset=["gl"]) - if len(subject_data) == 0: - continue + out = data.groupby('id').agg( + GRADE_eugly = ("gl", lambda x: grade_eugly_single(x, lower, upper)) + ).reset_index() + + return out + - # Calculate GRADE scores - grade_scores = _grade_formula(subject_data["gl"]) +def grade_eugly_single(data: pd.Series, lower: int = 70, upper: int = 140) -> float: + """Calculate GRADE euglycemia for a single timeseries""" + data = data.dropna() + if len(data) == 0: + return np.nan - # Calculate percentage in target range - in_range = (subject_data["gl"] >= lower) & (subject_data["gl"] <= upper) - total_grade = np.sum(grade_scores) - if total_grade == 0: - continue + # Calculate GRADE scores + grade_scores = _grade_formula(data) - eugly_percent = (np.sum(grade_scores[in_range]) / total_grade) * 100 - result.append({"id": subject, "GRADE_eugly": eugly_percent}) + # Calculate percentage in target range + in_range = (data >= lower) & (data <= upper) + total_grade = np.sum(grade_scores) + if total_grade == 0: + return np.nan - return pd.DataFrame(result) + eugly_percent = 
(np.sum(grade_scores[in_range]) / total_grade) * 100 + return eugly_percent \ No newline at end of file diff --git a/iglu_python/grade_hyper.py b/iglu_python/grade_hyper.py index e517642..4255396 100644 --- a/iglu_python/grade_hyper.py +++ b/iglu_python/grade_hyper.py @@ -7,7 +7,7 @@ from .utils import check_data_columns -def grade_hyper(data: Union[pd.DataFrame, pd.Series], upper: int = 140) -> pd.DataFrame: +def grade_hyper(data: Union[pd.DataFrame, pd.Series, np.ndarray, list], upper: int = 140) -> pd.DataFrame|float: """ Calculate percentage of GRADE score attributable to hyperglycemia. @@ -16,17 +16,17 @@ def grade_hyper(data: Union[pd.DataFrame, pd.Series], upper: int = 140) -> pd.Da Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, or a numpy array or list of glucose values upper : int, default=140 Upper bound used for hyperglycemia cutoff, in mg/dL Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column - for GRADE hyperglycemia value. If a Series of glucose values is passed, then a DataFrame - without the subject id is returned. + for GRADE hyperglycemia value. If a Series of glucose values is passed, then a float + value is returned. References ---------- @@ -54,43 +54,33 @@ def grade_hyper(data: Union[pd.DataFrame, pd.Series], upper: int = 140) -> pd.Da 0 65.43 """ # Handle Series input - if isinstance(data, pd.Series): - data = data.dropna() - if len(data) == 0: - return pd.DataFrame({"GRADE_hyper": [np.nan]}) - - # Calculate GRADE scores - grade_scores = _grade_formula(data) - - # Calculate percentage above upper bound - above_upper = data > upper - total_grade = np.sum(grade_scores) - if total_grade == 0: - return pd.DataFrame({"GRADE_hyper": [np.nan]}) - - hyper_percent = (np.sum(grade_scores[above_upper]) / total_grade) * 100 - return pd.DataFrame({"GRADE_hyper": [hyper_percent]}) + if isinstance(data, (pd.Series, np.ndarray, list)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return grade_hyper_single(data, upper) # Handle DataFrame input data = check_data_columns(data) - # Calculate GRADE hyperglycemia for each subject - result = [] - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject].dropna(subset=["gl"]) - if len(subject_data) == 0: - continue - - # Calculate GRADE scores - grade_scores = _grade_formula(subject_data["gl"]) - - # Calculate percentage above upper bound - above_upper = subject_data["gl"] > upper - total_grade = np.sum(grade_scores) - if total_grade == 0: - continue - - hyper_percent = (np.sum(grade_scores[above_upper]) / total_grade) * 100 - result.append({"id": subject, "GRADE_hyper": hyper_percent}) - - return pd.DataFrame(result) + out = data.groupby('id').agg( + GRADE_hyper = ("gl", lambda x: grade_hyper_single(x, upper)) + ).reset_index() + return out + +def grade_hyper_single(data: pd.Series, upper: int = 140) -> float: + """Calculate GRADE hyperglycemia for a single timeseries""" + data = data.dropna() + if len(data) == 0: + return np.nan + + # Calculate GRADE scores + grade_scores = _grade_formula(data) + + # Calculate percentage above upper bound + above_upper = data > upper + total_grade = np.sum(grade_scores) + if total_grade == 0: + return np.nan + + hyper_percent = 
(np.sum(grade_scores[above_upper]) / total_grade) * 100 + return hyper_percent \ No newline at end of file diff --git a/iglu_python/grade_hypo.py b/iglu_python/grade_hypo.py index 1bef98e..2a18697 100644 --- a/iglu_python/grade_hypo.py +++ b/iglu_python/grade_hypo.py @@ -7,7 +7,7 @@ from .utils import check_data_columns -def grade_hypo(data: Union[pd.DataFrame, pd.Series], lower: int = 80) -> pd.DataFrame: +def grade_hypo(data: Union[pd.DataFrame, pd.Series, np.ndarray, list], lower: int = 80) -> pd.DataFrame|float: """ Calculate percentage of GRADE score attributable to hypoglycemia. @@ -16,17 +16,17 @@ def grade_hypo(data: Union[pd.DataFrame, pd.Series], lower: int = 80) -> pd.Data Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, or a numpy array or list of glucose values lower : int, default=80 Lower bound used for hypoglycemia cutoff, in mg/dL Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column - for GRADE hypoglycemia value. If a Series of glucose values is passed, then a DataFrame - without the subject id is returned. + for GRADE hypoglycemia value. If a Series of glucose values is passed, then a float + value is returned. References ---------- @@ -54,43 +54,35 @@ def grade_hypo(data: Union[pd.DataFrame, pd.Series], lower: int = 80) -> pd.Data 0 35.43 """ # Handle Series input - if isinstance(data, pd.Series): - data = data.dropna() - if len(data) == 0: - return pd.DataFrame({"GRADE_hypo": [np.nan]}) - - # Calculate GRADE scores - grade_scores = _grade_formula(data) - - # Calculate percentage below lower bound - below_lower = data < lower - total_grade = np.sum(grade_scores) - if total_grade == 0: - return pd.DataFrame({"GRADE_hypo": [np.nan]}) - - hypo_percent = (np.sum(grade_scores[below_lower]) / total_grade) * 100 - return pd.DataFrame({"GRADE_hypo": [hypo_percent]}) - + if isinstance(data, (pd.Series, np.ndarray, list)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return grade_hypo_single(data, lower) + # Handle DataFrame input data = check_data_columns(data) # Calculate GRADE hypoglycemia for each subject - result = [] - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject].dropna(subset=["gl"]) - if len(subject_data) == 0: - continue - - # Calculate GRADE scores - grade_scores = _grade_formula(subject_data["gl"]) - - # Calculate percentage below lower bound - below_lower = subject_data["gl"] < lower - total_grade = np.sum(grade_scores) - if total_grade == 0: - continue - - hypo_percent = (np.sum(grade_scores[below_lower]) / total_grade) * 100 - result.append({"id": subject, "GRADE_hypo": hypo_percent}) - - return pd.DataFrame(result) + out = data.groupby('id').agg( + GRADE_hypo = ("gl", lambda x: grade_hypo_single(x, lower)) + ).reset_index() + + return out + +def grade_hypo_single(data: pd.Series, lower: int = 80) -> float: + """Calculate GRADE hypoglycemia for a single timeseries""" + data = data.dropna() + if len(data) == 0: + return np.nan + + # Calculate GRADE scores + grade_scores = _grade_formula(data) + + # Calculate percentage below lower bound + below_lower = data < lower + total_grade = np.sum(grade_scores) + if total_grade == 0: + return np.nan + + hypo_percent = (np.sum(grade_scores[below_lower]) / total_grade) 
* 100 + return hypo_percent \ No newline at end of file diff --git a/tests/test_adrr.py b/tests/test_adrr.py index a9a2399..0f26da2 100644 --- a/tests/test_adrr.py +++ b/tests/test_adrr.py @@ -8,7 +8,6 @@ method_name = "adrr" - def get_test_scenarios(): """Get test scenarios for ADRR calculations""" # Load expected results @@ -23,13 +22,6 @@ def get_test_scenarios(): if scenario["method"] == method_name ] - -@pytest.fixture -def test_data(): - """Fixture that provides test data for ADRR calculations""" - return get_test_scenarios() - - @pytest.mark.parametrize("scenario", get_test_scenarios()) def test_adrr_iglu_r_compatible(scenario): """Test ADRR calculation against expected results""" @@ -69,3 +61,62 @@ def test_adrr_iglu_r_compatible(scenario): check_freq=True, check_flags=True, ) + +def test_adrr_series_with_datetime_index(): + """Test ADRR calculation with Series input that has DatetimeIndex.""" + # Create test data with DatetimeIndex + time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00', + '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00']) + data = pd.Series( + [100, 120, 110, # Day 1: LBGI=0.5, HBGI=0.8 + 90, 130, 95], # Day 2: LBGI=0.7, HBGI=1.2 + index=time + ) + + # Calculate ADRR + result = iglu.adrr(data) + + # Expected results: + # Day 1: LBGI=0.5, HBGI=0.8, Risk=1.3 + # Day 2: LBGI=0.7, HBGI=1.2, Risk=1.9 + # ADRR = mean([1.3, 1.9]) = 1.6 + expected = 1.538552 + # Compare results + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=0.001) + +def test_adrr_series_without_datetime_index(): + """Test ADRR calculation with Series input that doesn't have DatetimeIndex.""" + # Create test data with regular index + data = pd.Series( + [100, 120, 110, 90, 130, 95], + index=range(6) # Regular integer index instead of DatetimeIndex + ) + + # Attempt to calculate ADRR - should raise ValueError + with pytest.raises(ValueError, match="Series must have a DatetimeIndex"): + iglu.adrr(data) + +def test_adrr_series_with_missing_values(): + """Test ADRR calculation with Series input containing missing values.""" + # Create test data with DatetimeIndex and missing values + time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00', + '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00']) + data = pd.Series( + [100, np.nan, 110, # Day 1: LBGI=0.5, HBGI=0.8 (after interpolation) + 90, 130, np.nan], # Day 2: LBGI=0.7, HBGI=1.2 (after interpolation) + index=time + ) + + # Calculate ADRR with interpolation + result = iglu.adrr(data) + + # Expected results: + # Day 1: LBGI=0.5, HBGI=0.8, Risk=0.48 + # Day 2: LBGI=0.7, HBGI=1.2, Risk=2.45 + # ADRR = mean([0.48, 2.45]) = 1.466489 + expected = 1.466489 + + # Compare results + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=0.001) diff --git a/tests/test_conga.py b/tests/test_conga.py index 7a20f3c..e487bd6 100644 --- a/tests/test_conga.py +++ b/tests/test_conga.py @@ -99,13 +99,15 @@ def test_conga_default(): def test_conga_series(): """Test CONGA with Series input""" + days = 2 series_data = pd.Series( - ([150, 155, 160, 165, 140, 145] * 2) * 10 - ) # 120 data points/10 hours + [150, 155, 160, 165, 140, 145] * 2*24*days, + index=pd.date_range(start='2020-01-01', periods=12*24*days, freq='5min') + ) # 12 data points/2 hours result = iglu.conga(series_data, n=1) # CONGA to be calculated for 1 hour - assert isinstance(result, pd.DataFrame) - assert "CONGA" in result.columns - 
assert len(result) == 1 + + assert isinstance(result, float) + assert result == 0.0 # no change in glucose values over 2 days def test_conga_empty(): @@ -115,15 +117,17 @@ def test_conga_empty(): iglu.conga(empty_data) -def test_conga_constant_glucose(): - """Test CONGA with constant glucose values""" - series_data = pd.Series( - ([150, 155, 160, 165, 140, 145] * 2) * 10 - ) # 120 data points/10 hours - result = iglu.conga(series_data, n=1) # CONGA to be calculated for 1 hour - assert len(result) == 1 - assert result["CONGA"].iloc[0] == 0 # Should be 0 for constant glucose - +def test_conga_series_without_datetime_index(): + """Test CONGA with Series input that doesn't have DatetimeIndex.""" + # Create test data with regular index + data = pd.Series( + [100, 120, 110, 90, 130, 95], + index=range(6) # Regular integer index instead of DatetimeIndex + ) + + # Attempt to calculate CONGA - should raise ValueError + with pytest.raises(ValueError, match="Series must have a DatetimeIndex"): + iglu.conga(data) def test_conga_missing_values(): """Test CONGA with missing values""" diff --git a/tests/test_ea1c.py b/tests/test_ea1c.py index 5a19725..0ff6538 100644 --- a/tests/test_ea1c.py +++ b/tests/test_ea1c.py @@ -111,10 +111,9 @@ def test_ea1c_series(): """Test eA1C with Series input""" series_data = pd.Series([150, 155, 160, 165, 140, 145] * 10) # 60 data points result = iglu.ea1c(series_data) - assert isinstance(result, pd.DataFrame) - assert "eA1C" in result.columns - assert len(result) == 1 - assert (result["eA1C"].iloc[0] >= 0) & (result["eA1C"].iloc[0] <= 20) + assert isinstance(result, float) + expected = 6.9407 + np.testing.assert_allclose(result, expected, rtol=1e-3) def test_ea1c_empty(): @@ -195,3 +194,88 @@ def test_ea1c_extreme_values(): assert (result["eA1C"].iloc[0] >= 0) & (result["eA1C"].iloc[0] <= 20) # eA1C should be reasonable even with extreme values assert result["eA1C"].iloc[0] > 0 + + +def test_ea1c_list_input(): + """Test EA1c calculation with list input.""" + # Create test data as a list + data = [100, 120, 110, 90, 130, 95] # mean = 107.5 + + # Calculate EA1c + result = iglu.ea1c(data) + + # Expected results: + # EA1c = (mean_glucose + 46.7) / 28.7 + # EA1c = (107.5 + 46.7) / 28.7 = 5.376 + expected = 5.376 + + # Compare results + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=0.001) + + +def test_ea1c_numpy_array_input(): + """Test EA1c calculation with numpy array input.""" + # Create test data as a numpy array + data = np.array([100, 120, 110, 90, 130, 95]) # mean = 107.5 + + # Calculate EA1c + result = iglu.ea1c(data) + + # Expected results: + # EA1c = (mean_glucose + 46.7) / 28.7 + # EA1c = (107.5 + 46.7) / 28.7 = 5.376 + expected = 5.376 + + # Compare results + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=0.001) + + +def test_ea1c_list_with_missing_values(): + """Test EA1c calculation with list input containing missing values.""" + # Create test data as a list with None values + data = [100, None, 110, 90, 130, None] # mean = 107.5 (excluding None) + + # Calculate EA1c + result = iglu.ea1c(data) + + # Expected results: + # EA1c = (mean_glucose + 46.7) / 28.7 + # EA1c = (107.5 + 46.7) / 28.7 = 5.376 + expected = 5.376 + + # Compare results + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=0.001) + + +def test_ea1c_numpy_array_with_nan(): + """Test EA1c calculation with numpy array input containing NaN values.""" + # Create test data as a numpy array with 
NaN values + data = np.array([100, np.nan, 110, 90, 130, np.nan]) # mean = 107.5 (excluding NaN) + + # Calculate EA1c + result = iglu.ea1c(data) + + # Expected results: + # EA1c = (mean_glucose + 46.7) / 28.7 + # EA1c = (107.5 + 46.7) / 28.7 = 5.376 + expected = 5.376 + + # Compare results + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=0.001) + + +def test_ea1c_empty_input(): + """Test EA1c calculation with empty input.""" + # Test with empty list + result = iglu.ea1c([]) + assert isinstance(result, float) + assert np.isnan(result) + + # Test with empty numpy array + result = iglu.ea1c(np.array([])) + assert isinstance(result, float) + assert np.isnan(result) diff --git a/tests/test_grade.py b/tests/test_grade.py index e3fed71..c9fc897 100644 --- a/tests/test_grade.py +++ b/tests/test_grade.py @@ -123,11 +123,26 @@ def test_grade_series_input(): result = grade(data) # Check output format - assert isinstance(result, pd.DataFrame) - assert "GRADE" in result.columns - assert len(result) == 1 - assert len(result.columns) == 1 + assert isinstance(result, float) + np.testing.assert_allclose(result, 3.747116, rtol=1e-3) + +def test_grade_list_input(): + """Test GRADE calculation with list input.""" + data = [100, 200, 100, 100] + result = grade(data) + # Check output format + assert isinstance(result, float) + np.testing.assert_allclose(result, 3.747116, rtol=1e-3) + +def test_grade_numpy_array_input(): + """Test GRADE calculation with numpy array input.""" + data = np.array([100, 200, 100, 100]) + result = grade(data) + + # Check output format + assert isinstance(result, float) + np.testing.assert_allclose(result, 3.747116, rtol=1e-3) def test_grade_empty_data(): """Test GRADE calculation with empty DataFrame.""" diff --git a/tests/test_grade_eugly.py b/tests/test_grade_eugly.py index 8776d14..1acde3f 100644 --- a/tests/test_grade_eugly.py +++ b/tests/test_grade_eugly.py @@ -105,10 +105,8 @@ def test_grade_eugly_series_input(): result = grade_eugly(data) # Check output format - assert isinstance(result, pd.DataFrame) - assert "GRADE_eugly" in result.columns - assert len(result) == 1 - assert len(result.columns) == 1 + assert isinstance(result, float) + np.testing.assert_allclose(result, 8.70927656, rtol=1e-3) def test_grade_eugly_custom_targets(): @@ -243,3 +241,111 @@ def test_grade_eugly_multiple_subjects(): result.loc[result["id"] == "subject3", "GRADE_eugly"].values[0] < result.loc[result["id"] == "subject1", "GRADE_eugly"].values[0] ) + + +def test_grade_eugly_list_input(): + """Test GRADE euglycemia calculation with list input.""" + # Create test data as a list + data = [70, 80, 90, 100, 110, 120, 130, 140, 150, 160] # 4 values in euglycemic range (70-140) + + # Calculate GRADE euglycemia + result = iglu.grade_eugly(data) + + # Expected results: + # Euglycemic values: 80, 90, 100, 110, 120, 130, 140 (7 values) + # Total values: 10 + # GRADE euglycemia = (7 / 10) * 100 = 70.0 + expected = 51.496907 + + # Compare results + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=0.001) + + +def test_grade_eugly_numpy_array_input(): + """Test GRADE euglycemia calculation with numpy array input.""" + # Create test data as a numpy array + data = np.array([70, 80, 90, 100, 110, 120, 130, 140, 150, 160]) # 4 values in euglycemic range (70-140) + + # Calculate GRADE euglycemia + result = iglu.grade_eugly(data) + + # Expected results: + # Euglycemic values: 80, 90, 100, 110, 120, 130, 140 (7 values) + # Total values: 10 + # GRADE 
diff --git a/tests/test_grade_hyper.py b/tests/test_grade_hyper.py
index 1d9b2d7..50c116e 100644
--- a/tests/test_grade_hyper.py
+++ b/tests/test_grade_hyper.py
@@ -105,11 +105,26 @@ def test_grade_hyper_series_input():
     result = grade_hyper(data)
 
     # Check output format
-    assert isinstance(result, pd.DataFrame)
-    assert "GRADE_hyper" in result.columns
-    assert len(result) == 1
-    assert len(result.columns) == 1
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, 89.684900, rtol=1e-3)
+
+def test_grade_hyper_list_input():
+    """Test GRADE hyperglycemia calculation with list input."""
+    data = [150, 200, 130, 190]
+    result = grade_hyper(data)
 
+    # Check output format
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, 89.684900, rtol=1e-3)
+
+def test_grade_hyper_numpy_array_input():
+    """Test GRADE hyperglycemia calculation with numpy array input."""
+    data = np.array([150, 200, 130, 190])
+    result = grade_hyper(data)
+
+    # Check output format
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, 89.684900, rtol=1e-3)
 
 def test_grade_hyper_custom_upper():
     """Test GRADE hyperglycemia calculation with custom upper bound."""
diff --git a/tests/test_grade_hypo.py b/tests/test_grade_hypo.py
index a30f77b..d0fc35c 100644
--- a/tests/test_grade_hypo.py
+++ b/tests/test_grade_hypo.py
@@ -96,18 +96,28 @@ def test_grade_hypo_default():
     assert all(result["GRADE_hypo"] <= 100)  # Percentages should not exceed 100%
 
 
-def test_grade_hypo_series():
+def test_grade_hypo_series_input():
     """Test GRADE hypoglycemia with Series input"""
     series_data = pd.Series(
         [150, 75, 160, 65, 140, 85]
     )  # Include some hypoglycemic values
     result = iglu.grade_hypo(series_data)
-    assert isinstance(result, pd.DataFrame)
-    assert "GRADE_hypo" in result.columns
-    assert len(result) == 1
-    assert result["GRADE_hypo"].iloc[0] >= 0
-    assert result["GRADE_hypo"].iloc[0] <= 100
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, 19.225537, rtol=1e-3)
 
+def test_grade_hypo_list_input():
+    """Test GRADE hypoglycemia with list input"""
+    list_data = [150, 75, 160, 65, 140, 85]
+    result = iglu.grade_hypo(list_data)
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, 19.225537, rtol=1e-3)
+
+def test_grade_hypo_numpy_array_input():
+    """Test GRADE hypoglycemia with numpy array input"""
+    array_data = np.array([150, 75, 160, 65, 140, 85])
+    result = iglu.grade_hypo(array_data)
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, 19.225537, rtol=1e-3)
 
 def test_grade_hypo_empty():
     """Test GRADE hypoglycemia with empty data"""
@@ -121,18 +131,14 @@ def test_grade_hypo_constant_glucose():
     # Test with constant glucose above lower bound
     series_data = pd.Series([150] * 10)
     result = iglu.grade_hypo(series_data)
-    assert len(result) == 1
-    assert (
-        result["GRADE_hypo"].iloc[0] == 0
-    )  # Should be 0 for constant glucose above lower bound
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, 0, rtol=1e-3)
 
     # Test with constant glucose below lower bound
     series_data = pd.Series([70] * 10)
     result = iglu.grade_hypo(series_data)
-    assert len(result) == 1
-    assert (
-        result["GRADE_hypo"].iloc[0] == 100
-    )  # Should be 100 for constant glucose below lower bound
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, 100, rtol=1e-3)
 
 
 def test_grade_hypo_missing_values():
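
The convention the following patches converge on, stated once for clarity:
DataFrame input keeps the per-subject DataFrame output, while Series/list/ndarray
input returns a bare float (or a dict for multi-valued metrics). A usage sketch
under that convention (the import alias is an assumption; the HBGI reference
value 6.208971 is taken from the tests in the next patch):

    import pandas as pd
    import iglu_python as iglu

    # DataFrame in -> one row per subject, columns ["id", "HBGI"]
    df = pd.DataFrame({
        "id": ["s1"] * 4,
        "time": pd.date_range("2020-01-01", periods=4, freq="5min"),
        "gl": [150, 200, 130, 190],
    })
    print(iglu.hbgi(df))

    # bare glucose values in -> float out
    print(iglu.hbgi([150, 200, 130, 190]))  # ~6.209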
"""Test GRADE hyperglycemia calculation with custom upper bound.""" diff --git a/tests/test_grade_hypo.py b/tests/test_grade_hypo.py index a30f77b..d0fc35c 100644 --- a/tests/test_grade_hypo.py +++ b/tests/test_grade_hypo.py @@ -96,18 +96,28 @@ def test_grade_hypo_default(): assert all(result["GRADE_hypo"] <= 100) # Percentages should not exceed 100% -def test_grade_hypo_series(): +def test_grade_hypo_series_input(): """Test GRADE hypoglycemia with Series input""" series_data = pd.Series( [150, 75, 160, 65, 140, 85] ) # Include some hypoglycemic values result = iglu.grade_hypo(series_data) - assert isinstance(result, pd.DataFrame) - assert "GRADE_hypo" in result.columns - assert len(result) == 1 - assert result["GRADE_hypo"].iloc[0] >= 0 - assert result["GRADE_hypo"].iloc[0] <= 100 + assert isinstance(result, float) + np.testing.assert_allclose(result, 19.225537, rtol=1e-3) +def test_grade_hypo_list_input(): + """Test GRADE hypoglycemia with Series input""" + list_data = [150, 75, 160, 65, 140, 85] + result = iglu.grade_hypo(list_data) + assert isinstance(result, float) + np.testing.assert_allclose(result, 19.225537, rtol=1e-3) + +def test_grade_hypo_numpy_array_input(): + """Test GRADE hypoglycemia with Series input""" + array_data = np.array([150, 75, 160, 65, 140, 85]) + result = iglu.grade_hypo(array_data) + assert isinstance(result, float) + np.testing.assert_allclose(result, 19.225537, rtol=1e-3) def test_grade_hypo_empty(): """Test GRADE hypoglycemia with empty data""" @@ -121,18 +131,14 @@ def test_grade_hypo_constant_glucose(): # Test with constant glucose above lower bound series_data = pd.Series([150] * 10) result = iglu.grade_hypo(series_data) - assert len(result) == 1 - assert ( - result["GRADE_hypo"].iloc[0] == 0 - ) # Should be 0 for constant glucose above lower bound + assert isinstance(result, float) + np.testing.assert_allclose(result, 0, rtol=1e-3) # Test with constant glucose below lower bound series_data = pd.Series([70] * 10) result = iglu.grade_hypo(series_data) - assert len(result) == 1 - assert ( - result["GRADE_hypo"].iloc[0] == 100 - ) # Should be 100 for constant glucose below lower bound + assert isinstance(result, float) + np.testing.assert_allclose(result, 100, rtol=1e-3) def test_grade_hypo_missing_values(): From ae290594b3608f725288a5797420829d15609614 Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Mon, 16 Jun 2025 22:23:28 +0300 Subject: [PATCH 11/16] support for Series, list and ndarray -> return float or dict --- README.md | 8 ++-- iglu_python/gvp.py | 92 ++++++++++++++++----------------------------- iglu_python/hbgi.py | 65 ++++++++++++++------------------ tests/test_gvp.py | 41 ++------------------ tests/test_hbgi.py | 30 ++++++++++++--- 5 files changed, 93 insertions(+), 143 deletions(-) diff --git a/README.md b/README.md index 2d4fb63..4db5073 100644 --- a/README.md +++ b/README.md @@ -40,10 +40,10 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves | grade_eugly |percentage of GRADE score attributable to target range| ✅ | ✅ returns float | grade_hyper |percentage of GRADE score attributable to hyperglycemia| ✅ |✅ returns float | grade_hypo |percentage of GRADE score attributable to hypoglycemia| ✅ |✅ returns float -| grade |mean GRADE score| ✅ | ✅ returns float -| gri |Glycemia Risk Index | ✅ | ✅ returns float -| gvp |Glucose Variability Percentage| ✅ | -| hbgi |High Blood Glucose Index| ✅ | +| grade |mean GRADE score| ✅ | ✅ returns float | +| gri |Glycemia Risk Index | ✅ | ✅ returns float | +| gvp |Glucose 
diff --git a/iglu_python/gvp.py b/iglu_python/gvp.py
index 0d923ef..b53fbce 100644
--- a/iglu_python/gvp.py
+++ b/iglu_python/gvp.py
@@ -6,7 +6,7 @@
 from .utils import CGMS2DayByDay, check_data_columns
 
 
-def gvp(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.DataFrame:
+def gvp(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame|float:
     r"""
     Calculate Glucose Variability Percentage (GVP).
 
@@ -27,10 +27,9 @@ def gvp(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.DataFrame
 
     Returns
     -------
-    pd.DataFrame
+    pd.DataFrame|float
         DataFrame with 1 row for each subject, a column for subject id and a column
-        for GVP value. If a Series of glucose values is passed, then a DataFrame
-        without the subject id is returned.
+        for GVP value. If a Series of glucose values is passed, then a float is returned.
 
     References
     ----------
@@ -58,64 +57,19 @@ def gvp(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.DataFrame
     0  42.30
     """
     # Handle Series input
-    is_vector = False
-    if isinstance(data, (list, np.ndarray)):
-        data = pd.Series(data)
-    if isinstance(data, pd.Series):
-        is_vector = True
-        data = data.dropna()
-        if len(data) == 0:
-            return pd.DataFrame({"GVP": [np.nan]})
-
-        # Convert to DataFrame format for processing
-        data = pd.DataFrame(
-            {
-                "id": ["subject1"] * len(data),
-                "time": pd.date_range(
-                    start="2020-01-01", periods=len(data), freq="5min"
-                ),
-                "gl": data.values,
-            }
-        )
+    if isinstance(data, pd.Series):  # only a Series can carry the required DatetimeIndex
+        if not isinstance(data.index, pd.DatetimeIndex):
+            raise ValueError("Data must be a Series with a DatetimeIndex")
+        return gvp_single(data)
 
     # Handle DataFrame input
-    data = check_data_columns(data)
+    data = check_data_columns(data)
+    data.set_index("time", inplace=True, drop=True)
 
-    def gvp_single(subj_data):
-        """Calculate GVP for a single subject"""
-        # Get interpolated data
-        daybyday, _, reading_gap = CGMS2DayByDay(subj_data)
-        daybyday = daybyday.flatten()
-
-        # Calculate differences between consecutive readings
-        diffvec = np.diff(daybyday)
-        # Exclude NA values from diffvec
-        diffvec = diffvec[~np.isnan(diffvec)]
-
-        # Calculate added length (hypotenuse) and base length
-        added_length = np.sqrt(reading_gap**2 + diffvec**2)
-        base_length = len(diffvec) * reading_gap
-
-        # Calculate GVP
-        if base_length == 0:
-            return np.nan
-
-        return (np.sum(added_length) / base_length - 1) * 100
-
-    # Calculate GVP for each subject
-    result = []
-    for subject in data["id"].unique():
-        subject_data = data[data["id"] == subject].dropna(subset=["gl"])
-        if len(subject_data) == 0:
-            continue
-
-        gvp_value = gvp_single(subject_data)
-        result.append({"id": subject, "GVP": gvp_value})
-
-    df = pd.DataFrame(result)
-    if is_vector:
-        df = df.drop(columns=["id"])
-    return df
+    out = data.groupby('id').agg(
+        GVP = ("gl", lambda x: gvp_single(x))
+    ).reset_index()
+    return out
 
 
 def calculate_gvp(glucose_values: pd.Series, timestamps: pd.Series) -> float:
@@ -165,3 +119,23 @@ def calculate_gvp(glucose_values: pd.Series, timestamps: pd.Series) -> float:
     return gvp
 
 
+def gvp_single(subj_data):
+    """Calculate GVP for a single subject"""
+    # Get interpolated data
+    daybyday, _, reading_gap = CGMS2DayByDay(subj_data)
+    daybyday = daybyday.flatten()
+
+    # Calculate differences between consecutive readings
+    diffvec = 
np.diff(daybyday) + # Exclude NA values from diffvec + diffvec = diffvec[~np.isnan(diffvec)] + + # Calculate added length (hypotenuse) and base length + added_length = np.sqrt(reading_gap**2 + diffvec**2) + base_length = len(diffvec) * reading_gap + + # Calculate GVP + if base_length == 0: + return np.nan + + return (np.sum(added_length) / base_length - 1) * 100 \ No newline at end of file diff --git a/iglu_python/hbgi.py b/iglu_python/hbgi.py index 85b67a5..3eef969 100644 --- a/iglu_python/hbgi.py +++ b/iglu_python/hbgi.py @@ -6,7 +6,7 @@ from .utils import check_data_columns -def hbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: +def hbgi(data: Union[pd.DataFrame, pd.Series, np.ndarray, list]) -> pd.DataFrame|float: r""" Calculate High Blood Glucose Index (HBGI). @@ -24,15 +24,15 @@ def hbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, + or a numpy array or list of glucose values Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column - for HBGI values. If a Series of glucose values is passed, then a DataFrame - without the subject id is returned. + for HBGI values. If a Series of glucose values is passed, then a float is returned. References ---------- @@ -60,41 +60,32 @@ def hbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: 0 4.95 """ - def calculate_hbgi(glucose_values: pd.Series) -> float: - """Helper function to calculate HBGI for a single series of values.""" - if len(glucose_values) == 0: - return np.nan - - # Calculate fbg values - fbg = 1.509 * (np.log(glucose_values) ** 1.084 - 5.381) - fbg = np.maximum(fbg, 0) # Take max with 0 - - # Calculate HBGI - n = len(glucose_values) - hbgi_value = 10 * np.sum(fbg[glucose_values >= 112.5] ** 2) / n - - return hbgi_value - # Handle Series input - if isinstance(data, pd.Series): - data = data.dropna() - if len(data) == 0: - return pd.DataFrame({"HBGI": [np.nan]}) - - hbgi_value = calculate_hbgi(data) - return pd.DataFrame({"HBGI": [hbgi_value]}) + if isinstance(data, (pd.Series, np.ndarray, list)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return calculate_hbgi_single(data) # Handle DataFrame input data = check_data_columns(data) - # Calculate HBGI for each subject - result = [] - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject].dropna(subset=["gl"]) - if len(subject_data) == 0: - continue + out = data.groupby('id').agg( + HBGI = ("gl", lambda x: calculate_hbgi_single(x)) + ).reset_index() + return out + +def calculate_hbgi_single(glucose_values: pd.Series) -> float: + """Helper function to calculate HBGI for a single series of values.""" + glucose_values = glucose_values.dropna() + if len(glucose_values) == 0: + return np.nan + + # Calculate fbg values + fbg = 1.509 * (np.log(glucose_values) ** 1.084 - 5.381) + fbg = np.maximum(fbg, 0) # Take max with 0 - hbgi_value = calculate_hbgi(subject_data["gl"]) - result.append({"id": subject, "HBGI": hbgi_value}) + # Calculate HBGI + n = len(glucose_values) + hbgi_value = 10 * np.sum(fbg[glucose_values >= 112.5] ** 2) / n - return pd.DataFrame(result) + return hbgi_value \ No newline at end of file diff --git a/tests/test_gvp.py b/tests/test_gvp.py index 
c985106..b2f51a0 100644 --- a/tests/test_gvp.py +++ b/tests/test_gvp.py @@ -118,48 +118,15 @@ def test_gvp_basic(): ) -def test_gvp_series_input(): + +def test_gvp_series_no_timestamp_input(): """Test GVP calculation with Series input.""" data = pd.Series( [100, 120, 100, 80], - index=pd.to_datetime( - [ - "2020-01-01 00:00:00", - "2020-01-01 00:05:00", - "2020-01-01 00:10:00", - "2020-01-01 00:15:00", - ] - ), ) - result = gvp(data) - - # Check output format - assert isinstance(result, pd.DataFrame) - assert "GVP" in result.columns - assert len(result) == 1 - assert len(result.columns) == 1 - - # Check that GVP is calculated - assert not np.isnan(result.loc[0, "GVP"]) - assert result.loc[0, "GVP"] > 0 - - -def test_gvp_series_input_list(): - """Test GVP calculation with Series input without datetime index.""" - data = [100, 120, 100, 80] - - result = gvp(data) - - # Check output format - assert isinstance(result, pd.DataFrame) - assert "GVP" in result.columns - assert len(result) == 1 - assert len(result.columns) == 1 - - # Check that GVP is calculated - assert not np.isnan(result.loc[0, "GVP"]) - assert result.loc[0, "GVP"] > 0 + with pytest.raises(ValueError): + gvp(data) def test_gvp_empty_data(): """Test GVP calculation with empty DataFrame.""" diff --git a/tests/test_hbgi.py b/tests/test_hbgi.py index f77dd9f..6a1d935 100644 --- a/tests/test_hbgi.py +++ b/tests/test_hbgi.py @@ -103,15 +103,33 @@ def test_hbgi_series_input(): data = pd.Series([150, 200, 130, 190]) result = iglu.hbgi(data) + expected = 6.208971 + # Check output format - assert isinstance(result, pd.DataFrame) - assert "HBGI" in result.columns - assert len(result) == 1 - assert len(result.columns) == 1 + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) - # Check that HBGI is calculated (should be positive due to hyperglycemia) - assert result.loc[0, "HBGI"] > 0 +def test_hbgi_list_input(): + """Test HBGI calculation with list input.""" + data = [150, 200, 130, 190] + result = iglu.hbgi(data) + + expected = 6.208971 + # Check output format + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) + +def test_hbgi_numpy_array_input(): + """Test HBGI calculation with numpy array input.""" + data = np.array([150, 200, 130, 190]) + result = iglu.hbgi(data) + + expected = 6.208971 + + # Check output format + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) def test_hbgi_empty_data(): """Test HBGI calculation with empty DataFrame.""" From 9c0ddc9aaa152716ca7e9950fe573c9971ebce3c Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Mon, 16 Jun 2025 23:09:27 +0300 Subject: [PATCH 12/16] support for Series, list and ndarray -> return float or dict --- README.md | 3 +- iglu_python/hyper_index.py | 74 +++++++++++++---------------------- iglu_python/hypo_index.py | 80 ++++++++++++++------------------------ tests/test_hyper_index.py | 33 ++++++++++++---- tests/test_hypo_index.py | 24 +++++++++--- 5 files changed, 103 insertions(+), 111 deletions(-) diff --git a/README.md b/README.md index 4db5073..610d1c9 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,8 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves | gri |Glycemia Risk Index | ✅ | ✅ returns float | | gvp |Glucose Variability Percentage| ✅ | ✅ only Series(DatetimeIndex) returns float | hbgi |High Blood Glucose Index| ✅ | ✅ returns float | -| hyper_index |Hyperglycemia Index| ✅ | +| hyper_index |Hyperglycemia Index| ✅ |✅ returns 
float | +| hyper_index |Hyperglycemia Index| ✅ |✅ returns float | | hypo_index |Hypoglycemia Index| ✅ | | igc |Index of Glycemic Control| ✅ | | in_range_percent |percentage of values within target ranges| ✅ | ✅ returns dict diff --git a/iglu_python/hyper_index.py b/iglu_python/hyper_index.py index 56539ad..c184555 100644 --- a/iglu_python/hyper_index.py +++ b/iglu_python/hyper_index.py @@ -7,8 +7,8 @@ def hyper_index( - data: Union[pd.DataFrame, pd.Series], ULTR: int = 140, a: float = 1.1, c: int = 30 -) -> pd.DataFrame: + data: Union[pd.DataFrame, pd.Series, np.ndarray, list], ULTR: int = 140, a: float = 1.1, c: int = 30 +) -> pd.DataFrame|float: """ Calculate Hyperglycemia Index. @@ -19,8 +19,8 @@ def hyper_index( Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, or a numpy array or list of glucose values ULTR : int, default=140 Upper Limit of Target Range, in mg/dL a : float, default=1.1 @@ -31,10 +31,10 @@ def hyper_index( Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column for the Hyperglycemia Index value. If a Series of glucose values is passed, - then a DataFrame without the subject id is returned. + then a float is returned. References ---------- @@ -62,50 +62,32 @@ def hyper_index( 0 0.106 """ # Handle Series input - is_vector = False - if isinstance(data, (list, np.ndarray)): - data = pd.Series(data) - if isinstance(data, pd.Series): - is_vector = True - data = data.dropna() - if len(data) == 0: - return pd.DataFrame({"GVP": [np.nan]}) - - # Convert to DataFrame format for processing - data = pd.DataFrame( - { - "id": ["subject1"] * len(data), - "time": pd.date_range( - start="2020-01-01", periods=len(data), freq="5min" - ), - "gl": data.values, - } - ) + if isinstance(data, (pd.Series,list, np.ndarray)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return hyper_index_single(data, ULTR, a, c) # Check and prepare data data = check_data_columns(data) # Calculate hyper_index for each subject - result = [] - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject] - # Remove NA values - subject_data = subject_data.dropna(subset=["gl"]) - - if len(subject_data) == 0: - continue - - # Calculate hyper_index - hyper_values = subject_data[subject_data["gl"] > ULTR]["gl"] - ULTR - hyper_index = np.sum(hyper_values**a) / (len(subject_data) * c) - - result.append({"id": subject, "hyper_index": hyper_index}) - - # Convert to DataFrame - out = pd.DataFrame(result) - - # Remove id column if input was a Series - if is_vector and not out.empty: - out = out.drop("id", axis=1) + out = data.groupby('id').agg( + hyper_index = ("gl", lambda x: hyper_index_single(x, ULTR, a, c)) + ).reset_index() return out + +def hyper_index_single( + gl: pd.Series, ULTR: int = 140, a: float = 1.1, c: int = 30 +) -> float: + """ + Calculate Hyperglycemia Index for a single subject. 
+ """ + gl = gl.dropna() + if len(gl) == 0: + return np.nan + # Calculate hyper_index + hyper_values = gl[gl > ULTR] - ULTR + hyper_index = np.sum(hyper_values**a) / (len(gl) * c) + + return hyper_index \ No newline at end of file diff --git a/iglu_python/hypo_index.py b/iglu_python/hypo_index.py index 5cb5eb2..c92ac37 100644 --- a/iglu_python/hypo_index.py +++ b/iglu_python/hypo_index.py @@ -7,8 +7,8 @@ def hypo_index( - data: Union[pd.DataFrame, pd.Series], LLTR: int = 80, b: float = 2, d: int = 30 -) -> pd.DataFrame: + data: Union[pd.DataFrame, pd.Series, np.ndarray, list], LLTR: int = 80, b: float = 2, d: int = 30 +) -> pd.DataFrame|float: """ Calculate Hypoglycemia Index. @@ -19,8 +19,9 @@ def hypo_index( Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, + or a numpy array or list of glucose values LLTR : int, default=80 Lower Limit of Target Range, in mg/dL b : float, default=2 @@ -31,10 +32,10 @@ def hypo_index( Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column for the Hypoglycemia Index value. If a Series of glucose values is passed, - then a DataFrame without the subject id is returned. + then a float is returned. References ---------- @@ -62,50 +63,27 @@ def hypo_index( 0 0.106 """ # Handle Series input - is_vector = False - if isinstance(data, (list, np.ndarray)): - data = pd.Series(data) - if isinstance(data, pd.Series): - is_vector = True - data = data.dropna() - if len(data) == 0: - return pd.DataFrame({"GVP": [np.nan]}) - - # Convert to DataFrame format for processing - data = pd.DataFrame( - { - "id": ["subject1"] * len(data), - "time": pd.date_range( - start="2020-01-01", periods=len(data), freq="5min" - ), - "gl": data.values, - } - ) - - # Check and prepare data + if isinstance(data, (pd.Series,list, np.ndarray)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return hypo_index_single(data, LLTR, b, d) + data = check_data_columns(data) - - # Calculate hypo_index for each subject - result = [] - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject] - # Remove NA values - subject_data = subject_data.dropna(subset=["gl"]) - - if len(subject_data) == 0: - continue - - # Calculate hypo_index - hypo_values = LLTR - subject_data[subject_data["gl"] < LLTR]["gl"] - hypo_index = np.sum(hypo_values**b) / (len(subject_data) * d) - - result.append({"id": subject, "hypo_index": hypo_index}) - - # Convert to DataFrame - out = pd.DataFrame(result) - - # Remove id column if input was a Series - if is_vector and not out.empty: - out = out.drop("id", axis=1) - + out = data.groupby('id').agg( + hypo_index = ("gl", lambda x: hypo_index_single(x, LLTR, b, d)) + ).reset_index() return out + +def hypo_index_single( + gl: pd.Series, LLTR: int = 80, b: float = 2, d: int = 30 +) -> float: + """ + Calculate Hypoglycemia Index for a single subject. 
+
+    """
+    gl = gl.dropna()
+    if len(gl) == 0:
+        return np.nan
+    # Calculate hypo_index
+    hypo_values = LLTR - gl[gl < LLTR]
+    hypo_index = np.sum(hypo_values**b) / (len(gl) * d)
+    return hypo_index
diff --git a/tests/test_hyper_index.py b/tests/test_hyper_index.py
index c06e8c0..d7c2bfc 100644
--- a/tests/test_hyper_index.py
+++ b/tests/test_hyper_index.py
@@ -101,6 +101,29 @@ def test_hyper_index_basic():
     subject2_index = result[result["id"] == "subject2"]["hyper_index"].iloc[0]
     assert subject1_index > subject2_index
 
+def test_hyper_index_list_input():
+
+    # Create test data as a list
+    data = [150, 200, 180, 130, 190, 160]
+
+    # Calculate hyper_index
+    result = iglu.hyper_index(data)
+    expected = 1.453976
+    # Check output format
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, expected, rtol=1e-3)
+
+def test_hyper_index_numpy_array_input():
+
+    # Create test data as a numpy array
+    data = np.array([150, 200, 180, 130, 190, 160])
+
+    # Calculate hyper_index
+    result = iglu.hyper_index(data)
+    expected = 1.453976
+    # Check output format
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, expected, rtol=1e-3)
 
 def test_hyper_index_series_input():
     """Test hyper_index calculation with Series input."""
@@ -109,15 +132,11 @@ def test_hyper_index_series_input():
 
     # Calculate hyper_index
     result = iglu.hyper_index(data)
-
+    expected = 1.453976
     # Check output format
-    assert isinstance(result, pd.DataFrame)
-    assert "hyper_index" in result.columns
-    assert "id" not in result.columns
-    assert len(result) == 1
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, expected, rtol=1e-3)
 
-    # Check that hyper_index value is non-negative
-    assert result["hyper_index"].iloc[0] >= 0
 
 
 def test_hyper_index_custom_parameters():
diff --git a/tests/test_hypo_index.py b/tests/test_hypo_index.py
index 21d6971..1ea5302 100644
--- a/tests/test_hypo_index.py
+++ b/tests/test_hypo_index.py
@@ -110,15 +110,27 @@ def test_hypo_index_series_input():
 
     # Calculate hypo_index
     result = iglu.hypo_index(data)
+    expected = 7.638889
 
     # Check output format
-    assert isinstance(result, pd.DataFrame)
-    assert "hypo_index" in result.columns
-    assert "id" not in result.columns
-    assert len(result) == 1
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, expected, rtol=1e-3)
 
-    # Check that hypo_index value is non-negative
-    assert result["hypo_index"].iloc[0] >= 0
+def test_hypo_index_list_input():
+    """Test hypo_index calculation with list input."""
+    data = [70, 60, 75, 65, 85, 55]
+    result = iglu.hypo_index(data)
+    expected = 7.638889
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, expected, rtol=1e-3)
+
+def test_hypo_index_numpy_array_input():
+    """Test hypo_index calculation with numpy array input."""
+    data = np.array([70, 60, 75, 65, 85, 55])
+    result = iglu.hypo_index(data)
+    expected = 7.638889
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, expected, rtol=1e-3)
 
 def test_hypo_index_custom_parameters():

From c2645cbd77accdea7bf8fe0796aa48079f54e2a3 Mon Sep 17 00:00:00 2001
From: Stas Khirman
Date: Mon, 16 Jun 2025 23:30:24 +0300
Subject: [PATCH 13/16] support for Series, list and ndarray -> return float or dict

---
 README.md                      |  9 ++---
 iglu_python/igc.py             | 69 +++++++++++++++------------------
 iglu_python/iqr_glu.py         | 46 +++++++++++++++++------
 iglu_python/j_index.py         | 61 ++++++++++++++----------------
 tests/test_igc.py              | 25 +++++++++---
 tests/test_in_range_percent.py | 29 +++++++++++++-
 tests/test_iqr_glu.py          | 20 
++++++---- tests/test_j_index.py | 19 +++------- 8 files changed, 163 insertions(+), 115 deletions(-) diff --git a/README.md b/README.md index 610d1c9..83fed99 100644 --- a/README.md +++ b/README.md @@ -45,12 +45,11 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves | gvp |Glucose Variability Percentage| ✅ | ✅ only Series(DatetimeIndex) returns float | hbgi |High Blood Glucose Index| ✅ | ✅ returns float | | hyper_index |Hyperglycemia Index| ✅ |✅ returns float | -| hyper_index |Hyperglycemia Index| ✅ |✅ returns float | -| hypo_index |Hypoglycemia Index| ✅ | -| igc |Index of Glycemic Control| ✅ | +| hypo_index |Hypoglycemia Index| ✅ |✅ returns float | +| igc |Index of Glycemic Control| ✅ |✅ returns float | | in_range_percent |percentage of values within target ranges| ✅ | ✅ returns dict -| iqr_glu |glucose level interquartile range|✅ | -| j_index |J-Index score for glucose measurements| ✅ | +| iqr_glu |glucose level interquartile range|✅ |✅ returns float | +| j_index |J-Index score for glucose measurements| ✅ |✅ returns float | | lbgi | Low Blood Glucose Index| ✅ | | m_value | M-value of Schlichtkrull et al | ✅ | | mad_glu | Median Absolute Deviation | ✅ | diff --git a/iglu_python/igc.py b/iglu_python/igc.py index c397e9c..0d058e1 100644 --- a/iglu_python/igc.py +++ b/iglu_python/igc.py @@ -9,14 +9,14 @@ def igc( - data: Union[pd.DataFrame, pd.Series], + data: Union[pd.DataFrame, pd.Series, np.ndarray, list], LLTR: int = 80, ULTR: int = 140, a: float = 1.1, b: float = 2, c: int = 30, d: int = 30, -) -> pd.DataFrame: +) -> pd.DataFrame|float: """ Calculate Index of Glycemic Control (IGC). @@ -25,8 +25,8 @@ def igc( Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, or a numpy array or list of glucose values LLTR : int, default=80 Lower Limit of Target Range, in mg/dL ULTR : int, default=140 @@ -42,10 +42,9 @@ def igc( Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column - for the IGC value. If a Series of glucose values is passed, then a DataFrame - without the subject id is returned. + for the IGC value. If a Series of glucose values is passed, then a float is returned. 
References ---------- @@ -73,40 +72,34 @@ def igc( 0 0.106 """ # Handle Series input - is_vector = False - if isinstance(data, (list, np.ndarray)): - data = pd.Series(data) - if isinstance(data, pd.Series): - is_vector = True - data = data.dropna() - if len(data) == 0: - return pd.DataFrame({"GVP": [np.nan]}) - - # Convert to DataFrame format for processing - data = pd.DataFrame( - { - "id": ["subject1"] * len(data), - "time": pd.date_range( - start="2020-01-01", periods=len(data), freq="5min" - ), - "gl": data.values, - } - ) + if isinstance(data, (pd.Series,list, np.ndarray)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return igc_single(data, LLTR, ULTR, a, b, c, d) # Check and prepare data data = check_data_columns(data) - # Calculate hyper_index and hypo_index - out_hyper = hyper_index(data, ULTR=ULTR, a=a, c=c) - out_hypo = hypo_index(data, LLTR=LLTR, b=b, d=d) - - # Combine the indices - out = pd.merge(out_hyper, out_hypo, on="id") - out["IGC"] = out["hyper_index"] + out["hypo_index"] - out = out[["id", "IGC"]] + out = data.groupby('id').agg( + IGC = ("gl", lambda x: igc_single(x, LLTR, ULTR, a, b, c, d)) + ).reset_index() + return out - # Remove id column if input was a Series - if is_vector: - out = out.drop("id", axis=1) +def igc_single( + gl: pd.Series, + LLTR: int = 80, + ULTR: int = 140, + a: float = 1.1, + b: float = 2, + c: int = 30, + d: int = 30 +) -> float: + """ + Calculate Index of Glycemic Control for a single subject. + """ + # Calculate hyper_index and hypo_index + out_hyper = hyper_index(gl, ULTR=ULTR, a=a, c=c) + out_hypo = hypo_index(gl, LLTR=LLTR, b=b, d=d) - return out + out = out_hyper + out_hypo + return out \ No newline at end of file diff --git a/iglu_python/iqr_glu.py b/iglu_python/iqr_glu.py index 6c1b27f..93dc40b 100644 --- a/iglu_python/iqr_glu.py +++ b/iglu_python/iqr_glu.py @@ -6,7 +6,7 @@ from .utils import check_data_columns -def iqr_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: +def iqr_glu(data: Union[pd.DataFrame, pd.Series, np.ndarray, list]) -> pd.DataFrame|float: """ Calculate glucose level interquartile range (IQR). @@ -15,15 +15,14 @@ def iqr_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, or a numpy array or list of glucose values Returns ------- - pd.DataFrame - DataFrame with columns: - - id: subject identifier (if DataFrame input) - - IQR: interquartile range of glucose values (75th percentile - 25th percentile) + pd.DataFrame|float + DataFrame with 1 row for each subject, a column for subject id and a column + for the IQR value. If a Series of glucose values is passed, then a float is returned. 
Examples -------- @@ -44,10 +43,15 @@ def iqr_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: 0 70.0 """ # Handle Series input - if isinstance(data, pd.Series): + if isinstance(data, (pd.Series,list, np.ndarray)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + data = data.dropna() + if len(data) == 0: + return np.nan # Calculate IQR for Series - iqr_val = np.percentile(data, 75) - np.percentile(data, 25) - return pd.DataFrame({"IQR": [iqr_val]}) + iqr_val = iqr_glu_single(data) + return iqr_val # Handle DataFrame input data = check_data_columns(data) @@ -57,8 +61,28 @@ def iqr_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: data = data.dropna() result = ( data.groupby("id") - .agg(IQR=("gl", lambda x: np.percentile(x, 75) - np.percentile(x, 25))) + .agg(IQR=("gl", lambda x: iqr_glu_single(x))) .reset_index() ) return result + +def iqr_glu_single( + gl: pd.Series, +) -> float: + """ + Calculate glucose level interquartile range (IQR) for a single subject. + + Parameters + ---------- + gl : pd.Series + Series of glucose values + + Returns + """ + gl = gl.dropna() + if len(gl) == 0: + return np.nan + # Calculate IQR for Series + iqr_val = np.percentile(gl, 75) - np.percentile(gl, 25) + return iqr_val \ No newline at end of file diff --git a/iglu_python/j_index.py b/iglu_python/j_index.py index 4ce2053..c3b477d 100644 --- a/iglu_python/j_index.py +++ b/iglu_python/j_index.py @@ -1,11 +1,12 @@ from typing import Union +import numpy as np import pandas as pd from .utils import check_data_columns -def j_index(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: +def j_index(data: Union[pd.DataFrame, pd.Series, np.ndarray, list]) -> pd.DataFrame|float: """ Calculate J-Index score for glucose measurements. @@ -15,15 +16,15 @@ def j_index(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, + or a numpy array or list of glucose values Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column - for J-Index value. If a Series of glucose values is passed, then a DataFrame - without the subject id is returned. + for J-Index value. If a Series of glucose values is passed, then a float is returned. 
References ---------- @@ -51,34 +52,30 @@ def j_index(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: 0 1.5000 """ # Handle Series input - if isinstance(data, pd.Series): - # Calculate mean and standard deviation - mean_gl = data.mean() - sd_gl = data.std() - - # Calculate J-index - j_index = 0.001 * (mean_gl + sd_gl) ** 2 - - return pd.DataFrame({"J_index": [j_index]}) + if isinstance(data, (pd.Series,list, np.ndarray)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return j_index_single(data) # Handle DataFrame input data = check_data_columns(data) - # Initialize result list - result = [] - - # Process each subject - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject] - - # Calculate mean and standard deviation - mean_gl = subject_data["gl"].mean() - sd_gl = subject_data["gl"].std() + out = data.groupby('id').agg( + J_index = ("gl", lambda x: j_index_single(x)) + ).reset_index() + return out - # Calculate J-index - j_index = 0.001 * (mean_gl + sd_gl) ** 2 - - result.append({"id": subject, "J_index": j_index}) - - # Convert to DataFrame - return pd.DataFrame(result) +def j_index_single(gl: pd.Series) -> float: + """ + Calculate J-Index score for a single subject. + """ + gl = gl.dropna() + if len(gl) == 0: + return np.nan + # Calculate mean and standard deviation + mean_gl = gl.mean() + sd_gl = gl.std() + + # Calculate J-index + j_index = 0.001 * (mean_gl + sd_gl) ** 2 + return j_index \ No newline at end of file diff --git a/tests/test_igc.py b/tests/test_igc.py index 25b2ac7..6a85ecb 100644 --- a/tests/test_igc.py +++ b/tests/test_igc.py @@ -106,15 +106,28 @@ def test_igc_series_input(): # Calculate IGC result = iglu.igc(data) + expected = 1.453976 # Check output format - assert isinstance(result, pd.DataFrame) - assert "IGC" in result.columns - assert "id" not in result.columns - assert len(result) == 1 + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) + +def test_igc_list_input(): + """Test IGC calculation with list input.""" + data = [150, 200, 180, 130, 190, 160] + result = iglu.igc(data) + expected = 1.453976 + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) + +def test_igc_numpy_array_input(): + """Test IGC calculation with numpy array input.""" + data = np.array([150, 200, 180, 130, 190, 160]) + result = iglu.igc(data) + expected = 1.453976 + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) - # Check that IGC value is non-negative - assert result["IGC"].iloc[0] >= 0 def test_igc_custom_parameters(): diff --git a/tests/test_in_range_percent.py b/tests/test_in_range_percent.py index 9e20ab1..a39876e 100644 --- a/tests/test_in_range_percent.py +++ b/tests/test_in_range_percent.py @@ -119,10 +119,35 @@ def test_in_range_percent_series_input(): assert len(result) == 2 # Check that percentages are between 0 and 100 - assert (result["in_range_70_180"] >= 0) and (result["in_range_70_180"] <= 100) - assert (result["in_range_63_140"] >= 0) and (result["in_range_63_140"] <= 100) + np.testing.assert_allclose(result["in_range_70_180"], 83.33, rtol=1e-3) + np.testing.assert_allclose(result["in_range_63_140"], 66.66, rtol=1e-3) +def test_in_range_percent_list_input(): + """Test in_range_percent calculation with list input.""" + data = [80, 90, 100, 130, 190, 160] + result = iglu.in_range_percent(data) + assert isinstance(result, dict) + assert "in_range_70_180" in result + assert "in_range_63_140" in result + 
assert len(result) == 2 + + # Check that percentages are between 0 and 100 + np.testing.assert_allclose(result["in_range_70_180"], 83.33, rtol=1e-3) + np.testing.assert_allclose(result["in_range_63_140"], 66.66, rtol=1e-3) +def test_in_range_percent_numpy_array_input(): + """Test in_range_percent calculation with numpy array input.""" + data = np.array([80, 90, 100, 130, 190, 160]) + result = iglu.in_range_percent(data) + assert isinstance(result, dict) + assert "in_range_70_180" in result + assert "in_range_63_140" in result + assert len(result) == 2 + + # Check that percentages are between 0 and 100 + np.testing.assert_allclose(result["in_range_70_180"], 83.33, rtol=1e-3) + np.testing.assert_allclose(result["in_range_63_140"], 66.66, rtol=1e-3) + def test_in_range_percent_custom_targets(): """Test in_range_percent calculation with custom targets.""" data = pd.DataFrame( diff --git a/tests/test_iqr_glu.py b/tests/test_iqr_glu.py index 156c04f..0e8faec 100644 --- a/tests/test_iqr_glu.py +++ b/tests/test_iqr_glu.py @@ -111,13 +111,19 @@ def test_iqr_glu_output_format(): # Test with Series input series_data = pd.Series([150, 155, 160, 165, 140, 145]) result_series = iglu.iqr_glu(series_data) - assert isinstance(result_series, pd.DataFrame) - assert "IQR" in result_series.columns - assert len(result_series) == 1 - assert ( - result_series["IQR"].iloc[0] == 12.5 - ) # 75th percentile (160) - 25th percentile (145) - + assert isinstance(result_series, float) + np.testing.assert_allclose(result_series, 12.5, rtol=1e-3) + + list_data = [150, 155, 160, 165, 140, 145] + result_list = iglu.iqr_glu(list_data) + assert isinstance(result_list, float) + np.testing.assert_allclose(result_list, 12.5, rtol=1e-3) + + array_data = np.array([150, 155, 160, 165, 140, 145]) + result_array = iglu.iqr_glu(array_data) + assert isinstance(result_array, float) + np.testing.assert_allclose(result_array, 12.5, rtol=1e-3) + # Test with empty data empty_data = pd.DataFrame(columns=["id", "time", "gl"]) with pytest.raises(ValueError): diff --git a/tests/test_j_index.py b/tests/test_j_index.py index 53622e1..323e129 100644 --- a/tests/test_j_index.py +++ b/tests/test_j_index.py @@ -96,25 +96,16 @@ def test_j_index_basic(): # Check calculations # For subject1: mean = 175, sd = 25, J-index = 0.001 * (175 + 25)*2 ~~ 40.000 # For subject2: mean = 160, sd = 30, J-index = 0.001 * (160 + 30)*2 ~~ 38.000 - assert ( - abs(result.loc[result["id"] == "subject1", "J_index"].iloc[0] - 45.000) / 45.000 - ) < 0.1 - assert ( - abs(result.loc[result["id"] == "subject2", "J_index"].iloc[0] - 40.000) / 40.000 - ) < 0.1 + np.testing.assert_allclose(result.loc[result["id"] == "subject1", "J_index"].iloc[0], 44.249369, rtol=1e-3) + np.testing.assert_allclose(result.loc[result["id"] == "subject2", "J_index"].iloc[0], 40.97645, rtol=1e-3) + # Test with Series input result_series = iglu.j_index(data["gl"]) # Check output format for Series input - assert isinstance(result_series, pd.DataFrame) - assert "J_index" in result_series.columns - assert "id" not in result_series.columns - assert len(result_series) == 1 - - # Check calculation for Series input - # Overall mean = 167.5, sd = 27.5, J-index = 0.001 * (167.5 + 27.5)**2 ~~ 40.000 - assert (abs(result_series["J_index"].iloc[0] - 40.000) / 40.000) < 0.1 + assert isinstance(result_series, float) + np.testing.assert_allclose(result_series, 40.216444, rtol=1e-3) def test_j_index_empty_data(): From 3614774d5ae9fc9aeffca165e705da8ce76d2b68 Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Mon, 16 Jun 
2025 23:56:09 +0300 Subject: [PATCH 14/16] support for Series, list and ndarray -> return float or dict --- README.md | 20 +++--- iglu_python/lbgi.py | 34 ++++------ iglu_python/m_value.py | 41 +++++++----- iglu_python/mad_glu.py | 49 +++++++------- iglu_python/mag.py | 136 +++++++++++++++++--------------------- iglu_python/median_glu.py | 19 +++--- tests/test_lbgi.py | 27 ++++++-- tests/test_m_value.py | 26 ++++++-- tests/test_mad_glu.py | 5 +- tests/test_mag.py | 11 +-- tests/test_median_glu.py | 6 +- 11 files changed, 194 insertions(+), 180 deletions(-) diff --git a/README.md b/README.md index 83fed99..be46160 100644 --- a/README.md +++ b/README.md @@ -25,15 +25,15 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves | Function | Description | IGLU-R test compatibility | list /ndarray /Series input | TZ | Comments | |----------|-------------|-------------|-------------------|----|----------| -| above_percent | percentage of values above target thresholds| ✅ |✅ returns dict ||| -| active_percent | percentage of time CGM was active | ✅ | ✅ only Series(DatetimeIndex) returns dict[str:float] +| above_percent | percentage of values above target thresholds| ✅ |✅ returns Dict[str,float] ||| +| active_percent | percentage of time CGM was active | ✅ | ✅ only Series(DatetimeIndex) returns Dict[str,float]| | adrr | average daily risk range | ✅ |✅ only Series(DatetimeIndex) returns float | | auc| Area Under Curve | 🟡 (0.01 precision) |✅ only Series(DatetimeIndex) returns float || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)| -| below_percent| percentage of values below target thresholds| ✅ | ✅ returns dict +| below_percent| percentage of values below target thresholds| ✅ | ✅ returns Dict[str,float]| | cogi |Coefficient of Glucose Irregularity | ✅ | ✅ returns float | conga | Continuous Overall Net Glycemic Action |✅ | ✅ only Series(DatetimeIndex) returns float | cv_glu | Coefficient of Variation | ✅| ✅ returns float | -| cv_measures |Coefficient of Variation subtypes (CVmean and CVsd) |✅ |✅ only Series(DatetimeIndex) returns dict| | +| cv_measures |Coefficient of Variation subtypes (CVmean and CVsd) |✅ |✅ only Series(DatetimeIndex) returns Dict[str,float]| | | ea1c |estimated A1C (eA1C) values| ✅ | ✅ returns float | | episode_calculation | Hypo/Hyperglycemic episodes with summary statistics| ✅| 🟡 always returns DataFrame(s)|| | | gmi | Glucose Management Indicator | ✅ | ✅ returns float | @@ -47,16 +47,16 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves | hyper_index |Hyperglycemia Index| ✅ |✅ returns float | | hypo_index |Hypoglycemia Index| ✅ |✅ returns float | | igc |Index of Glycemic Control| ✅ |✅ returns float | -| in_range_percent |percentage of values within target ranges| ✅ | ✅ returns dict +| in_range_percent |percentage of values within target ranges| ✅ | ✅ returns Dict[str,float]| | iqr_glu |glucose level interquartile range|✅ |✅ returns float | | j_index |J-Index score for glucose measurements| ✅ |✅ returns float | -| lbgi | Low Blood Glucose Index| ✅ | -| m_value | M-value of Schlichtkrull et al | ✅ | -| mad_glu | Median Absolute Deviation | ✅ | -| mag | Mean Absolute Glucose| ✅ | || IMHO, Original R implementation has an error | +| lbgi | Low Blood Glucose Index| ✅ |✅ returns float | +| m_value | M-value of Schlichtkrull et al | ✅ |✅ returns float | +| mad_glu | Median Absolute Deviation | ✅ |✅ returns float | +| mag | Mean Absolute Glucose| ✅ | ✅ only 
Series(DatetimeIndex) returns float ||| IMHO, Original R implementation has an error | | mage | Mean Amplitude of Glycemic Excursions| ✅ |✅ only Series(DatetimeIndex) returns float || See algorithm at [MAGE](https://irinagain.github.io/iglu/articles/MAGE.html) | | mean_glu | Mean glucose value | ✅ | ✅ returns float| -| median_glu |Median glucose value| ✅ | +| median_glu |Median glucose value| ✅ |✅ returns float | | modd | Mean of Daily Differences| ✅ | | pgs | Personal Glycemic State | ✅ | || | | quantile_glu |glucose level quantiles| ✅ | diff --git a/iglu_python/lbgi.py b/iglu_python/lbgi.py index 61bf883..ea0a90c 100644 --- a/iglu_python/lbgi.py +++ b/iglu_python/lbgi.py @@ -6,7 +6,7 @@ from .utils import check_data_columns -def lbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: +def lbgi(data: Union[pd.DataFrame, pd.Series, np.ndarray, list]) -> pd.DataFrame|float: r""" Calculate the Low Blood Glucose Index (LBGI) for each subject. @@ -21,15 +21,15 @@ def lbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns ['id', 'time', 'gl'] or Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns ['id', 'time', 'gl'] or Series of glucose values, or a numpy array or list of glucose values in mg/dL Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with columns ['id', 'LBGI'] containing LBGI values for each subject - If input is a Series, returns DataFrame with single row and column 'LBGI' + If input is a Series, returns a float. References ---------- @@ -63,26 +63,18 @@ def lbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: LBGI 0 0.123456 """ - if isinstance(data, pd.Series): - lbgi_value = calculate_lbgi(data) - return pd.DataFrame({"LBGI": [lbgi_value]}) + if isinstance(data, (pd.Series,list, np.ndarray)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return calculate_lbgi(data) # Check DataFrame format check_data_columns(data) - if len(data) == 0: - raise ValueError("Empty DataFrame provided") - - # Calculate LBGI for each subject - results = [] - - for subject_id in data["id"].unique(): - subject_data = data[data["id"] == subject_id]["gl"] - lbgi_value = calculate_lbgi(subject_data) - results.append({"id": subject_id, "LBGI": lbgi_value}) - - return pd.DataFrame(results) - + out = data.groupby('id').agg( + LBGI = ("gl", lambda x: calculate_lbgi(x)) + ).reset_index() + return out def calculate_lbgi(glucose_values: pd.Series) -> float: """ diff --git a/iglu_python/m_value.py b/iglu_python/m_value.py index c151af2..0859a11 100644 --- a/iglu_python/m_value.py +++ b/iglu_python/m_value.py @@ -6,7 +6,7 @@ from .utils import check_data_columns -def m_value(data: Union[pd.DataFrame, pd.Series], r: float = 90) -> pd.DataFrame: +def m_value(data: Union[pd.DataFrame, pd.Series, np.ndarray, list], r: float = 90) -> pd.DataFrame|float: r""" Calculate the M-value of Schlichtkrull et al. (1965) for each subject. 
@@ -22,17 +22,16 @@ def m_value(data: Union[pd.DataFrame, pd.Series], r: float = 90) -> pd.DataFrame Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, or a numpy array or list of glucose values r : float, default=90 A reference value corresponding to basal glycemia in normal subjects Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column - for M-value. If a Series of glucose values is passed, then a DataFrame - without the subject id is returned. + for M-value. If a Series of glucose values is passed, then a float is returned. References ---------- @@ -60,20 +59,26 @@ def m_value(data: Union[pd.DataFrame, pd.Series], r: float = 90) -> pd.DataFrame 0 111.11 """ # Handle Series input - if isinstance(data, pd.Series): - return pd.DataFrame( - {"M_value": [1000 * np.mean(np.abs(np.log10(data / r)) ** 3)]} - ) + if isinstance(data, (pd.Series,list, np.ndarray)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return m_value_single(data, r) # Handle DataFrame input data = check_data_columns(data) - # Calculate M-value for each subject - result = ( - data.groupby("id") - .apply(lambda x: 1000 * np.mean(np.abs(np.log10(x["gl"] / r)) ** 3), include_groups=False) - .reset_index() - ) - result.columns = ["id", "M_value"] + out = data.groupby('id').agg( + M_value = ("gl", lambda x: m_value_single(x, r)) + ).reset_index() + return out + +def m_value_single(gl: pd.Series, r: float = 90) -> float: + """ + Calculate the M-value of Schlichtkrull et al. (1965) for a single subject. + """ + gl = gl.dropna() + if len(gl) == 0: + return np.nan + m_value = 1000 * np.mean(np.abs(np.log10(gl / r)) ** 3) + return m_value - return result diff --git a/iglu_python/mad_glu.py b/iglu_python/mad_glu.py index 642962d..d46edc1 100644 --- a/iglu_python/mad_glu.py +++ b/iglu_python/mad_glu.py @@ -7,8 +7,8 @@ def mad_glu( - data: Union[pd.DataFrame, pd.Series], constant: float = 1.4826 -) -> pd.DataFrame: + data: Union[pd.DataFrame, pd.Series, np.ndarray, list], constant: float = 1.4826 +) -> pd.DataFrame|float: """ Calculate Median Absolute Deviation (MAD) of glucose values. @@ -18,8 +18,9 @@ def mad_glu( Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, + or a numpy array or list of glucose values constant : float, default=1.4826 Scaling factor to multiply the MAD value. The default value of 1.4826 makes the MAD consistent with the standard deviation for normally @@ -27,10 +28,11 @@ def mad_glu( Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with columns: - id: subject identifier (if DataFrame input) - - MAD: MAD value (median absolute deviation of glucose values) + - MAD: MAD value (median absolute deviation of glucose values). + If a Series of glucose values is passed, then a float is returned. 
Examples -------- @@ -51,26 +53,25 @@ def mad_glu( 0 27.5 """ # Handle Series input - if isinstance(data, pd.Series): - # Calculate MAD for the Series - mad_val = np.median(np.abs(data - np.median(data))) * constant - return pd.DataFrame({"MAD": [mad_val]}) + if isinstance(data, (pd.Series,list, np.ndarray)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return mad_glu_single(data, constant) # Handle DataFrame input data = check_data_columns(data) - # Calculate MAD for each subject - result = [] - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject] - if len(subject_data.dropna(subset=["gl"])) == 0: - continue + out = data.groupby('id').agg( + MAD = ("gl", lambda x: mad_glu_single(x, constant)) + ).reset_index() + return out - # Calculate MAD for this subject - mad_val = ( - np.median(np.abs(subject_data["gl"] - np.median(subject_data["gl"]))) - * constant - ) - result.append({"id": subject, "MAD": mad_val}) - - return pd.DataFrame(result) +def mad_glu_single(gl: pd.Series, constant: float = 1.4826) -> float: + """ + Calculate Median Absolute Deviation (MAD) of glucose values for a single subject. + """ + gl = gl.dropna() + if len(gl) == 0: + return np.nan + mad_val = np.median(np.abs(gl - np.median(gl))) * constant + return mad_val \ No newline at end of file diff --git a/iglu_python/mag.py b/iglu_python/mag.py index 9e91c55..faddd1d 100644 --- a/iglu_python/mag.py +++ b/iglu_python/mag.py @@ -12,7 +12,7 @@ def mag( dt0: Optional[int] = None, inter_gap: int = 45, tz: str = "", -) -> pd.DataFrame: +) -> pd.DataFrame|float: """ Calculate Mean Absolute Glucose (MAG). @@ -40,10 +40,11 @@ def mag( Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with columns: - id: subject identifier (if DataFrame input) - MAG: Mean Absolute Glucose value + If a Series of glucose values is passed, then a float is returned. References ---------- @@ -70,84 +71,65 @@ def mag( 0 66.0 """ - def mag_single(data: pd.DataFrame, n: int) -> float: - """Calculate MAG for a single subject""" - # Convert data to day-by-day format - data_ip = CGMS2DayByDay(data, dt0=dt0, inter_gap=inter_gap, tz=tz) - dt0_actual = data_ip[2] # Time between measurements in minutes - - # Ensure n is not less than data collection frequency - if n < dt0_actual: - n = dt0_actual - - # Calculate number of readings per interval - readings_per_interval = round(n / dt0_actual) - - # Get glucose values and calculate differences - gl_values = data_ip[0].flatten() # Flatten the matrix - # gl_values = gl_values[~np.isnan(gl_values)] # Remove NaN values - - if len(gl_values) <= 1: - return 0.0 - - # Calculate absolute differences between readings n minutes apart - lag = readings_per_interval - - if is_iglu_r_compatible(): - idx = np.arange(0,len(gl_values),lag) - gl_values_idx = gl_values[idx] - diffs = gl_values_idx[1:] - gl_values_idx[:-1] - diffs = np.abs(diffs) - diffs = diffs[~np.isnan(diffs)] - # to be IGLU-R test compatible, imho they made error. 
-        # has to be total_time_hours = ((len(diffs)) * n) / 60
-        total_time_hours = ((len(gl_values_idx[~np.isnan(gl_values_idx)])) * n) / 60
-        if total_time_hours == 0:
-            return 0.0
-        mag = float(np.sum(diffs) / total_time_hours)
-    else:
-        diffs = gl_values[lag:] - gl_values[:-lag]
-        diffs = np.abs(diffs)
-        diffs = diffs[~np.isnan(diffs)]
-
-        # Calculate MAG: sum of absolute differences divided by total time in hours
-        total_time_hours = ((len(diffs)) * n) / 60
-        if total_time_hours == 0:
-            return 0.0
-        mag = float(np.sum(diffs) / total_time_hours)
-
-    return mag
-
     # Handle Series input
     if isinstance(data, pd.Series):
-        # Convert Series to DataFrame format
-        data_df = pd.DataFrame(
-            {
-                "id": ["subject1"] * len(data),
-                "time": pd.date_range(
-                    start="2020-01-01", periods=len(data), freq="5min"
-                ),
-                "gl": data.values,
-            }
-        )
-        mag_val = mag_single(data_df, n)
-        return pd.DataFrame({"MAG": [mag_val]})
+        if not isinstance(data.index, pd.DatetimeIndex):
+            raise ValueError("Series must have a DatetimeIndex")
+        return mag_single(data, n, dt0, inter_gap, tz)

     # Handle DataFrame input
     data = check_data_columns(data)
+    data.set_index('time', drop=True, inplace=True)
+
+    out = data.groupby('id').agg(
+        MAG = ("gl", lambda x: mag_single(x, n, dt0, inter_gap, tz))
+    ).reset_index()
+    return out
+
+def mag_single(gl: pd.Series, n: int = 60, dt0: Optional[int] = None, inter_gap: int = 45, tz: str = "") -> float:
+    """Calculate MAG for a single subject"""
+    # Convert data to day-by-day format
+    data_ip = CGMS2DayByDay(gl, dt0=dt0, inter_gap=inter_gap, tz=tz)
+    dt0_actual = data_ip[2]  # Time between measurements in minutes
+
+    # Ensure n is not less than the data collection frequency
+    if n < dt0_actual:
+        n = dt0_actual
+
+    # Calculate the number of readings per interval
+    readings_per_interval = round(n / dt0_actual)
+
+    # Get glucose values and calculate differences
+    gl_values = data_ip[0].flatten()  # Flatten the day-by-day matrix
+    # gl_values = gl_values[~np.isnan(gl_values)]  # Remove NaN values
+
+    if len(gl_values) <= 1:
+        return 0.0
+
+    # Calculate absolute differences between readings n minutes apart
+    lag = readings_per_interval
+
+    if is_iglu_r_compatible():
+        idx = np.arange(0, len(gl_values), lag)
+        gl_values_idx = gl_values[idx]
+        diffs = gl_values_idx[1:] - gl_values_idx[:-1]
+        diffs = np.abs(diffs)
+        diffs = diffs[~np.isnan(diffs)]
+        # Kept for compatibility with the iglu R tests: the reference
+        # implementation divides by the number of non-NaN readings, which looks
+        # like an error there; the denominator arguably should be
+        # total_time_hours = (len(diffs) * n) / 60
+        total_time_hours = ((len(gl_values_idx[~np.isnan(gl_values_idx)])) * n) / 60
+        if total_time_hours == 0:
+            return 0.0
+        mag = float(np.sum(diffs) / total_time_hours)
+    else:
+        diffs = gl_values[lag:] - gl_values[:-lag]
+        diffs = np.abs(diffs)
+        diffs = diffs[~np.isnan(diffs)]
+
+        # Calculate MAG: sum of absolute differences divided by total time in hours
+        total_time_hours = ((len(diffs)) * n) / 60
+        if total_time_hours == 0:
+            return 0.0
+        mag = float(np.sum(diffs) / total_time_hours)
-    # Ensure n is an integer
-    if not isinstance(n, int):
-        n = round(n)
-
-    # Calculate MAG for each subject
-    result = []
-    for subject in data["id"].unique():
-        subject_data = data[data["id"] == subject].copy()
-        if len(subject_data.dropna(subset=["gl"])) == 0:
-            continue
-
-        mag_val = mag_single(subject_data, n)
-        result.append({"id": subject, "MAG": mag_val})
-
-    return pd.DataFrame(result)
+    return mag
diff --git a/iglu_python/median_glu.py b/iglu_python/median_glu.py
index ef0f9fe..b23c735 100644
--- a/iglu_python/median_glu.py
+++ b/iglu_python/median_glu.py
@@ -1,11 +1,12 @@
 from typing import Union

 import pandas as pd
+import numpy as np

 from .utils import check_data_columns


-def median_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:
+def median_glu(data: Union[pd.DataFrame, pd.Series, np.ndarray, list]) -> pd.DataFrame|float:
     """
     Calculate median glucose value for each subject.

@@ -15,15 +16,15 @@ def median_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:

     Parameters
     ----------
-    data : Union[pd.DataFrame, pd.Series]
-        DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values
+    data : Union[pd.DataFrame, pd.Series, np.ndarray, list]
+        DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values,
+        or a numpy array or list of glucose values

     Returns
     -------
-    pd.DataFrame
+    pd.DataFrame|float
         DataFrame with 1 row for each subject, a column for subject id and a column
-        for median glucose value. If a Series of glucose values is passed, then a DataFrame
-        without the subject id is returned.
+        for median glucose value. If a Series of glucose values is passed, then a float is returned.
Examples -------- @@ -44,8 +45,10 @@ def median_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: 0 160.0 """ # Handle Series input - if isinstance(data, pd.Series): - return pd.DataFrame({"median": [data.median()]}) + if isinstance(data, (pd.Series,list, np.ndarray)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) + return data.median() # Handle DataFrame input data = check_data_columns(data) diff --git a/tests/test_lbgi.py b/tests/test_lbgi.py index cbfaffa..280cc9e 100644 --- a/tests/test_lbgi.py +++ b/tests/test_lbgi.py @@ -103,16 +103,31 @@ def test_lbgi_series_input(): """Test LBGI calculation with Series input.""" data = pd.Series([70, 80, 60, 50]) result = iglu.lbgi(data) + expected = 11.960402 # Check output format - assert isinstance(result, pd.DataFrame) - assert "LBGI" in result.columns - assert len(result) == 1 - assert len(result.columns) == 1 + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) - # Check that LBGI is calculated (should be positive due to hypoglycemia) - assert result.loc[0, "LBGI"] > 0 +def test_lbgi_list_input(): + """Test LBGI calculation with list input.""" + data = [70, 80, 60, 50] + result = iglu.lbgi(data) + expected = 11.960402 + # Check output format + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) + +def test_lbgi_numpy_array_input(): + """Test LBGI calculation with numpy array input.""" + data = np.array([70, 80, 60, 50]) + result = iglu.lbgi(data) + expected = 11.960402 + + # Check output format + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) def test_lbgi_empty_data(): """Test LBGI calculation with empty DataFrame.""" diff --git a/tests/test_m_value.py b/tests/test_m_value.py index 42c441b..aef13d1 100644 --- a/tests/test_m_value.py +++ b/tests/test_m_value.py @@ -106,13 +106,31 @@ def test_m_value_series_input(): """Test M-value calculation with Series input.""" data = pd.Series([90, 180, 90, 90]) result = iglu.m_value(data) + expected = 6.819764 # Check output format - assert isinstance(result, pd.DataFrame) - assert "M_value" in result.columns - assert len(result) == 1 - assert len(result.columns) == 1 + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) + +def test_m_value_list_input(): + """Test M-value calculation with list input.""" + data = [90, 180, 90, 90] + result = iglu.m_value(data) + expected = 6.819764 + # Check output format + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) + +def test_m_value_numpy_array_input(): + """Test M-value calculation with numpy array input.""" + data = np.array([90, 180, 90, 90]) + result = iglu.m_value(data) + expected = 6.819764 + + # Check output format + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) def test_m_value_custom_reference(): """Test M-value calculation with custom reference value.""" diff --git a/tests/test_mad_glu.py b/tests/test_mad_glu.py index 6abb88e..e524703 100644 --- a/tests/test_mad_glu.py +++ b/tests/test_mad_glu.py @@ -111,9 +111,8 @@ def test_mad_glu_output_format(): # Test with Series input series_data = pd.Series([150, 155, 160, 165, 140, 145]) result_series = iglu.mad_glu(series_data) - assert isinstance(result_series, pd.DataFrame) - assert "MAD" in result_series.columns - assert len(result_series) == 1 + assert isinstance(result_series, float) + np.testing.assert_allclose(result_series, 
11.1195, rtol=1e-3) # Test with empty data empty_data = pd.DataFrame(columns=["id", "time", "gl"]) diff --git a/tests/test_mag.py b/tests/test_mag.py index a85dd1d..a60b7f0 100644 --- a/tests/test_mag.py +++ b/tests/test_mag.py @@ -105,11 +105,12 @@ def test_mag_basic(): def test_mag_series_input(): """Test mag function with Series input""" - series_data = pd.Series([150, 155, 160, 165, 140, 145]) - result = iglu.mag(series_data) - assert isinstance(result, pd.DataFrame) - assert "MAG" in result.columns - assert len(result) == 1 + series_data = pd.Series([150, 160, 170, 180, 190, 200, 210, 220], + index=pd.date_range(start="2020-01-01 10:00:00", periods=8, freq="5min")) + result = iglu.mag(series_data,n=20) + expected = 60 + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=1e-3) def test_mag_empty_data(): diff --git a/tests/test_median_glu.py b/tests/test_median_glu.py index 8c592bc..ece6458 100644 --- a/tests/test_median_glu.py +++ b/tests/test_median_glu.py @@ -103,10 +103,8 @@ def test_median_glu_series(): """Test median_glu with Series input""" series_data = pd.Series([150, 155, 160, 165, 140, 145]) result = iglu.median_glu(series_data) - assert isinstance(result, pd.DataFrame) - assert "median" in result.columns - assert len(result) == 1 - assert result["median"].iloc[0] == 152.5 # Median of all values + assert isinstance(result, float) + np.testing.assert_allclose(result, 152.5, rtol=1e-3) def test_median_glu_empty(): From bec5fa5d73ae3b84bd87a5fe582f599c7f99ce80 Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Tue, 17 Jun 2025 01:25:19 +0300 Subject: [PATCH 15/16] support for Series, list and ndarray -> return float or dict --- README.md | 14 ++--- iglu_python/modd.py | 87 +++++++++++++------------------ iglu_python/pgs.py | 100 +++++++++++++++++------------------- iglu_python/quantile_glu.py | 20 ++++---- iglu_python/range_glu.py | 20 +++++--- iglu_python/roc.py | 11 ++-- iglu_python/sd_measures.py | 71 ++++++++++++------------- iglu_python/sd_roc.py | 47 +++++++---------- iglu_python/summary_glu.py | 6 +-- tests/test_modd.py | 6 +-- tests/test_pgs.py | 12 +++-- tests/test_quantile_glu.py | 23 +++++++-- tests/test_range_glu.py | 6 +-- tests/test_sd_measures.py | 52 ++++++++++++++++++- tests/test_sd_roc.py | 20 +++++--- tests/test_summary_glu.py | 37 +++++++------ 16 files changed, 284 insertions(+), 248 deletions(-) diff --git a/README.md b/README.md index be46160..49e4ce9 100644 --- a/README.md +++ b/README.md @@ -57,14 +57,14 @@ Unless noted, IGLU-R test compatability is considered successful if it achieves | mage | Mean Amplitude of Glycemic Excursions| ✅ |✅ only Series(DatetimeIndex) returns float || See algorithm at [MAGE](https://irinagain.github.io/iglu/articles/MAGE.html) | | mean_glu | Mean glucose value | ✅ | ✅ returns float| | median_glu |Median glucose value| ✅ |✅ returns float | -| modd | Mean of Daily Differences| ✅ | -| pgs | Personal Glycemic State | ✅ | || | -| quantile_glu |glucose level quantiles| ✅ | -| range_glu |glucose level range| ✅ | -| roc | Rate of Change| ✅ | +| modd | Mean of Daily Differences| ✅ | ✅ only Series(DatetimeIndex) returns float| +| pgs | Personal Glycemic State | ✅ |✅ only Series(DatetimeIndex) returns float| || +| quantile_glu |glucose level quantiles| ✅ |✅ returns List[float] | +| range_glu |glucose level range| ✅ |✅ returns float| +| roc | Rate of Change| ✅ |🟡 always returns DataFrame| | sd_glu | standard deviation of glucose values| ✅ | ✅ returns float -| sd_measures |various standard deviation 
subtypes| ✅ | -| sd_roc | standard deviation of the rate of change| ✅ | ||| +| sd_measures |various standard deviation subtypes| ✅ |✅ only Series(DatetimeIndex) returns Dict[str,float]| +| sd_roc | standard deviation of the rate of change| ✅ |✅ only Series(DatetimeIndex) returns float || | summary_glu | summary glucose level| ✅ | | process_data | Data Pre-Processor | ✅ | | CGMS2DayByDay |Interpolate glucose input| ✅ | diff --git a/iglu_python/modd.py b/iglu_python/modd.py index c837fe3..af26259 100644 --- a/iglu_python/modd.py +++ b/iglu_python/modd.py @@ -7,8 +7,8 @@ def modd( - data: Union[pd.DataFrame, pd.Series], lag: int = 1, tz: str = "" -) -> pd.DataFrame: + data: Union[pd.DataFrame, pd.Series, np.ndarray, list], lag: int = 1, tz: str = "" +) -> pd.DataFrame|float: """ Calculate Mean of Daily Differences (MODD). @@ -18,8 +18,9 @@ def modd( Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, + or a numpy array or list of glucose values lag : int, default=1 Integer indicating which lag (# days) to use. Default is 1. tz : str, default="" @@ -27,10 +28,11 @@ def modd( Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with columns: - id: subject identifier (if DataFrame input) - - MODD: Mean of Daily Differences value + - MODD: Mean of Daily Differences value. + If a Series of glucose values is passed, then a float is returned. References ---------- @@ -56,55 +58,40 @@ def modd( 0 45.0 """ - def modd_single(data: pd.DataFrame) -> float: - """Calculate MODD for a single subject""" - # Convert data to day-by-day format - data_ip = CGMS2DayByDay(data, tz=tz) - gl_by_id_ip = data_ip[0].flatten() # Get interpolated glucose values - dt0 = data_ip[2] # Get time frequency - - # Calculate absolute differences with specified lag - # lag is in days, so we need to convert to minutes and divide of dt0 frequency - shift = int(lag * 24 * 60 / dt0) # Convert lag to minutes and divide by dt0 - # Shift array by lag and calculate differences - abs_diffs = np.abs(gl_by_id_ip[shift:] - gl_by_id_ip[:-shift]) - # Remove NaNs - abs_diffs = abs_diffs[~np.isnan(abs_diffs)] # Remove NaNs - - # Calculate mean of absolute differences, ignoring NaN values - if len(abs_diffs) == 0: - modd_val = np.nan - else: - modd_val = np.nanmean(abs_diffs) - - return float(modd_val) if not pd.isna(modd_val) else np.nan - # Handle Series input if isinstance(data, pd.Series): if not isinstance(data.index, pd.DatetimeIndex): raise ValueError("Series must have a DatetimeIndex") - data_df = pd.DataFrame( - { - "id": ["subject1"] * len(data.values), - "time": data.index, - "gl": data.values, - } - ) - - modd_val = modd_single(data_df) - return pd.DataFrame({"MODD": [modd_val]}) + return modd_single(data, lag, tz) # Handle DataFrame input data = check_data_columns(data) - # Calculate MODD for each subject - result = [] - for subject in data["id"].unique(): - subject_data = data[data["id"] == subject].copy() - if len(subject_data.dropna(subset=["gl"])) == 0: - continue - - modd_val = modd_single(subject_data) - result.append({"id": subject, "MODD": modd_val}) - - return pd.DataFrame(result) + data.set_index('time', drop=True, inplace=True) + out = data.groupby('id').agg( + MODD = ("gl", lambda x: modd_single(x, lag, tz)) + ).reset_index() + return out + +def modd_single(data: pd.Series, lag: int = 1, tz: 
str = "") -> float: + """Calculate MODD for a single subject""" + # Convert data to day-by-day format + data_ip = CGMS2DayByDay(data, tz=tz) + gl_by_id_ip = data_ip[0].flatten() # Get interpolated glucose values + dt0 = data_ip[2] # Get time frequency + + # Calculate absolute differences with specified lag + # lag is in days, so we need to convert to minutes and divide of dt0 frequency + shift = int(lag * 24 * 60 / dt0) # Convert lag to minutes and divide by dt0 + # Shift array by lag and calculate differences + abs_diffs = np.abs(gl_by_id_ip[shift:] - gl_by_id_ip[:-shift]) + # Remove NaNs + abs_diffs = abs_diffs[~np.isnan(abs_diffs)] # Remove NaNs + + # Calculate mean of absolute differences, ignoring NaN values + if len(abs_diffs) == 0: + modd_val = np.nan + else: + modd_val = np.nanmean(abs_diffs) + + return float(modd_val) if not pd.isna(modd_val) else np.nan diff --git a/iglu_python/pgs.py b/iglu_python/pgs.py index df7816c..a930133 100644 --- a/iglu_python/pgs.py +++ b/iglu_python/pgs.py @@ -12,7 +12,7 @@ def pgs( data: Union[pd.DataFrame, pd.Series], dur_length: int = 20, end_length: int = 30 -) -> pd.DataFrame: +) -> pd.DataFrame|float: """ Calculate Personal Glycemic State (PGS). @@ -23,7 +23,8 @@ def pgs( Parameters ---------- data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values. + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, + or a numpy array or list of glucose values Should only be data for 1 subject. In case multiple subject ids are detected, a warning is produced and only 1st subject is used. dur_length : int, optional @@ -36,9 +37,9 @@ def pgs( Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with 1 row for each subject, a column for subject id and a column - for PGS value. + for PGS value. If a Series of glucose values is passed, then a float is returned. 
    Notes
    -----
@@ -82,57 +83,48 @@
     if isinstance(data, pd.Series):
         if not isinstance(data.index, pd.DatetimeIndex):
             raise ValueError("Series must have a DatetimeIndex")
-        data = pd.DataFrame(
-            {
-                "id": ["subject1"] * len(data.values),
-                "time": data.index,
-                "gl": data.values,
-            }
-        )
+        return pgs_single(data, dur_length, end_length)

     # Handle DataFrame input
     data = check_data_columns(data)
+    data.set_index('time', drop=True, inplace=True)
+
+    out = data.groupby('id').agg(
+        PGS = ("gl", lambda x: pgs_single(x, dur_length, end_length))
+    ).reset_index()
+    return out
+
+def pgs_single(gl: pd.Series, dur_length: int = 20, end_length: int = 30) -> float:
+    """Calculate PGS for a single subject"""
+    # Calculate components
+    gvp_val = gvp(gl)
+    mean_val = mean_glu(gl)
+    ptir_val = in_range_percent(gl, target_ranges=[[70, 180]])['in_range_70_180']
+
+    # Calculate episode components
+    eps = episode_calculation(
+        gl,
+        lv1_hypo=70,
+        lv2_hypo=54,
+        dur_length=dur_length,
+        end_length=end_length,
+    )
+    n54 = eps["avg_ep_per_day"].iloc[1] * 7  # Convert to weekly episodes
+    n70 = eps["avg_ep_per_day"].iloc[5] * 7  # Use lv1-exclusive episodes, not the lv1 superset
+
+    # Calculate PGS components
+    f_gvp = 1 + (9 / (1 + np.exp(-0.049 * (gvp_val - 65.47))))
+    f_ptir = 1 + (9 / (1 + np.exp(0.0833 * (ptir_val - 55.04))))
+    f_mg = 1 + 9 * (
+        (1 / (1 + np.exp(0.1139 * (mean_val - 72.08))))
+        + (1 / (1 + np.exp(-0.09195 * (mean_val - 157.57))))
+    )
+
+    f_h54 = 0.5 + 4.5 * (1 - np.exp(-0.91093 * n54))
+    f_h70 = 0.5714 * n70 + 0.625 if n70 <= 7.65 else 5
+
+    # Calculate final PGS score
+    pgs_score = f_gvp + f_ptir + f_mg + f_h54 + f_h70
+
+    return pgs_score
-    def pgs_single(subj_data: pd.DataFrame) -> float:
-        """Calculate PGS for a single subject"""
-        # Calculate components
-        gvp_val = gvp(subj_data)["GVP"].iloc[0]
-        mean_val = mean_glu(subj_data)["mean"].iloc[0]
-        ptir_val = in_range_percent(subj_data, target_ranges=[[70, 180]])["in_range_70_180"].iloc[0]
-
-        # Calculate episode components
-        eps = episode_calculation(
-            subj_data,
-            lv1_hypo=70,
-            lv2_hypo=54,
-            dur_length=dur_length,
-            end_length=end_length,
-        )
-        n54 = eps["avg_ep_per_day"].iloc[1] * 7  # Convert to weekly episodes
-        n70 = eps["avg_ep_per_day"].iloc[5] * 7  # Use lv1 exclusive, not lv1 super set
-
-        # Calculate PGS components
-        f_gvp = 1 + (9 / (1 + np.exp(-0.049 * (gvp_val - 65.47))))
-        f_ptir = 1 + (9 / (1 + np.exp(0.0833 * (ptir_val - 55.04))))
-        f_mg = 1 + 9 * (
-            (1 / (1 + np.exp(0.1139 * (mean_val - 72.08))))
-            + (1 / (1 + np.exp(-0.09195 * (mean_val - 157.57))))
-        )
-
-        f_h54 = 0.5 + 4.5 * (1 - np.exp(-0.91093 * n54))
-        f_h70 = 0.5714 * n70 + 0.625 if n70 <= 7.65 else 5
-
-        # Calculate final PGS score
-        pgs_score = f_gvp + f_ptir + f_mg + f_h54 + f_h70
-
-        return pgs_score
-
-
-    # Calculate PGS for each subject
-    results = []
-    for subject_id in data["id"].unique():
-        subject_data = data[data["id"] == subject_id].copy()
-        pgs_value = pgs_single(subject_data)
-        results.append({"id": subject_id, "PGS": pgs_value})
-
-    return pd.DataFrame(results)
diff --git a/iglu_python/quantile_glu.py b/iglu_python/quantile_glu.py
index 225da66..f493bb6 100644
--- a/iglu_python/quantile_glu.py
+++ b/iglu_python/quantile_glu.py
@@ -7,8 +7,8 @@
 def quantile_glu(
-    data: Union[pd.DataFrame, pd.Series], quantiles: List[float] = None
-) -> pd.DataFrame:
+    data: Union[pd.DataFrame, pd.Series, np.ndarray, list], quantiles: List[float] = None
+) -> pd.DataFrame|list[float]:
     """
     Calculate glucose level quantiles.
@@ -18,17 +18,17 @@ def quantile_glu( Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, + or a numpy array or list of glucose values quantiles : List[float], default=[0, 25, 50, 75, 100] List of quantile values between 0 and 100 Returns ------- - pd.DataFrame + pd.DataFrame|list[float] DataFrame with 1 row for each subject, a column for subject id and a column - for each quantile. If a Series of glucose values is passed, then a DataFrame - without the subject id is returned. + for each quantile. If a Series of glucose values is passed, then a list of floats is returned. Notes ----- @@ -58,10 +58,12 @@ def quantile_glu( # Handle Series input if quantiles is None: quantiles = [0, 25, 50, 75, 100] - if isinstance(data, pd.Series): + if isinstance(data, (pd.Series, np.ndarray, list)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) # Calculate quantiles for Series quantile_vals = np.quantile(data.dropna(), np.array(quantiles) / 100) - return pd.DataFrame([quantile_vals], columns=quantiles) + return quantile_vals.tolist() # Handle DataFrame input data = check_data_columns(data) diff --git a/iglu_python/range_glu.py b/iglu_python/range_glu.py index bdf7473..1f39ef4 100644 --- a/iglu_python/range_glu.py +++ b/iglu_python/range_glu.py @@ -1,11 +1,12 @@ from typing import Union +import numpy as np import pandas as pd from .utils import check_data_columns -def range_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: +def range_glu(data: Union[pd.DataFrame, pd.Series, np.ndarray, list]) -> pd.DataFrame|float: """ Calculate glucose level range. @@ -14,15 +15,16 @@ def range_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: Parameters ---------- - data : Union[pd.DataFrame, pd.Series] - DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values + data : Union[pd.DataFrame, pd.Series, np.ndarray, list] + DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values, + or a numpy array or list of glucose values Returns ------- - pd.DataFrame + pd.DataFrame|float DataFrame with columns: - id: subject identifier (if DataFrame input) - - range: range of glucose values (max - min) + - range: range of glucose values (max - min). If a Series of glucose values is passed, then a float is returned. 
Examples -------- @@ -43,10 +45,12 @@ def range_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: 0 70 """ # Handle Series input - if isinstance(data, pd.Series): + if isinstance(data, (pd.Series, np.ndarray, list)): + if isinstance(data, (np.ndarray, list)): + data = pd.Series(data) # Calculate range for Series - range_val = data.max() - data.min() - return pd.DataFrame({"range": [range_val]}) + range_val = float(data.max() - data.min()) + return range_val # Handle DataFrame input data = check_data_columns(data) diff --git a/iglu_python/roc.py b/iglu_python/roc.py index 5a2f0f6..b75d8d8 100644 --- a/iglu_python/roc.py +++ b/iglu_python/roc.py @@ -118,17 +118,16 @@ def roc_single(data: pd.DataFrame, # Handle Series input if isinstance(data, pd.Series): - data = data.dropna() - if len(data) == 0: - return pd.DataFrame({"ROC": [np.nan]}) + if not isinstance(data.index, pd.DatetimeIndex): + raise ValueError("Series input must have a datetime index") + if len(data.dropna()) == 0: + return pd.DataFrame({"roc": [np.nan]}) # Convert Series to DataFrame format data = pd.DataFrame( { "id": ["subject1"] * len(data), - "time": pd.date_range( - start="2020-01-01", periods=len(data), freq=f"{dt0}min" - ), + "time": data.index, "gl": data.values, } ) diff --git a/iglu_python/sd_measures.py b/iglu_python/sd_measures.py index 6e801de..18242fa 100644 --- a/iglu_python/sd_measures.py +++ b/iglu_python/sd_measures.py @@ -7,10 +7,10 @@ from .utils import CGMS2DayByDay, check_data_columns -def sd_measures(data: pd.DataFrame, +def sd_measures(data: pd.DataFrame|pd.Series, dt0: Optional[int] = None, inter_gap: int = 45, - tz: str = "") -> pd.DataFrame: + tz: str = "") -> pd.DataFrame|dict[str, float]: """ Calculate SD subtypes for glucose variability analysis @@ -20,8 +20,8 @@ def sd_measures(data: pd.DataFrame, Parameters ---------- - data : pd.DataFrame - DataFrame with columns 'id', 'time', and 'gl' (glucose) + data : pd.DataFrame|pd.Series + DataFrame with columns 'id', 'time', and 'gl' (glucose), or a Series of glucose values dt0 : int, optional The time frequency for interpolation in minutes inter_gap : int, default 45 @@ -32,7 +32,8 @@ def sd_measures(data: pd.DataFrame, Returns ------- pd.DataFrame - A DataFrame with columns for id and each of the six SD subtypes: + A DataFrame with columns for id and each of the six SD subtypes. + If a Series of glucose values is passed, then a dictionary is returned. 
        - SDw: vertical within days
        - SDhhmm: between time points
        - SDwsh: within series (1-hour windows)
        - SDdm: horizontal sd
        - SDb: between days, within time points
        - SDbdm: between days, within time points, corrected for changes in daily means
@@ -76,45 +77,38 @@
     >>> result = sd_measures(glucose_data)
     >>> print(result)
     """
+    if isinstance(data, pd.Series):
+        if not isinstance(data.index, pd.DatetimeIndex):
+            raise ValueError("Series must have a DatetimeIndex")
+        return sd_measures_single(data, dt0, inter_gap, tz)

     # Data validation (placeholder - implement check_data_columns equivalent)
     data = check_data_columns(data, time_check=True, tz=tz)

-    subjects = data['id'].unique()
-    len(subjects)
-
-    # Calculate uniform grid for all subjects
-    gdall = []
-    current_dt0 = dt0
-
-    for i, subject_id in enumerate(subjects):
-        subject_data = data[data['id'] == subject_id].copy()
-
-        # Convert to day-by-day format (placeholder - implement CGMS2DayByDay equivalent)
-        gd2d, actual_dates, gd2d_dt0 = CGMS2DayByDay(subject_data, tz=tz, dt0=current_dt0, inter_gap=inter_gap)
-        gdall.append(gd2d)
-
-        # Use the dt0 from first subject for consistency
-        if i == 0:
-            current_dt0 = gd2d_dt0
-
-    dt0 = current_dt0
-
-    # Calculate SD measures for each subject
+    # Calculate the SD measures per subject, one result dict per subject
     results = []
+    for subject_id in data['id'].unique():
+        subject_data = data[data['id'] == subject_id]
+        result_dict = sd_measures_single(subject_data, dt0, inter_gap, tz)
+        result_dict['id'] = subject_id
+        results.append(result_dict)
+
+    # Convert the per-subject dicts into a DataFrame with 'id' as the first column
+    out = pd.DataFrame(results)
+    out = out[['id'] + list(out.columns[:-1])]
+    return out
+
+def sd_measures_single(data: pd.DataFrame | pd.Series,
+                       dt0: Optional[int] = None,
+                       inter_gap: int = 45,
+                       tz: str = "") -> dict[str, float]:
+    """Calculate the six SD subtypes for a single subject."""
+    gd2d, actual_dates, gd2d_dt0 = CGMS2DayByDay(data, tz=tz, dt0=dt0, inter_gap=inter_gap)

-    for i, gd2d in enumerate(gdall):
-        subject_id = subjects[i]
-        result = _calculate_sd_subtypes(gd2d, dt0, subject_id)
-        results.append(result)
-
-    # Combine results
-    final_results = pd.DataFrame(results)
-
-    return final_results
+    return _calculate_sd_subtypes(gd2d, gd2d_dt0)

-def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[str, Any]:
+def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int) -> Dict[str, Any]:
     """
     Calculate all SD subtypes for a single subject's glucose data matrix

@@ -124,8 +118,7 @@ def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[
         2D array where rows are days and columns are time points
     dt0 : int
         Time interval in minutes
-    subject_id : Any
-        Subject identifier
+

     Returns
     -------
@@ -133,7 +126,7 @@ def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[
         Dictionary containing all SD measures
     """

-    result = {'id': subject_id}
+    result = {}

     # 1. SDw - vertical within days
     # Standard deviation within each day, then mean across days
diff --git a/iglu_python/sd_roc.py b/iglu_python/sd_roc.py
index 9c31578..71ea28d 100644
--- a/iglu_python/sd_roc.py
+++ b/iglu_python/sd_roc.py
@@ -13,7 +13,7 @@ def sd_roc(
     dt0: int = 5,
     inter_gap: int = 45,
     tz: str = "",
-) -> pd.DataFrame:
+) -> pd.DataFrame|float:
     """
     Calculate the standard deviation of the rate of change.

@@ -39,9 +39,10 @@ def sd_roc(
     Returns
     -------
-    pd.DataFrame
+    pd.DataFrame|float
         DataFrame with two columns: subject id and standard deviation of the rate of change
-        values for each subject.
+        values for each subject.
+        If a Series of glucose values is passed, then a float is returned.
    Notes
    -----
@@ -91,35 +92,23 @@
         # Convert Series to DataFrame format
         if not isinstance(data.index, pd.DatetimeIndex):
             raise ValueError("Series input must have a datetime index")
-
-        data = pd.DataFrame(
-            {
-                "id": ["subject1"] * len(data),
-                "time": data.index,
-                "gl": data.values,
-            }
-        )
+        return sd_roc_single(data, timelag, dt0, inter_gap, tz)

     # Validate input data
     data = check_data_columns(data, tz=tz)
+    data.set_index('time', drop=True, inplace=True)

     # Calculate ROC values for all subjects
+    out = data.groupby('id').apply(lambda x: sd_roc_single(x['gl'], timelag, dt0, inter_gap, tz)).reset_index()
+    out.columns = ['id', 'sd_roc']
+    return out
+
+def sd_roc_single(data: pd.Series,
+                  timelag: int = 15,
+                  dt0: int = 5,
+                  inter_gap: int = 45,
+                  tz: str = "") -> float:
+    """Calculate the SD of the rate of change for a single subject."""
     roc_data = roc(data, timelag=timelag, dt0=dt0, inter_gap=inter_gap, tz=tz)
-
-    # Group by subject and calculate standard deviation of ROC values
-    # Remove NaN values before calculating standard deviation
-    result = (
-        roc_data.groupby("id")["roc"]
-        .apply(lambda x: np.std(x.dropna()))
-        .reset_index()
-    )
-    result.columns = ["id", "sd_roc"]
-
-    # Handle case where Series was input - remove id column
-    if len(data["id"].unique()) == 1 and data["id"].iloc[0] == "subject1":
-        # Check if this was originally a Series input by looking at the time pattern
-        time_diffs = data["time"].diff().dropna()
-        if len(time_diffs.unique()) <= 1:  # Regular time intervals suggest Series input
-            result = result.drop(columns=["id"])
-
-    return result
+    sd_roc_val = np.nanstd(roc_data.dropna()['roc'], ddof=1)
+    return sd_roc_val
\ No newline at end of file
diff --git a/iglu_python/summary_glu.py b/iglu_python/summary_glu.py
index 407c2e3..c7951ef 100644
--- a/iglu_python/summary_glu.py
+++ b/iglu_python/summary_glu.py
@@ -7,7 +7,7 @@
 from .utils import check_data_columns


-def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.DataFrame:
+def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.DataFrame|dict[str,float]:
     """
     Calculate summary glucose level

@@ -68,8 +68,8 @@ def summary_glu(data: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> pd.Da
         # Calculate summary statistics
         summary_stats = _calculate_summary_stats(glucose_values)

-        # Return DataFrame without id column
-        return pd.DataFrame([summary_stats])
+        # Return the summary statistics as a plain dict, without an id entry
+        return summary_stats

     # Handle DataFrame input
     else:
diff --git a/tests/test_modd.py b/tests/test_modd.py
index 2df5b3f..b17d04c 100644
--- a/tests/test_modd.py
+++ b/tests/test_modd.py
@@ -129,10 +129,8 @@
         index=pd.date_range(start="2020-01-01 00:00:00", periods=3*samples_per_day, freq="5min")
     )
     result = iglu.modd(series_data)
-    assert isinstance(result, pd.DataFrame)
-    assert "MODD" in result.columns
-    assert len(result) == 1
-    assert result["MODD"].iloc[0] == 50.0
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, 50.0, rtol=1e-3)

     # Exception for series without DatetimeIndex
     with pytest.raises(ValueError):
diff --git a/tests/test_pgs.py b/tests/test_pgs.py
index fce35f8..e60e888 100644
--- a/tests/test_pgs.py
+++ b/tests/test_pgs.py
@@ -108,10 +108,14 @@ def test_pgs_series():
         )
     )
     result = iglu.pgs(series_data)
-    assert isinstance(result, pd.DataFrame)
-    assert "PGS" in result.columns
-    assert len(result) == 1
-    assert result["PGS"].iloc[0] > 0  # PGS should always be positive
+    expected = 16.494037
+    assert isinstance(result, float)
+    np.testing.assert_allclose(result, expected, rtol=1e-3)
+
+    # Exception for series without DatetimeIndex
+    with pytest.raises(ValueError):
+        iglu.pgs(series_data.reset_index(drop=True))
+

 def test_pgs_empty():
diff --git a/tests/test_quantile_glu.py b/tests/test_quantile_glu.py
index 0467691..8f8a748 100644
--- a/tests/test_quantile_glu.py
+++ b/tests/test_quantile_glu.py
@@ -101,9 +101,26 @@ def test_quantile_glu_series():
     """Test quantile calculation with Series input"""
     series_data = pd.Series([150, 155, 160, 165, 140, 145])
     result = iglu.quantile_glu(series_data)
-    assert isinstance(result, pd.DataFrame)
-    assert all(col in result.columns for col in [0.0, 25.0, 50.0, 75.0, 100.0])
-    assert len(result) == 1
+    assert isinstance(result, list)
+    assert len(result) == 5
+    np.testing.assert_allclose(result, [140.0, 146.25, 152.5, 158.75, 165.0], rtol=1e-3)
+
+def test_quantile_glu_list():
+    """Test quantile calculation with list input"""
+    list_data = [150, 155, 160, 165, 140, 145]
+    result = iglu.quantile_glu(list_data)
+    assert isinstance(result, list)
+    assert len(result) == 5
+    np.testing.assert_allclose(result, [140.0, 146.25, 152.5, 158.75, 165.0], rtol=1e-3)
+
+
+def test_quantile_glu_numpy_array():
+    """Test quantile calculation with numpy array input"""
+    numpy_data = np.array([150, 155, 160, 165, 140, 145])
+    result = iglu.quantile_glu(numpy_data)
+    assert isinstance(result, list)
+    assert len(result) == 5
+    np.testing.assert_allclose(result, [140.0, 146.25, 152.5, 158.75, 165.0], rtol=1e-3)

 def test_quantile_glu_empty():
diff --git a/tests/test_range_glu.py b/tests/test_range_glu.py
index 1aa7cef..8f33875 100644
--- a/tests/test_range_glu.py
+++ b/tests/test_range_glu.py
@@ -108,10 +108,8 @@ def test_range_glu_series_input():

     series_data = pd.Series([150, 155, 160, 165, 140, 145])
     result = iglu.range_glu(series_data)
-    assert isinstance(result, pd.DataFrame)
-    assert "range" in result.columns
-    assert len(result) == 1
-    assert result["range"].iloc[0] == 25  # max(165) - min(140)
+    assert isinstance(result, (float, np.float64))
+    np.testing.assert_allclose(result, 25, rtol=1e-3)


 def test_range_glu_empty_data():
diff --git a/tests/test_sd_measures.py b/tests/test_sd_measures.py
index fa0f96c..bd5177b 100644
--- a/tests/test_sd_measures.py
+++ b/tests/test_sd_measures.py
@@ -300,4 +300,54 @@ def test_sd_measures_reproducibility():
     result2 = iglu.sd_measures(data)

     # Results should be identical
-    pd.testing.assert_frame_equal(result1, result2)
\ No newline at end of file
+    pd.testing.assert_frame_equal(result1, result2)
+
+
+def test_sd_measures_series_with_datetime_index():
+    """Test SD measures calculation with Series input that has DatetimeIndex."""
+    # Create test data with DatetimeIndex
+    time = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:05:00', '2020-01-01 10:10:00',
+                           '2020-01-02 10:00:00', '2020-01-02 10:05:00', '2020-01-02 10:10:00'])
+    data = pd.Series(
+        [100, 120, 110,  # Day 1: mean=110, std=10 (ddof=1)
+         90, 130, 95],   # Day 2: mean=105, std~21.79 (ddof=1)
+        index=time
+    )
+
+    # Calculate SD measures
+    result = iglu.sd_measures(data)
+
+    # Expected values for the six SD subtypes, pre-computed for this data;
+    # e.g. SDw is the mean of the within-day SDs: mean([10, 21.79]) ~ 15.90
+    expected = {
+        'SDw': 15.8972,
+        'SDhhmm': 15.612495,
+        'SDwsh': 16.341298,
+        'SDdm': 3.535534,
+        'SDb': 8.249579,
+        'SDbdm': 7.071068
+    }
+
+    # Compare results
+    assert isinstance(result, dict)
+    assert len(result) == 6
+    np.testing.assert_allclose(result['SDw'], expected['SDw'], rtol=0.001)
+    np.testing.assert_allclose(result['SDhhmm'], expected['SDhhmm'], rtol=0.001)
+
np.testing.assert_allclose(result['SDwsh'], expected['SDwsh'], rtol=0.001) + np.testing.assert_allclose(result['SDdm'], expected['SDdm'], rtol=0.001) + np.testing.assert_allclose(result['SDb'], expected['SDb'], rtol=0.001) + np.testing.assert_allclose(result['SDbdm'], expected['SDbdm'], rtol=0.001) + + +def test_sd_measures_series_without_datetime_index(): + """Test SD measures calculation with Series input that doesn't have DatetimeIndex.""" + # Create test data with regular index + data = pd.Series( + [100, 120, 110, 90, 130, 95], + index=range(6) # Regular integer index instead of DatetimeIndex + ) + + # Attempt to calculate SD measures - should raise ValueError + with pytest.raises(ValueError, match="Series must have a DatetimeIndex"): + iglu.sd_measures(data) diff --git a/tests/test_sd_roc.py b/tests/test_sd_roc.py index 854290e..a7cd97a 100644 --- a/tests/test_sd_roc.py +++ b/tests/test_sd_roc.py @@ -65,7 +65,7 @@ def test_sd_roc_iglu_r_compatible(scenario): check_freq=True, check_flags=True, check_exact=False, - rtol=0.2, + rtol=0.001, ) @@ -107,16 +107,20 @@ def test_sd_roc_series_input(): index=pd.date_range(start="2020-01-01 00:00:00", periods=n_measurements *2, freq="5min"), ) result = sd_roc(data) + expected = 1.340302 # Check output format - assert isinstance(result, pd.DataFrame) - assert "sd_roc" in result.columns - assert len(result) == 1 - assert len(result.columns) == 1 + assert isinstance(result, float) + np.testing.assert_allclose(result, expected, rtol=0.001) - # Check that SD of ROC is calculated - assert not np.isnan(result.loc[0, "sd_roc"]) - assert result.loc[0, "sd_roc"] > 0 +def test_sd_roc_series_no_timestamp_input(): + """Test SD of ROC calculation with Series input.""" + n_measurements = 12*24 + data = pd.Series( + [100, 120] * (n_measurements//2) + [80,100] * (n_measurements//2), + ) + with pytest.raises(ValueError): + sd_roc(data) def test_sd_roc_series_input_no_datetime_index(): diff --git a/tests/test_summary_glu.py b/tests/test_summary_glu.py index f2b02bc..fc8def7 100644 --- a/tests/test_summary_glu.py +++ b/tests/test_summary_glu.py @@ -127,19 +127,19 @@ def test_summary_glu_vector_input_series(): result = iglu.summary_glu(glucose_series) - assert isinstance(result, pd.DataFrame) - assert len(result) == 1 + assert isinstance(result, dict) + assert len(result) == 6 # Should not have id column for vector input expected_columns = ['Min.', '1st Qu.', 'Median', 'Mean', '3rd Qu.', 'Max.'] - assert list(result.columns) == expected_columns + assert list(result.keys()) == expected_columns # Check values - row = result.iloc[0] - assert row['Min.'] == 100 - assert row['Max.'] == 180 - assert row['Median'] == 140 - assert row['Mean'] == 140 + + assert result['Min.'] == 100 + assert result['Max.'] == 180 + assert result['Median'] == 140 + assert result['Mean'] == 140 def test_summary_glu_vector_input_list(): @@ -148,12 +148,12 @@ def test_summary_glu_vector_input_list(): result = iglu.summary_glu(glucose_list) - assert isinstance(result, pd.DataFrame) - assert len(result) == 1 + assert isinstance(result, dict) + assert len(result) == 6 # Should not have id column for vector input expected_columns = ['Min.', '1st Qu.', 'Median', 'Mean', '3rd Qu.', 'Max.'] - assert list(result.columns) == expected_columns + assert list(result.keys()) == expected_columns def test_summary_glu_vector_input_numpy(): @@ -162,12 +162,12 @@ def test_summary_glu_vector_input_numpy(): result = iglu.summary_glu(glucose_array) - assert isinstance(result, pd.DataFrame) - assert len(result) == 1 + 
assert isinstance(result, dict) + assert len(result) == 6 # Should not have id column for vector input expected_columns = ['Min.', '1st Qu.', 'Median', 'Mean', '3rd Qu.', 'Max.'] - assert list(result.columns) == expected_columns + assert list(result.keys()) == expected_columns def test_summary_glu_missing_values(): @@ -194,12 +194,11 @@ def test_summary_glu_missing_values_vector(): result = iglu.summary_glu(glucose_series) - assert len(result) == 1 + assert len(result) == 6 # Should calculate stats only on non-NaN values: [100, 140, 160, 180] - row = result.iloc[0] - assert row['Min.'] == 100 - assert row['Max.'] == 180 - assert row['Mean'] == 145 + assert result['Min.'] == 100 + assert result['Max.'] == 180 + assert result['Mean'] == 145 def test_summary_glu_all_missing_values(): From cbf6ff0e4a8f0ae5e628da968558c8cbb29513c4 Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Tue, 17 Jun 2025 01:27:25 +0300 Subject: [PATCH 16/16] support for Series, list and ndarray -> return float or dict --- tests/test_roc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_roc.py b/tests/test_roc.py index aebf5eb..96dc030 100644 --- a/tests/test_roc.py +++ b/tests/test_roc.py @@ -108,7 +108,8 @@ def test_roc_default(): def test_roc_series(): """Test ROC with Series input""" - series_data = pd.Series([150, 160, 170, 180, 190, 200]) + series_data = pd.Series([150, 160, 170, 180, 190, 200], + index=pd.date_range(start="2020-01-01 00:00:00", periods=6, freq="5min")) result = iglu.roc(series_data) assert isinstance(result, pd.DataFrame) assert all(col in result.columns for col in ["id", "time", "roc"])
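
The snippet below is not part of the patches above; it is a minimal usage sketch of the scalar-returning API this series converges on, assuming the package is importable as in the test suite (import iglu_python as iglu). The expected values are the ones asserted in the updated tests.

```python
import numpy as np
import pandas as pd
import iglu_python as iglu

# Metrics that need no timestamps accept a Series, list, or ndarray
# and return plain Python values instead of one-row DataFrames.
gl = [150, 155, 160, 165, 140, 145]

print(iglu.median_glu(gl))           # 152.5
print(iglu.range_glu(np.array(gl)))  # 25.0 (max 165 - min 140)
print(iglu.mad_glu(pd.Series(gl)))   # ~11.12 (median absolute deviation * 1.4826)
print(iglu.quantile_glu(gl))         # [140.0, 146.25, 152.5, 158.75, 165.0]
```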
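For the time-aware metrics (mag, modd, pgs, sd_roc, sd_measures), the updated tests pin down a stricter contract: Series input must carry a DatetimeIndex, and anything else raises ValueError. A second sketch under the same import assumption:

```python
import pandas as pd
import iglu_python as iglu

gl = pd.Series(
    [150, 160, 170, 180, 190, 200, 210, 220],
    index=pd.date_range("2020-01-01 10:00:00", periods=8, freq="5min"),
)

# Mean Absolute Glucose over 20-minute intervals; the updated
# test_mag_series_input asserts 60.0 for this steady ramp.
print(iglu.mag(gl, n=20))

# Stripping the timestamps makes the time-aware path refuse the input.
try:
    iglu.mag(gl.reset_index(drop=True))
except ValueError as err:
    print(err)  # Series must have a DatetimeIndex
```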