diff --git a/README.md b/README.md index 1c58bb8..75b6ef6 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Unless noted, iglu-r test is considered successful if it achieves precision of 0 | cv_glu | ✅ | | cv_measures | ✅ | | ea1c | ✅ | -| episode_calculation | 🟡 need fix in excl| || no match in lv1_hypo_excl and lv1_hyper_excl| +| episode_calculation | ✅| || no match in lv1_hypo_excl and lv1_hyper_excl| | gmi | ✅ | | grade_eugly | ✅ | | grade_hyper | ✅ | diff --git a/iglu_python/conga.py b/iglu_python/conga.py index 97437cf..8917119 100644 --- a/iglu_python/conga.py +++ b/iglu_python/conga.py @@ -77,6 +77,12 @@ def conga_single(data: pd.DataFrame, hours: int = 1, tz: str = "") -> float: lag = hourly_readings * hours diffs = gl_vector[lag:] - gl_vector[:-lag] + # Check if we have sufficient data for std calculation + # Need at least 2 non-NaN values for ddof=1 + valid_diffs = diffs[~np.isnan(diffs)] + if len(valid_diffs) < 2: + return np.nan + return float(np.nanstd(diffs, ddof=1)) # Handle Series input diff --git a/iglu_python/episode_calculation.py b/iglu_python/episode_calculation.py index 21e9801..3df3989 100644 --- a/iglu_python/episode_calculation.py +++ b/iglu_python/episode_calculation.py @@ -168,8 +168,15 @@ def episode_calculation( subject_episode_data['id'] = subject_id # Append to main dataframes - episode_data_df = pd.concat([episode_data_df, subject_episode_data], ignore_index=True) - episode_summary_df = pd.concat([episode_summary_df, subject_summary], ignore_index=True) + if episode_data_df.empty: + episode_data_df = subject_episode_data + else: + episode_data_df = pd.concat([episode_data_df, subject_episode_data], ignore_index=True) + + if episode_summary_df.empty: + episode_summary_df = subject_summary + else: + episode_summary_df = pd.concat([episode_summary_df, subject_summary], ignore_index=True) @@ -238,7 +245,7 @@ def episode_single( day_one = day_one.tz_convert(local_tz) ndays = len(gd2d_tuple[1]) # generate grid times by starting from day one and cumulatively summing - time_ip = pd.date_range(start=day_one + pd.Timedelta(minutes=dt0), periods=ndays * 24 * 60 /dt0, freq=f"{dt0}min") + time_ip = pd.date_range(start=day_one + pd.Timedelta(minutes=dt0), periods=int(ndays * 24 * 60 /dt0), freq=f"{dt0}min") data_ip = gd2d_tuple[0].flatten().tolist() new_data = pd.DataFrame({ "time": time_ip, @@ -297,29 +304,25 @@ def episode_single( x, "hypo", lv1_hypo, int(120 / dt0) + 1, end_idx ), } - ) + ), + include_groups=False ) .reset_index() .drop(columns=['level_1']) ) - # Add exclusive labels - def hypo_exclusion_logic(group_df): - # group_df is a DataFrame with all columns for the current group - if (group_df['lv2_hypo'] > 0).any(): - return pd.Series([0] * len(group_df), index=group_df.index) - else: - return group_df['lv1_hypo'] - ep_per_seg['lv1_hypo_excl'] = ep_per_seg.groupby(['segment', 'lv1_hypo']).apply(hypo_exclusion_logic).reset_index(level=[0,1], drop=True).values.flatten() - - def hyper_exclusion_logic(group_df): - # group_df is a DataFrame with all columns for the current group - if (group_df['lv2_hyper'] > 0).any(): - return pd.Series([0] * len(group_df), index=group_df.index) - else: - return group_df['lv1_hyper'] - ep_per_seg['lv1_hyper_excl'] = ep_per_seg.groupby(['segment', 'lv1_hyper']).apply(hyper_exclusion_logic).reset_index(level=[0,1], drop=True).values.flatten() + # Add exclusive labels using the correct original logic without DeprecationWarning + # For hypo exclusion: group by both segment and lv1_hypo, set to 0 if any lv2_hypo > 0 in that 
group + def calculate_exclusion(df, lv1_col, lv2_col): + """Calculate exclusion labels for lv1 episodes based on lv2 episodes in same group""" + df = df.copy() + df['group_id'] = df.groupby(['segment', lv1_col]).ngroup() + group_has_lv2 = df.groupby('group_id')[lv2_col].transform(lambda x: (x > 0).any()) + return df[lv1_col].where(~group_has_lv2, 0) + + ep_per_seg['lv1_hypo_excl'] = calculate_exclusion(ep_per_seg, 'lv1_hypo', 'lv2_hypo') + ep_per_seg['lv1_hyper_excl'] = calculate_exclusion(ep_per_seg, 'lv1_hyper', 'lv2_hyper') full_segment_df = pd.concat([segment_data, ep_per_seg.drop(["segment"], axis=1)], axis=1) @@ -402,7 +405,8 @@ def event_class( else None] + [None]*(len(x)-1) ), } - ) + ), + include_groups=False ) .reset_index() .drop(columns=['level_1']) @@ -471,7 +475,8 @@ def lv1_excl(data: pd.DataFrame) -> np.ndarray: lambda x: pd.DataFrame( { "excl":[0 if (x[lv2_first].values > 0).any() else x[lv1_first].iloc[0]]*len(x) - }) + }), + include_groups=False ) excl = excl.reset_index() diff --git a/iglu_python/grade.py b/iglu_python/grade.py index 8004827..8d278ba 100644 --- a/iglu_python/grade.py +++ b/iglu_python/grade.py @@ -79,7 +79,7 @@ def grade(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: # Calculate GRADE score for each subject result = ( data.groupby("id") - .apply(lambda x: np.mean(_grade_formula(x["gl"].dropna()))) + .apply(lambda x: np.mean(_grade_formula(x["gl"].dropna())), include_groups=False) .reset_index() ) result.columns = ["id", "GRADE"] diff --git a/iglu_python/lbgi.py b/iglu_python/lbgi.py index cfca9d9..bd66ede 100644 --- a/iglu_python/lbgi.py +++ b/iglu_python/lbgi.py @@ -109,14 +109,11 @@ def lbgi(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame: raise ValueError("Empty DataFrame provided") # Calculate LBGI for each subject - result = pd.DataFrame(columns=["id", "LBGI"]) + results = [] for subject_id in data["id"].unique(): subject_data = data[data["id"] == subject_id]["gl"] lbgi_value = calculate_lbgi(subject_data) - result = pd.concat( - [result, pd.DataFrame({"id": [subject_id], "LBGI": [lbgi_value]})], - ignore_index=True, - ) + results.append({"id": subject_id, "LBGI": lbgi_value}) - return result + return pd.DataFrame(results) diff --git a/iglu_python/m_value.py b/iglu_python/m_value.py index fe13802..c151af2 100644 --- a/iglu_python/m_value.py +++ b/iglu_python/m_value.py @@ -71,7 +71,7 @@ def m_value(data: Union[pd.DataFrame, pd.Series], r: float = 90) -> pd.DataFrame # Calculate M-value for each subject result = ( data.groupby("id") - .apply(lambda x: 1000 * np.mean(np.abs(np.log10(x["gl"] / r)) ** 3)) + .apply(lambda x: 1000 * np.mean(np.abs(np.log10(x["gl"] / r)) ** 3), include_groups=False) .reset_index() ) result.columns = ["id", "M_value"] diff --git a/iglu_python/mage.py b/iglu_python/mage.py index d7697e8..cb2df85 100644 --- a/iglu_python/mage.py +++ b/iglu_python/mage.py @@ -180,7 +180,10 @@ def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, return_val = pd.DataFrame(columns=["start", "end", "mage", "plus_or_minus", "first_excursion"]) for segment in dfs: ret = mage_atomic(segment,short_ma,long_ma) - return_val = pd.concat([return_val, ret], ignore_index=True) + if return_val.empty: + return_val = ret + else: + return_val = pd.concat([return_val, ret], ignore_index=True) if return_type == 'df': return return_val @@ -195,9 +198,8 @@ def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int, res = return_val[return_val['MAGE'].notna()].copy() elif direction == 'max': # Group by start,end and keep max 
mage in each group - res = (return_val.groupby(['start', 'end']) - .apply(lambda x: x[x['MAGE'] == x['MAGE'].max()]) - .reset_index(drop=True)) + idx = return_val.groupby(['start', 'end'])['MAGE'].idxmax() + res = return_val.loc[idx].reset_index(drop=True) else: # default: first excursions only res = return_val[return_val['first_excursion'] == True].copy() @@ -220,13 +222,13 @@ def mage_atomic(data, short_ma,long_ma): data["MA_Long"] = data["gl"].rolling(window=long_ma, min_periods=1).mean() # Fill leading NAs (forward fill first valid value) if short_ma > len(data): - data['MA_Short'].iloc[:short_ma] = data['MA_Short'].iloc[-1] + data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[-1] else: - data['MA_Short'].iloc[:short_ma] = data['MA_Short'].iloc[short_ma-1] + data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[short_ma-1] if long_ma > len(data): - data['MA_Long'].iloc[:long_ma] = data['MA_Long'].iloc[-1] + data.loc[data.index[:long_ma], 'MA_Long'] = data['MA_Long'].iloc[-1] else: - data['MA_Long'].iloc[:long_ma] = data['MA_Long'].iloc[long_ma-1] + data.loc[data.index[:long_ma], 'MA_Long'] = data['MA_Long'].iloc[long_ma-1] # Calculate difference data['DELTA_SHORT_LONG'] = data['MA_Short'] - data['MA_Long'] data = data.reset_index(drop=True) diff --git a/iglu_python/modd.py b/iglu_python/modd.py index cc64060..c837fe3 100644 --- a/iglu_python/modd.py +++ b/iglu_python/modd.py @@ -72,7 +72,10 @@ def modd_single(data: pd.DataFrame) -> float: abs_diffs = abs_diffs[~np.isnan(abs_diffs)] # Remove NaNs # Calculate mean of absolute differences, ignoring NaN values - modd_val = np.nanmean(abs_diffs) + if len(abs_diffs) == 0: + modd_val = np.nan + else: + modd_val = np.nanmean(abs_diffs) return float(modd_val) if not pd.isna(modd_val) else np.nan diff --git a/iglu_python/pgs.py b/iglu_python/pgs.py index e6ac121..2536a43 100644 --- a/iglu_python/pgs.py +++ b/iglu_python/pgs.py @@ -127,8 +127,12 @@ def pgs_single(subj_data: pd.DataFrame) -> float: return pgs_score - # Calculate PGS for each subject - result = data.groupby("id").apply(lambda x: pgs_single(x)).reset_index() - result.columns = ["id", "PGS"] - return result + # Calculate PGS for each subject + results = [] + for subject_id in data["id"].unique(): + subject_data = data[data["id"] == subject_id].copy() + pgs_value = pgs_single(subject_data) + results.append({"id": subject_id, "PGS": pgs_value}) + + return pd.DataFrame(results) diff --git a/iglu_python/roc.py b/iglu_python/roc.py index f6dcc5f..bb0482c 100644 --- a/iglu_python/roc.py +++ b/iglu_python/roc.py @@ -123,7 +123,7 @@ def roc_single(data: pd.DataFrame, timelag: int, dt0: int = None , inter_gap: in { "id": ["subject1"] * len(data), "time": pd.date_range( - start="2020-01-01", periods=len(data), freq=f"{dt0}T" + start="2020-01-01", periods=len(data), freq=f"{dt0}min" ), "gl": data.values, } diff --git a/iglu_python/sd_measures.py b/iglu_python/sd_measures.py index 55380f5..0c785b0 100644 --- a/iglu_python/sd_measures.py +++ b/iglu_python/sd_measures.py @@ -135,13 +135,13 @@ def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[ # 1. SDw - vertical within days # Standard deviation within each day, then mean across days - daily_sds = np.nanstd(gd2d, axis=1, ddof=1) # ddof=1 for sample std - result['SDw'] = np.nanmean(daily_sds) + daily_sds = _safe_nanstd(gd2d, axis=1, ddof=1) # ddof=1 for sample std + result['SDw'] = _safe_nanmean(daily_sds) # 2. 
SDhhmm - between time points # Mean at each time point across days, then SD of those means - timepoint_means = np.nanmean(gd2d, axis=0) - result['SDhhmm'] = np.nanstd(timepoint_means, ddof=1) + timepoint_means = _safe_nanmean(gd2d, axis=0) + result['SDhhmm'] = _safe_nanstd(timepoint_means, ddof=1) # 3. SDwsh - within series (1-hour windows) # Rolling standard deviation over 1-hour windows @@ -150,24 +150,24 @@ def _calculate_sd_subtypes(gd2d: np.ndarray, dt0: int, subject_id: Any) -> Dict[ # Calculate rolling standard deviation rolling_sds = _rolling_std(gs, window=win) - result['SDwsh'] = np.nanmean(rolling_sds) + result['SDwsh'] = _safe_nanmean(rolling_sds) # 4. SDdm - horizontal sd (between daily means) # Standard deviation of daily mean glucose values - daily_means = np.nanmean(gd2d, axis=1) - result['SDdm'] = np.nanstd(daily_means, ddof=1) + daily_means = _safe_nanmean(gd2d, axis=1) + result['SDdm'] = _safe_nanstd(daily_means, ddof=1) # 5. SDb - between days, within timepoints # SD across days for each time point, then mean of those SDs - timepoint_sds = np.nanstd(gd2d, axis=0, ddof=1) - result['SDb'] = np.nanmean(timepoint_sds) + timepoint_sds = _safe_nanstd(gd2d, axis=0, ddof=1) + result['SDb'] = _safe_nanmean(timepoint_sds) # 6. SDbdm - between days, within timepoints, corrected for daily means # Subtract daily mean from each value, then calculate SDb on corrected values daily_means_matrix = daily_means[:, np.newaxis] # Convert to column vector corrected_gd2d = gd2d - daily_means_matrix - corrected_timepoint_sds = np.nanstd(corrected_gd2d, axis=0, ddof=1) - result['SDbdm'] = np.nanmean(corrected_timepoint_sds) + corrected_timepoint_sds = _safe_nanstd(corrected_gd2d, axis=0, ddof=1) + result['SDbdm'] = _safe_nanmean(corrected_timepoint_sds) return result @@ -200,10 +200,73 @@ def _rolling_std(data: np.ndarray, window: int) -> np.ndarray: for i in range(n - window + 1): window_data = valid_data[i:i + window] if len(window_data) == window: # Full window - rolling_stds.append(np.nanstd(window_data, ddof=1)) + rolling_stds.append(_safe_nanstd(window_data, ddof=1)) return np.array(rolling_stds) if rolling_stds else np.array([np.nan]) +def _safe_nanstd(data: np.ndarray, axis: Optional[int] = None, ddof: int = 1) -> float: + """ + Safe version of np.nanstd that handles insufficient data gracefully + + Parameters + ---------- + data : np.ndarray + Input data + axis : int, optional + Axis along which the standard deviation is computed + ddof : int + Delta degrees of freedom + + Returns + ------- + float + Standard deviation or np.nan if insufficient data + """ + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=RuntimeWarning) + + if axis is None: + # Check if we have enough non-NaN values + valid_data = data[~np.isnan(data)] + if len(valid_data) <= ddof: + return np.nan + else: + # For axis operations, we need to check each slice + # This is more complex, so we'll just suppress warnings + pass + + return np.nanstd(data, axis=axis, ddof=ddof) + + +def _safe_nanmean(data: np.ndarray, axis: Optional[int] = None) -> float: + """ + Safe version of np.nanmean that handles empty slices gracefully + + Parameters + ---------- + data : np.ndarray + Input data + axis : int, optional + Axis along which the mean is computed + + Returns + ------- + float + Mean or np.nan if no valid data + """ + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=RuntimeWarning) + + if axis is None: + # Check if we have any non-NaN values + if np.isnan(data).all(): + return 
np.nan + else: + # For axis operations, suppress warnings and let numpy handle it + pass + + return np.nanmean(data, axis=axis) + # Alternative vectorized implementation for better performance def sd_measures_vectorized(data: pd.DataFrame, @@ -237,11 +300,11 @@ def _calculate_sd_subtypes_vectorized(gd2d: np.ndarray, dt0: int, subject_id: An return { 'id': subject_id, - 'SDw': np.nanmean(np.nanstd(gd2d, axis=1, ddof=1)), - 'SDhhmm': np.nanstd(np.nanmean(gd2d, axis=0), ddof=1), - 'SDwsh': np.nanmean(_rolling_std(gd2d.T.flatten(), round(60/dt0))), - 'SDdm': np.nanstd(np.nanmean(gd2d, axis=1), ddof=1), - 'SDb': np.nanmean(np.nanstd(gd2d, axis=0, ddof=1)), - 'SDbdm': np.nanmean(np.nanstd(gd2d - np.nanmean(gd2d, axis=1, keepdims=True), + 'SDw': _safe_nanmean(np.nanstd(gd2d, axis=1, ddof=1)), + 'SDhhmm': np.nanstd(_safe_nanmean(gd2d, axis=0), ddof=1), + 'SDwsh': _safe_nanmean(_rolling_std(gd2d.T.flatten(), round(60/dt0))), + 'SDdm': np.nanstd(_safe_nanmean(gd2d, axis=1), ddof=1), + 'SDb': _safe_nanmean(np.nanstd(gd2d, axis=0, ddof=1)), + 'SDbdm': _safe_nanmean(np.nanstd(gd2d - _safe_nanmean(gd2d, axis=1, keepdims=True), axis=0, ddof=1)) } diff --git a/iglu_python/utils.py b/iglu_python/utils.py index 8568d60..1572659 100644 --- a/iglu_python/utils.py +++ b/iglu_python/utils.py @@ -91,8 +91,8 @@ def check_data_columns(data: pd.DataFrame, time_check=False, tz="") -> pd.DataFr raise ValueError("Data contains no glucose values") # Check for missing values - if data["gl"].isna().any(): - warnings.warn("Data contains missing glucose values") + # if data["gl"].isna().any(): + # warnings.warn("Data contains missing glucose values") # convert time to specified timezone # TODO: check if this is correct (R-implementation compatibility) diff --git a/tests/test_above_percent.py b/tests/test_above_percent.py index 58a7e54..3dd6d84 100644 --- a/tests/test_above_percent.py +++ b/tests/test_above_percent.py @@ -1,6 +1,7 @@ import json import pandas as pd +import numpy as np import pytest import iglu_python as iglu @@ -48,6 +49,9 @@ def test_above_percent_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Compare DataFrames with precision to 0.001 pd.testing.assert_frame_equal( diff --git a/tests/test_active_percent.py b/tests/test_active_percent.py index 58f3972..6f4fad3 100644 --- a/tests/test_active_percent.py +++ b/tests/test_active_percent.py @@ -34,6 +34,9 @@ def test_active_percent_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_adrr.py b/tests/test_adrr.py index dae3278..a9a2399 100644 --- a/tests/test_adrr.py +++ b/tests/test_adrr.py @@ -1,6 +1,7 @@ import json import pandas as pd +import numpy as np import pytest import iglu_python as iglu @@ -48,6 +49,9 @@ def test_adrr_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Compare 
DataFrames with precision to 0.001 for numeric columns pd.testing.assert_frame_equal( diff --git a/tests/test_auc.py b/tests/test_auc.py index 977ba0d..d438c08 100644 --- a/tests/test_auc.py +++ b/tests/test_auc.py @@ -41,6 +41,9 @@ def test_auc_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + result_df = iglu.auc(df, **kwargs) diff --git a/tests/test_conga.py b/tests/test_conga.py index 897d53d..7a20f3c 100644 --- a/tests/test_conga.py +++ b/tests/test_conga.py @@ -33,6 +33,9 @@ def test_conga_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_ea1c.py b/tests/test_ea1c.py index b2fb8c4..5a19725 100644 --- a/tests/test_ea1c.py +++ b/tests/test_ea1c.py @@ -46,6 +46,9 @@ def test_ea1c_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) @@ -128,7 +131,7 @@ def test_ea1c_constant_glucose(): data = pd.DataFrame( { "id": ["subject1"] * 24, # 24 hours of data - "time": pd.date_range(start="2020-01-01", periods=24, freq="H"), + "time": pd.date_range(start="2020-01-01", periods=24, freq="h"), "gl": [constant_glucose] * 24, } ) @@ -144,7 +147,7 @@ def test_ea1c_missing_values(): data = pd.DataFrame( { "id": ["subject1"] * 24, - "time": pd.date_range(start="2020-01-01", periods=24, freq="H"), + "time": pd.date_range(start="2020-01-01", periods=24, freq="h"), "gl": [150, np.nan, 160, 165, 140, 145] * 4, # Some missing values } ) @@ -166,7 +169,7 @@ def test_ea1c_multiple_subjects(): "subject3", "subject3", ], - "time": pd.date_range(start="2020-01-01", periods=6, freq="H"), + "time": pd.date_range(start="2020-01-01", periods=6, freq="h"), "gl": [150, 200, 130, 190, 140, 140], } ) @@ -183,7 +186,7 @@ def test_ea1c_extreme_values(): data = pd.DataFrame( { "id": ["subject1"] * 24, - "time": pd.date_range(start="2020-01-01", periods=24, freq="H"), + "time": pd.date_range(start="2020-01-01", periods=24, freq="h"), "gl": [40, 600] * 12, # Alternating very low and very high values } ) diff --git a/tests/test_episode_calculation.py b/tests/test_episode_calculation.py index a410d40..7fa59fe 100644 --- a/tests/test_episode_calculation.py +++ b/tests/test_episode_calculation.py @@ -35,9 +35,12 @@ def test_episode_calculation_iglu_r_compatible(scenario): # this is extended expected result, with two separate dataframes assert kwargs["return_data"] expected_episodes_df = pd.DataFrame(expected_results['episodes']).reset_index(drop=True) + expected_episodes_df = expected_episodes_df.infer_objects(copy=False) expected_data_df = pd.DataFrame(expected_results['data']).reset_index(drop=True) + expected_data_df = expected_data_df.infer_objects(copy=False) else : expected_episodes_df = pd.DataFrame(expected_results).reset_index(drop=True) + expected_episodes_df = 
expected_episodes_df.infer_objects(copy=False) expected_data_df = None @@ -63,8 +66,8 @@ def test_episode_calculation_iglu_r_compatible(scenario): # ToDo : find why no match in lv1_hypo_excl and lv1_hyper_excl pd.testing.assert_frame_equal( - result_data_df[['id', 'time', 'gl', 'segment', 'lv1_hypo', 'lv2_hypo', 'lv1_hyper', 'lv2_hyper', 'ext_hypo']], - expected_data_df[['id', 'time', 'gl', 'segment', 'lv1_hypo', 'lv2_hypo', 'lv1_hyper', 'lv2_hyper', 'ext_hypo']], + result_data_df, + expected_data_df, check_dtype=False, # Don't check dtypes since we might have different numeric types check_index_type=True, check_column_type=True, diff --git a/tests/test_grade.py b/tests/test_grade.py index 5e53e20..e3fed71 100644 --- a/tests/test_grade.py +++ b/tests/test_grade.py @@ -34,6 +34,9 @@ def test_grade_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_grade_eugly.py b/tests/test_grade_eugly.py index 960407b..8776d14 100644 --- a/tests/test_grade_eugly.py +++ b/tests/test_grade_eugly.py @@ -34,6 +34,9 @@ def test_grade_eugly_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_grade_hyper.py b/tests/test_grade_hyper.py index 247b22b..1d9b2d7 100644 --- a/tests/test_grade_hyper.py +++ b/tests/test_grade_hyper.py @@ -34,6 +34,9 @@ def test_grade_hyper_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_gvp.py b/tests/test_gvp.py index a040e53..c985106 100644 --- a/tests/test_gvp.py +++ b/tests/test_gvp.py @@ -34,6 +34,9 @@ def test_gvp_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_hbgi.py b/tests/test_hbgi.py index 6e2d952..f77dd9f 100644 --- a/tests/test_hbgi.py +++ b/tests/test_hbgi.py @@ -33,6 +33,9 @@ def test_hbgi_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_hyper_index.py b/tests/test_hyper_index.py index f547a1a..c06e8c0 100644 --- a/tests/test_hyper_index.py +++ b/tests/test_hyper_index.py @@ -33,6 +33,9 @@ def 
test_hyper_index_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_hypo_index.py b/tests/test_hypo_index.py index 64694b7..21d6971 100644 --- a/tests/test_hypo_index.py +++ b/tests/test_hypo_index.py @@ -34,6 +34,9 @@ def test_hypo_index_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_igc.py b/tests/test_igc.py index f9689bc..25b2ac7 100644 --- a/tests/test_igc.py +++ b/tests/test_igc.py @@ -33,6 +33,9 @@ def test_igc_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_in_range_percent.py b/tests/test_in_range_percent.py index bc8590a..32f5091 100644 --- a/tests/test_in_range_percent.py +++ b/tests/test_in_range_percent.py @@ -39,6 +39,8 @@ def test_in_range_percent_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) result_df = iglu.in_range_percent(df, **kwargs) diff --git a/tests/test_iqr_glu.py b/tests/test_iqr_glu.py index c1a4864..156c04f 100644 --- a/tests/test_iqr_glu.py +++ b/tests/test_iqr_glu.py @@ -48,6 +48,9 @@ def test_iqr_glu_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Compare DataFrames with precision to 0.001 for numeric columns pd.testing.assert_frame_equal( diff --git a/tests/test_j_index.py b/tests/test_j_index.py index af71c75..53622e1 100644 --- a/tests/test_j_index.py +++ b/tests/test_j_index.py @@ -33,6 +33,9 @@ def test_j_index_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_lbgi.py b/tests/test_lbgi.py index d25a591..cbfaffa 100644 --- a/tests/test_lbgi.py +++ b/tests/test_lbgi.py @@ -34,6 +34,9 @@ def test_lbgi_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert 
time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_m_value.py b/tests/test_m_value.py index 93a558c..42c441b 100644 --- a/tests/test_m_value.py +++ b/tests/test_m_value.py @@ -33,6 +33,9 @@ def test_m_value_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_mad_glu.py b/tests/test_mad_glu.py index df9b744..6abb88e 100644 --- a/tests/test_mad_glu.py +++ b/tests/test_mad_glu.py @@ -48,6 +48,9 @@ def test_mad_glu_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Compare DataFrames with precision to 0.001 for numeric columns pd.testing.assert_frame_equal( diff --git a/tests/test_mag.py b/tests/test_mag.py index 2e53e35..a85dd1d 100644 --- a/tests/test_mag.py +++ b/tests/test_mag.py @@ -48,6 +48,9 @@ def test_mag_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Compare DataFrames with precision to 0.001 for numeric columns pd.testing.assert_frame_equal( diff --git a/tests/test_mage.py b/tests/test_mage.py index 15fc382..d0f3c62 100644 --- a/tests/test_mage.py +++ b/tests/test_mage.py @@ -39,6 +39,9 @@ def test_mage_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + expected_df = expected_df.dropna(subset=["MAGE"]) if expected_df.empty: pytest.skip("This MAGE test has no numeric value to compare") diff --git a/tests/test_mean_glu.py b/tests/test_mean_glu.py index 6e27ac0..94ae5c0 100644 --- a/tests/test_mean_glu.py +++ b/tests/test_mean_glu.py @@ -34,6 +34,9 @@ def test_mean_glu_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_median_glu.py b/tests/test_median_glu.py index 30024cf..8c592bc 100644 --- a/tests/test_median_glu.py +++ b/tests/test_median_glu.py @@ -33,6 +33,9 @@ def test_median_glu_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_modd.py b/tests/test_modd.py index 257eb5d..2df5b3f 100644 --- a/tests/test_modd.py +++ b/tests/test_modd.py @@ -48,6 +48,10 @@ def 
test_modd_iglu_r_compatible(scenario): expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + + # Compare DataFrames with precision to 0.001 for numeric columns pd.testing.assert_frame_equal( result_df, diff --git a/tests/test_process_data.py b/tests/test_process_data.py index 9dee8c0..5520dcc 100644 --- a/tests/test_process_data.py +++ b/tests/test_process_data.py @@ -39,6 +39,9 @@ def test_process_data_iglu_r_compatible(scenario): expected_df = pd.DataFrame(expected_results) expected_df['time'] = expected_df['time'].apply(lambda x: pd.to_datetime(x).tz_convert('UTC')) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + result_df = iglu.process_data(df, **kwargs) @@ -340,7 +343,7 @@ def test_process_data_list_with_column_specs_error(): def test_process_data_output_dtypes(): """Test that output has correct data types.""" - dates = pd.date_range('2020-01-01', periods=48, freq='1H') + dates = pd.date_range('2020-01-01', periods=48, freq='1h') data = pd.DataFrame({ 'id': ['subject1'] * 48, 'time': dates, diff --git a/tests/test_quantile_glu.py b/tests/test_quantile_glu.py index 4c20618..0467691 100644 --- a/tests/test_quantile_glu.py +++ b/tests/test_quantile_glu.py @@ -33,6 +33,9 @@ def test_quantile_glu_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_range_glu.py b/tests/test_range_glu.py index 5b49571..1aa7cef 100644 --- a/tests/test_range_glu.py +++ b/tests/test_range_glu.py @@ -48,6 +48,9 @@ def test_range_glu_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Compare DataFrames with precision to 0.001 for numeric columns pd.testing.assert_frame_equal( diff --git a/tests/test_roc.py b/tests/test_roc.py index c08596d..aebf5eb 100644 --- a/tests/test_roc.py +++ b/tests/test_roc.py @@ -33,6 +33,9 @@ def test_roc_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + # Read CSV and convert time column to datetime df = pd.read_csv(input_file_name, index_col=0) diff --git a/tests/test_sd_measures.py b/tests/test_sd_measures.py index 4e0e593..fa0f96c 100644 --- a/tests/test_sd_measures.py +++ b/tests/test_sd_measures.py @@ -39,6 +39,8 @@ def test_sd_measures_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) result_df = iglu.sd_measures(df, **kwargs) @@ -123,7 +125,7 @@ def test_sd_measures_multiple_days(): def test_sd_measures_constant_values(): """Test with constant 
glucose values.""" - dates = pd.date_range('2020-01-01', periods=48, freq='1H') + dates = pd.date_range('2020-01-01', periods=48, freq='1h') data = pd.DataFrame({ 'id': ['subject1'] * 48, 'time': dates, @@ -142,7 +144,7 @@ def test_sd_measures_constant_values(): def test_sd_measures_single_day(): """Test with single day of data.""" - dates = pd.date_range('2020-01-01 00:00', periods=24, freq='1H') + dates = pd.date_range('2020-01-01 00:00', periods=24, freq='1h') data = pd.DataFrame({ 'id': ['subject1'] * 24, 'time': dates, @@ -269,7 +271,7 @@ def test_sd_measures_empty_dataframe(): def test_sd_measures_output_dtypes(): """Test that output has correct data types.""" - dates = pd.date_range('2020-01-01', periods=48, freq='1H') + dates = pd.date_range('2020-01-01', periods=48, freq='1h') data = pd.DataFrame({ 'id': ['subject1'] * 48, 'time': dates, @@ -287,7 +289,7 @@ def test_sd_measures_output_dtypes(): def test_sd_measures_reproducibility(): """Test that results are reproducible with same input.""" np.random.seed(42) - dates = pd.date_range('2020-01-01', periods=48, freq='1H') + dates = pd.date_range('2020-01-01', periods=48, freq='1h') data = pd.DataFrame({ 'id': ['subject1'] * 48, 'time': dates, diff --git a/tests/test_sd_roc.py b/tests/test_sd_roc.py index bde6f65..854290e 100644 --- a/tests/test_sd_roc.py +++ b/tests/test_sd_roc.py @@ -34,6 +34,9 @@ def test_sd_roc_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + expected_df = expected_df.dropna(subset=["sd_roc"]) if expected_df.empty: pytest.skip("This SD ROC test has no numeric value to compare") diff --git a/tests/test_summary_glu.py b/tests/test_summary_glu.py index 1b18d8f..f2b02bc 100644 --- a/tests/test_summary_glu.py +++ b/tests/test_summary_glu.py @@ -39,6 +39,9 @@ def test_summary_glu_iglu_r_compatible(scenario): expected_results = scenario["results"] expected_df = pd.DataFrame(expected_results) expected_df = expected_df.reset_index(drop=True) + pd.set_option('future.no_silent_downcasting', True) + expected_df = expected_df.replace({None: np.nan}) + result_df = iglu.summary_glu(df, **kwargs)
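
A minimal, standalone sketch of the `groupby(...).apply(..., include_groups=False)` pattern that the grade.py, m_value.py and episode_calculation.py hunks adopt, assuming pandas >= 2.2 (where passing the grouping columns into the applied function raises a DeprecationWarning). The frame and the aggregation below are illustrative, not the library's code:

```python
import numpy as np
import pandas as pd

data = pd.DataFrame(
    {
        "id": ["subject1", "subject1", "subject2", "subject2"],
        "gl": [110.0, 150.0, 95.0, 180.0],
    }
)

# With include_groups=False the grouping column "id" is not passed to the
# applied function, so pandas >= 2.2 emits no DeprecationWarning; the "gl"
# column is still available inside the lambda.
result = (
    data.groupby("id")
    .apply(lambda x: np.mean(x["gl"].dropna()), include_groups=False)
    .reset_index()
)
result.columns = ["id", "mean_gl"]
print(result)
```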
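
The lbgi.py and pgs.py hunks replace `pd.concat` inside the per-subject loop (which started from an empty, all-NA frame and triggers FutureWarnings in recent pandas) with a list of row dicts that is turned into a DataFrame once. A sketch of that pattern on made-up data; `mean_gl` stands in for the per-subject metric:

```python
import pandas as pd

data = pd.DataFrame(
    {"id": ["s1", "s1", "s2", "s2"], "gl": [110.0, 150.0, 95.0, 180.0]}
)

# Accumulate one result row per subject, then build the frame in a single call.
results = []
for subject_id in data["id"].unique():
    subject_gl = data.loc[data["id"] == subject_id, "gl"]
    results.append({"id": subject_id, "mean_gl": float(subject_gl.mean())})

summary = pd.DataFrame(results)
print(summary)
```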
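
In mage.py, the per-group filter `apply(lambda x: x[x['MAGE'] == x['MAGE'].max()])` becomes `idxmax` plus `.loc`. One behavioural difference worth noting: `idxmax` keeps exactly one row per group, so tied maxima are no longer duplicated, and it assumes each group has at least one non-NaN MAGE value. A sketch with illustrative values:

```python
import pandas as pd

return_val = pd.DataFrame(
    {
        "start": ["d1", "d1", "d2", "d2"],
        "end": ["d1", "d1", "d2", "d2"],
        "MAGE": [25.0, 40.0, 30.0, 18.0],
    }
)

# idxmax returns the index label of the maximum MAGE per (start, end) group;
# .loc then selects those rows without a groupby-apply round trip.
idx = return_val.groupby(["start", "end"])["MAGE"].idxmax()
res = return_val.loc[idx].reset_index(drop=True)
print(res)
```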
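
The mage_atomic hunk rewrites `data['MA_Short'].iloc[:short_ma] = ...` as `data.loc[data.index[:short_ma], 'MA_Short'] = ...`; the chained form assigns through an intermediate object and does not reliably write back under pandas copy-on-write. A small sketch of the write-through form with a toy series and window:

```python
import pandas as pd

df = pd.DataFrame({"gl": [100.0, 120.0, 140.0, 160.0]})
short_ma = 2
df["MA_Short"] = df["gl"].rolling(window=short_ma, min_periods=1).mean()

# Backfill the first short_ma rows with the first full-window moving average,
# writing through .loc with explicit index labels instead of chained indexing.
df.loc[df.index[:short_ma], "MA_Short"] = df["MA_Short"].iloc[short_ma - 1]
print(df)
```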
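
Two pandas 2.x housekeeping changes recur across the test files (and in roc.py): lowercase frequency aliases (`"h"`, `"min"` instead of the deprecated `"H"`, `"T"`), and normalising `None` to `np.nan` in the expected frames with `future.no_silent_downcasting` enabled before `assert_frame_equal`. A condensed sketch; the column names are illustrative:

```python
import numpy as np
import pandas as pd

pd.set_option("future.no_silent_downcasting", True)

hourly = pd.date_range(start="2020-01-01", periods=4, freq="h")    # was "H"
grid = pd.date_range(start="2020-01-01", periods=4, freq="5min")   # was "5T"

# Expected values loaded from JSON may arrive as an object column of None;
# replace() maps them to NaN without silently downcasting the column dtype.
expected_df = pd.DataFrame({"id": ["s1", "s2"], "metric": [None, None]})
expected_df = expected_df.replace({None: np.nan})

print(hourly)
print(grid)
print(expected_df)
```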