staskh · staskh · Jun 10, 2025 · Jun 8, 2025 · Jun 8, 2025 · Jun 9, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -5,6 +5,7 @@ on:
     branches: [ main, develop ]
   pull_request:
     branches: [ main, develop ]
+  workflow_dispatch:
 
 jobs:
   test:

diff --git a/.gitignore b/.gitignore
@@ -176,3 +176,6 @@ cython_debug/
 .vscode
 iglu_python.code-workspace
 sandbox.ipynb
+tests/data/day1.csv
+tests/data/day2.csv
+tests/data/day5.csv
diff --git a/README.md b/README.md
@@ -19,14 +19,14 @@ A significant focus of this project has been ensuring compatibility with the ori
 This approach ensures that the Python implementation produces results consistent with the original R package.
 
 ## Unit Test Status
-Unless noted, iglu-r test is considered successful if it achives precision of 1e-3
+Unless noted, an iglu-r test is considered successful if it achieves a precision of 0.001
 
 | Function | IGLU-R test compatibility | array/list/Series | TZ | Comments |
 |----------|---------------------------|-------------------|----|----------|
 | above_percent | ✅ | |||
 | active_percent | ✅ |
 | adrr | ✅ |
-| auc| 🟡 (0.1 precision) | || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)|
+| auc| 🟡 (0.01 precision) | || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)|
 | below_percent| ✅ |
 | cogi | ✅ |
 | conga | ✅ |
@@ -48,8 +48,8 @@ Unless noted, iglu-r test is considered successful if it achives precision of 1e
 | j_index | ✅ |
 | lbgi | ✅ |
 | mad_glu | ✅ |
-| mag |  🟡 (0.1 precision)|
-| mage | 🟡 (0.2 precision) | || See algorithm at [MAGE](https://github.com/irinagain/iglu/blob/master/vignettes/MAGE.Rmd) |
+| mag |  ✅ | || IMHO, the original R implementation has an error |
+| mage | ✅ | || See algorithm at [MAGE](https://github.com/irinagain/iglu/blob/master/vignettes/MAGE.Rmd) |
 | mean_glu | ✅ |
 | median_glu | ✅ |
 | modd | ✅ |
@@ -60,6 +60,8 @@ Unless noted, iglu-r test is considered successful if it achives precision of 1e
 | sd_glu | ✅ |
 | sd_measures | ✅ |
 | sd_roc |  ✅ | |||
+| process_data | ✅ |
+| summary_glu | ✅ |
 | CGMS2DayByDay | ✅ |
 
 # Installation

diff --git a/R_REVIEW.md b/R_REVIEW.md
@@ -0,0 +1,17 @@
+## MAG
+
+The denominator [(length(na.omit(idx_gl))*n/60)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/mag.R#L60) counts readings instead of differences; it has to be:
+```
+    diffs = abs(diff(idx_gl))
+    mag = sum(diffs, na.rm = TRUE)/
+      (length(na.omit(diffs))*n/60)
+```
+
+## CGMS2DayByDay
+
+[ndays = ceiling(as.double(difftime(max(tr), min(tr), units = "days")) + 1)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/utils.R#L208) has to be `ndays = ceiling(as.double(difftime(max(tr), min(tr), units = "days")))`
+
+
+The grid omits the first measurement of the input data and shifts the timeline by -dt0:
+[dti_cum = cumsum(dti)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/utils.R#L210C13-L210C19) has to be `dti_cum = c(0,cumsum(dti))`
+
diff --git a/iglu_python/__init__.py b/iglu_python/__init__.py
@@ -29,11 +29,14 @@
 from .median_glu import median_glu
 from .modd import modd
 from .pgs import pgs
+from .process_data import process_data
 from .quantile_glu import quantile_glu
 from .range_glu import range_glu
 from .roc import roc
 from .sd_glu import sd_glu
+from .sd_measures import sd_measures
 from .sd_roc import sd_roc
+from .summary_glu import summary_glu
 from .utils import IGLU_R_COMPATIBLE, CGMS2DayByDay, check_data_columns, gd2d_to_df
 
 __all__ = [
@@ -72,9 +75,12 @@
     "median_glu",
     "modd",
     "pgs",
+    "process_data",
     "quantile_glu",
     "range_glu",
     "roc",
     "sd_glu",
+    "sd_measures",
     "sd_roc",
+    "summary_glu",
 ]
diff --git a/iglu_python/active_percent.py b/iglu_python/active_percent.py
@@ -37,6 +37,7 @@ def active_percent(
         Number of days to consider in the calculation.
     consistent_end_date : Optional[Union[str, datetime]], default=None
         End date to be used for every subject. If None, each subject will have their own end date.
+        Used only in range_type=='manual' mode
 
     Returns
     -------
@@ -118,31 +119,35 @@ def active_percent(
             active_percent = (
                 (theoretical_gl_vals - missing_gl_vals) / theoretical_gl_vals
             ) * 100
-
+        elif range_type == "manual":
             # Handle consistent end date if provided
             if consistent_end_date is not None:
                 end_date = localize_naive_timestamp(pd.to_datetime(consistent_end_date))
-                start_date = end_date - pd.Timedelta(days=int(ndays))
-
-                # Filter data to the specified date range
-                mask = (sub_data["time"] >= start_date) & (sub_data["time"] <= end_date)
-                sub_data = sub_data[mask]
-
-                # Recalculate active percentage for the specified range
-                active_percent = (len(sub_data) / (ndays * (24 * (60 / dt0)))) * 100
-                min_time = start_date
-                max_time = end_date
-                ndays = (end_date - start_date).total_seconds() / (24 * 3600)
-
-            active_perc_data.append(
-                {
-                    "id": subject,
-                    "active_percent": active_percent,
-                    "ndays": round(ndays, 1),
-                    "start_date": min_time,
-                    "end_date": max_time,
-                }
-            )
+            else:
+                end_date = sub_data["time"].max()
+            start_date = end_date - pd.Timedelta(days=int(ndays))
+
+            # Filter data to the specified date range
+            mask = (sub_data["time"] >= start_date) & (sub_data["time"] <= end_date)
+            sub_data = sub_data[mask]
+
+            # Recalculate active percentage for the specified range
+            active_percent = (len(sub_data) / (ndays * (24 * (60 / dt0)))) * 100
+            min_time = start_date
+            max_time = end_date
+            ndays = (end_date - start_date).total_seconds() / (24 * 3600)
+        else:
+            raise ValueError(f"Invalid range_type: {range_type}")
+
+        active_perc_data.append(
+            {
+                "id": subject,
+                "active_percent": active_percent,
+                "ndays": round(ndays, 1),
+                "start_date": min_time,
+                "end_date": max_time,
+            }
+        )
 
     # Convert to DataFrame
     result = pd.DataFrame(active_perc_data)

diff --git a/iglu_python/auc.py b/iglu_python/auc.py
@@ -2,7 +2,7 @@
 import numpy as np
 import pandas as pd
 
-from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df
+from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, IGLU_R_COMPATIBLE
 
 
 def auc(data: pd.DataFrame, tz: str = "") -> pd.DataFrame:
@@ -64,25 +64,41 @@ def auc_single(subject_data: pd.DataFrame) -> float:
         gd2d, actual_dates, dt0 = CGMS2DayByDay(subject_data, tz=tz)
 
         # Convert gd2d to DataFrame
-        hourly_data = gd2d_to_df(gd2d, actual_dates, dt0)
-        # Add hour column by rounding time to nearest hour
-        hourly_data['hour'] = hourly_data['time'].dt.floor('h')
-
-        hourly_data['gl_next'] = hourly_data['gl'].shift(-1)
-
-        # Calculate AUC for each hour using trapezoidal rule (mg*min/dL)
-        hourly_auc = hourly_data.groupby("hour").apply(
-            lambda x: np.nansum(
-                (dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
-            ),
-            include_groups=False
-        )
-        # 0 mean no data in this hour, replace with nan
-        hourly_auc = hourly_auc.replace(0, np.nan)
-
-        hourly_avg = hourly_auc.mean(skipna=True)
-        # Return mean of daily hourly averages
-        return hourly_avg
+        input_data = gd2d_to_df(gd2d, actual_dates, dt0)
+        if IGLU_R_COMPATIBLE:
+            input_data['day'] = input_data['time'].dt.floor('d')
+            input_data['gl_next'] = input_data['gl'].shift(-1)
+            each_day_area = input_data.groupby("day").apply(
+                lambda x: np.nansum(
+                    (dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
+                ),
+                include_groups=False
+            )
+            # calculate number of not nan trapezoids in total (number of not nan gl and gl_next)
+            n_trapezoids = (~np.isnan(input_data["gl"]) & ~np.isnan(input_data["gl_next"])).sum()
+            hours = dt0/60 * n_trapezoids
+            daily_area = each_day_area.sum()
+            hourly_avg = daily_area/hours
+            return hourly_avg
+        else:
+            # Add hour column by rounding time to nearest hour
+            input_data['hour'] = input_data['time'].dt.floor('h')
+
+            input_data['gl_next'] = input_data['gl'].shift(-1)
+
+            # Calculate AUC for each hour using trapezoidal rule (mg*min/dL)
+            hourly_auc = input_data.groupby("hour").apply(
+                lambda x: np.nansum(
+                    (dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
+                ),
+                include_groups=False
+            )
+            # 0 mean no data in this hour, replace with nan
+            hourly_auc = hourly_auc.replace(0, np.nan)
+
+            hourly_avg = hourly_auc.mean(skipna=True)
+            # Return mean of daily hourly averages
+            return hourly_avg
 
     # Process each subject
     result = []

diff --git a/iglu_python/mag.py b/iglu_python/mag.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd
 
-from .utils import CGMS2DayByDay, check_data_columns
+from .utils import CGMS2DayByDay, check_data_columns, IGLU_R_COMPATIBLE
 
 
 def mag(
@@ -92,16 +92,31 @@ def mag_single(data: pd.DataFrame, n: int) -> float:
 
         # Calculate absolute differences between readings n minutes apart
         lag = readings_per_interval
-        diffs = gl_values[lag:] - gl_values[:-lag]
-        diffs = np.abs(diffs)
-        diffs = diffs[~np.isnan(diffs)]
 
-        # Calculate MAG: sum of absolute differences divided by total time in hours
-        total_time_hours = ((len(diffs)) * n) / 60
-        if total_time_hours == 0:
-            return 0.0
-
-        return float(np.sum(diffs) / total_time_hours)
+        if IGLU_R_COMPATIBLE:
+            idx = np.arange(0,len(gl_values),lag)
+            gl_values_idx = gl_values[idx]
+            diffs = gl_values_idx[1:] - gl_values_idx[:-1]
+            diffs = np.abs(diffs)
+            diffs = diffs[~np.isnan(diffs)]
+            # to be IGLU-R test compatible, imho they made error.
+            # has to be total_time_hours = ((len(diffs)) * n) / 60   
+            total_time_hours = ((len(gl_values_idx[~np.isnan(gl_values_idx)])) * n) / 60
+            if total_time_hours == 0:
+                return 0.0
+            mag = float(np.sum(diffs) / total_time_hours)
+        else:
+            diffs = gl_values[lag:] - gl_values[:-lag]
+            diffs = np.abs(diffs)
+            diffs = diffs[~np.isnan(diffs)]
+
+            # Calculate MAG: sum of absolute differences divided by total time in hours
+            total_time_hours = ((len(diffs)) * n) / 60   
+            if total_time_hours == 0:
+                return 0.0
+            mag = float(np.sum(diffs) / total_time_hours)
+
+        return mag
 
     # Handle Series input
     if isinstance(data, pd.Series):