Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
branches: [ main, develop ]
pull_request:
branches: [ main, develop ]
workflow_dispatch:

jobs:
test:
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,6 @@ cython_debug/
.vscode
iglu_python.code-workspace
sandbox.ipynb
tests/data/day1.csv
tests/data/day2.csv
tests/data/day5.csv
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ A significant focus of this project has been ensuring compatibility with the ori
This approach ensures that the Python implementation produces results consistent with the original R package.

## Unit Test Status
Unless noted, iglu-r test is considered successful if it achives precision of 1e-3
Unless noted, iglu-r test is considered successful if it achieves precision of 0.001

| Function | IGLU-R test compatibility | array/list/Series | TZ | Comments |
|----------|---------------------------|-------------------|----|----------|
| above_percent | ✅ | |||
| active_percent | ✅ |
| adrr | ✅ |
| auc| 🟡 (0.1 precision) | || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)|
| auc| 🟡 (0.01 precision) | || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)|
| below_percent| ✅ |
| cogi | ✅ |
| conga | ✅ |
Expand All @@ -48,8 +48,8 @@ Unless noted, iglu-r test is considered successful if it achives precision of 1e
| j_index | ✅ |
| lbgi | ✅ |
| mad_glu | ✅ |
| mag | 🟡 (0.1 precision)|
| mage | 🟡 (0.2 precision) | || See algorithm at [MAGE](https://github.com/irinagain/iglu/blob/master/vignettes/MAGE.Rmd) |
| mag | ✅ | || IMHO, the original R implementation has an error |
| mage | | || See algorithm at [MAGE](https://github.com/irinagain/iglu/blob/master/vignettes/MAGE.Rmd) |
| mean_glu | ✅ |
| median_glu | ✅ |
| modd | ✅ |
Expand All @@ -60,6 +60,8 @@ Unless noted, iglu-r test is considered successful if it achives precision of 1e
| sd_glu | ✅ |
| sd_measures | ✅ |
| sd_roc | ✅ | |||
| process_data | ✅ |
| summary_glu | ✅ |
| CGMS2DayByDay | ✅ |

# Installation
Expand Down
17 changes: 17 additions & 0 deletions R_REVIEW.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
## MAG

The denominator [(length(na.omit(idx_gl))*n/60)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/mag.R#L60) counts glucose readings instead of the differences between them; it has to be:
```
diffs = abs(diff(idx_gl))
mag = sum(diffs, na.rm = TRUE)/
(length(na.omit(diffs))*n/60)
```

## CGMS2DayByDay

[ndays = ceiling(as.double(difftime(max(tr), min(tr), units = "days")) + 1)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/utils.R#L208) has to be `ndays = ceiling(as.double(difftime(max(tr), min(tr), units = "days")))`


The grid omits the first measurement of the input data and shifts the timeline by -dt0:
[dti_cum = cumsum(dti)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/utils.R#L210C13-L210C19) has to be `dti_cum = c(0,cumsum(dti))`

6 changes: 6 additions & 0 deletions iglu_python/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@
from .median_glu import median_glu
from .modd import modd
from .pgs import pgs
from .process_data import process_data
from .quantile_glu import quantile_glu
from .range_glu import range_glu
from .roc import roc
from .sd_glu import sd_glu
from .sd_measures import sd_measures
from .sd_roc import sd_roc
from .summary_glu import summary_glu
from .utils import IGLU_R_COMPATIBLE, CGMS2DayByDay, check_data_columns, gd2d_to_df

__all__ = [
Expand Down Expand Up @@ -72,9 +75,12 @@
"median_glu",
"modd",
"pgs",
"process_data",
"quantile_glu",
"range_glu",
"roc",
"sd_glu",
"sd_measures",
"sd_roc",
"summary_glu",
]
49 changes: 27 additions & 22 deletions iglu_python/active_percent.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def active_percent(
Number of days to consider in the calculation.
consistent_end_date : Optional[Union[str, datetime]], default=None
End date to be used for every subject. If None, each subject will have their own end date.
Used only in range_type=='manual' mode

Returns
-------
Expand Down Expand Up @@ -118,31 +119,35 @@ def active_percent(
active_percent = (
(theoretical_gl_vals - missing_gl_vals) / theoretical_gl_vals
) * 100

elif range_type == "manual":
# Handle consistent end date if provided
if consistent_end_date is not None:
end_date = localize_naive_timestamp(pd.to_datetime(consistent_end_date))
start_date = end_date - pd.Timedelta(days=int(ndays))

# Filter data to the specified date range
mask = (sub_data["time"] >= start_date) & (sub_data["time"] <= end_date)
sub_data = sub_data[mask]

# Recalculate active percentage for the specified range
active_percent = (len(sub_data) / (ndays * (24 * (60 / dt0)))) * 100
min_time = start_date
max_time = end_date
ndays = (end_date - start_date).total_seconds() / (24 * 3600)

active_perc_data.append(
{
"id": subject,
"active_percent": active_percent,
"ndays": round(ndays, 1),
"start_date": min_time,
"end_date": max_time,
}
)
else:
end_date = sub_data["time"].max()
start_date = end_date - pd.Timedelta(days=int(ndays))

# Filter data to the specified date range
mask = (sub_data["time"] >= start_date) & (sub_data["time"] <= end_date)
sub_data = sub_data[mask]

# Recalculate active percentage for the specified range
active_percent = (len(sub_data) / (ndays * (24 * (60 / dt0)))) * 100
min_time = start_date
max_time = end_date
ndays = (end_date - start_date).total_seconds() / (24 * 3600)
else:
raise ValueError(f"Invalid range_type: {range_type}")

active_perc_data.append(
{
"id": subject,
"active_percent": active_percent,
"ndays": round(ndays, 1),
"start_date": min_time,
"end_date": max_time,
}
)

# Convert to DataFrame
result = pd.DataFrame(active_perc_data)
Expand Down
56 changes: 36 additions & 20 deletions iglu_python/auc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
import pandas as pd

from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df
from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, IGLU_R_COMPATIBLE


def auc(data: pd.DataFrame, tz: str = "") -> pd.DataFrame:
Expand Down Expand Up @@ -64,25 +64,41 @@ def auc_single(subject_data: pd.DataFrame) -> float:
gd2d, actual_dates, dt0 = CGMS2DayByDay(subject_data, tz=tz)

# Convert gd2d to DataFrame
hourly_data = gd2d_to_df(gd2d, actual_dates, dt0)
# Add hour column by rounding time to nearest hour
hourly_data['hour'] = hourly_data['time'].dt.floor('h')

hourly_data['gl_next'] = hourly_data['gl'].shift(-1)

# Calculate AUC for each hour using trapezoidal rule (mg*min/dL)
hourly_auc = hourly_data.groupby("hour").apply(
lambda x: np.nansum(
(dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
),
include_groups=False
)
# 0 mean no data in this hour, replace with nan
hourly_auc = hourly_auc.replace(0, np.nan)

hourly_avg = hourly_auc.mean(skipna=True)
# Return mean of daily hourly averages
return hourly_avg
input_data = gd2d_to_df(gd2d, actual_dates, dt0)
if IGLU_R_COMPATIBLE:
input_data['day'] = input_data['time'].dt.floor('d')
input_data['gl_next'] = input_data['gl'].shift(-1)
each_day_area = input_data.groupby("day").apply(
lambda x: np.nansum(
(dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
),
include_groups=False
)
# calculate number of not nan trapezoids in total (number of not nan gl and gl_next)
n_trapezoids = (~np.isnan(input_data["gl"]) & ~np.isnan(input_data["gl_next"])).sum()
hours = dt0/60 * n_trapezoids
daily_area = each_day_area.sum()
hourly_avg = daily_area/hours
return hourly_avg
else:
# Add hour column by rounding time to nearest hour
input_data['hour'] = input_data['time'].dt.floor('h')

input_data['gl_next'] = input_data['gl'].shift(-1)

# Calculate AUC for each hour using trapezoidal rule (mg*min/dL)
hourly_auc = input_data.groupby("hour").apply(
lambda x: np.nansum(
(dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
),
include_groups=False
)
# 0 mean no data in this hour, replace with nan
hourly_auc = hourly_auc.replace(0, np.nan)

hourly_avg = hourly_auc.mean(skipna=True)
# Return mean of daily hourly averages
return hourly_avg

# Process each subject
result = []
Expand Down
35 changes: 25 additions & 10 deletions iglu_python/mag.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pandas as pd

from .utils import CGMS2DayByDay, check_data_columns
from .utils import CGMS2DayByDay, check_data_columns, IGLU_R_COMPATIBLE


def mag(
Expand Down Expand Up @@ -92,16 +92,31 @@ def mag_single(data: pd.DataFrame, n: int) -> float:

# Calculate absolute differences between readings n minutes apart
lag = readings_per_interval
diffs = gl_values[lag:] - gl_values[:-lag]
diffs = np.abs(diffs)
diffs = diffs[~np.isnan(diffs)]

# Calculate MAG: sum of absolute differences divided by total time in hours
total_time_hours = ((len(diffs)) * n) / 60
if total_time_hours == 0:
return 0.0

return float(np.sum(diffs) / total_time_hours)
if IGLU_R_COMPATIBLE:
idx = np.arange(0,len(gl_values),lag)
gl_values_idx = gl_values[idx]
diffs = gl_values_idx[1:] - gl_values_idx[:-1]
diffs = np.abs(diffs)
diffs = diffs[~np.isnan(diffs)]
# to be IGLU-R test compatible, imho they made error.
# has to be total_time_hours = ((len(diffs)) * n) / 60
total_time_hours = ((len(gl_values_idx[~np.isnan(gl_values_idx)])) * n) / 60
if total_time_hours == 0:
return 0.0
mag = float(np.sum(diffs) / total_time_hours)
else:
diffs = gl_values[lag:] - gl_values[:-lag]
diffs = np.abs(diffs)
diffs = diffs[~np.isnan(diffs)]

# Calculate MAG: sum of absolute differences divided by total time in hours
total_time_hours = ((len(diffs)) * n) / 60
if total_time_hours == 0:
return 0.0
mag = float(np.sum(diffs) / total_time_hours)

return mag

# Handle Series input
if isinstance(data, pd.Series):
Expand Down
Loading
Loading