Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ dependencies = [
"matplotlib",
"numpy",
"opm>=2023.04",
"pandas",
"pandas >= 2",
"pydantic",
"pyscal",
"pyyaml",
Expand Down
26 changes: 15 additions & 11 deletions src/subscript/fmuobs/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import re
from pathlib import Path

import numpy as np
import pandas as pd

from subscript import getLogger
Expand Down Expand Up @@ -431,20 +430,25 @@ def compute_date_from_days(
to this starttime, and converted to DATE.

Returns:
pd.DataFrame. DATE column is always of type datetime64
pd.DataFrame. DATE column is always datetime-like
(datetime64 unit depends on pandas)
Copy link

Copilot AI Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring update mentions that 'datetime64 unit depends on pandas' but this is imprecise. Consider clarifying whether this refers to the resolution (e.g., ns vs. us) and documenting which pandas versions use which units, or stating that the specific unit should not be relied upon by consumers of this function.

Suggested change
(datetime64 unit depends on pandas)
(typically ``datetime64[ns]``; the exact time unit/resolution is
determined by pandas and should not be relied upon by callers).

Copilot uses AI. Check for mistakes.
"""
assert isinstance(dframe, pd.DataFrame)
if starttime and "DAYS" in dframe:
if "DATE" not in dframe:
dframe["DATE"] = np.nan
start = pd.to_datetime(starttime)
date_needed_rows = ~dframe["DAYS"].isna() & dframe["DATE"].isna()
dframe["DATE"] = pd.to_datetime(dframe["DATE"])
dframe.loc[date_needed_rows, "DATE"] = start + pd.to_timedelta(
dframe.loc[date_needed_rows, "DAYS"], "d"
)

if "DATE" in dframe:
dframe["DATE"] = pd.to_datetime(dframe["DATE"])

if not starttime or "DAYS" not in dframe:
return dframe

start = pd.to_datetime(starttime)
computed_dates = start + pd.to_timedelta(dframe["DAYS"], unit="D")
Copy link

Copilot AI Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

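The refactored logic evaluates the date expression for every row (including rows with NaN in the DAYS column), whereas the original only assigned dates to date_needed_rows. The resulting DATE column is the same: pandas converts a NaN in DAYS to NaT, and combine_first keeps any existing DATE value, so only rows with a missing DATE and a valid DAYS are filled in. If you want that selectivity to be explicit in the code, consider adding .where(dframe['DAYS'].notna()); note that this is a readability choice only, since it adds an operation rather than reducing the amount of data processed.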

Suggested change
computed_dates = start + pd.to_timedelta(dframe["DAYS"], unit="D")
computed_dates = (start + pd.to_timedelta(dframe["DAYS"], unit="D")).where(
dframe["DAYS"].notna()
)

Copilot uses AI. Check for mistakes.

if "DATE" in dframe:
dframe["DATE"] = dframe["DATE"].combine_first(computed_dates)
else:
dframe["DATE"] = computed_dates

return dframe


Expand Down
12 changes: 4 additions & 8 deletions src/subscript/fmuobs/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,14 +292,10 @@ def convert_dframe_date_to_str(dframe: pd.DataFrame) -> pd.DataFrame:
pd.DataFrame: DATE as a string type
"""
if "DATE" in dframe:
with pd.option_context("future.no_silent_downcasting", True):
dframe = dframe.copy()
dframe["DATE"] = (
dframe["DATE"]
.astype(str)
.replace(["NaT", "NaN", "nan"], np.nan)
.infer_objects(copy=False)
)
dframe = dframe.copy()
dframe["DATE"] = (
dframe["DATE"].astype(str).replace(["NaT", "NaN", "nan"], np.nan)
)

return dframe

Expand Down
4 changes: 3 additions & 1 deletion tests/test_check_swatinit.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,9 @@ def test_eqlnum2(tmp_path, mocker):
def test_reorder_dframe_for_nonnans(inputrows, expected):
"""Test that rows with less NaNs will be prioritized through the reorder function"""
pd.testing.assert_frame_equal(
reorder_dframe_for_nonnans(pd.DataFrame(inputrows)), pd.DataFrame(expected)
reorder_dframe_for_nonnans(pd.DataFrame(inputrows)),
pd.DataFrame(expected),
check_column_type=False,
)


Expand Down
4 changes: 3 additions & 1 deletion tests/test_csv2ofmvol.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,9 @@ def test_df2vol(dframe, expected_lines):
else:
# (bogus columns in dframe must be ignored)
pd.testing.assert_frame_equal(
dframe[backagain_df.columns].fillna(value=0.0), backagain_df
dframe[backagain_df.columns].fillna(value=0.0),
backagain_df,
check_index_type=False,
)


Expand Down
1 change: 1 addition & 0 deletions tests/test_fmuobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def test_roundtrip_yaml(filename, readonly_testdata_dir):
yaml_roundtrip_dframe.sort_index(axis="columns").sort_values("LABEL"),
dframe.sort_index(axis="columns").sort_values("LABEL"),
check_like=True,
check_dtype=False,
)


Expand Down
2 changes: 2 additions & 0 deletions tests/test_fmuobs_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,11 +508,13 @@ def test_ertobs2df_starttime(string, expected):
pd.testing.assert_frame_equal(
ertobs2df(string, starttime="2020-01-01").sort_index(axis=1),
expected.sort_index(axis=1),
check_dtype=False,
)
# Test again with datetime object passed, not string:
pd.testing.assert_frame_equal(
ertobs2df(string, starttime=datetime.date(2020, 1, 1)).sort_index(axis=1),
expected.sort_index(axis=1),
check_dtype=False,
)


Expand Down
1 change: 1 addition & 0 deletions tests/test_fmuobs_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ def test_convert_dframe_date_to_str(dframe, expected_dframe):
pd.testing.assert_frame_equal(
convert_dframe_date_to_str(dframe),
expected_dframe,
check_dtype=False,
)


Expand Down
8 changes: 6 additions & 2 deletions tests/test_ofmvol2csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def test_parse_well(inputlines, expected):
inputlines = ofmvol2csv.cleanse_ofm_lines(inputlines)
colnames = ofmvol2csv.extract_columnnames(inputlines)
dframe = ofmvol2csv.parse_well(inputlines[1:], colnames)
pd.testing.assert_frame_equal(dframe, expected)
pd.testing.assert_frame_equal(dframe, expected, check_index_type=False)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -362,7 +362,11 @@ def test_process_volstr(inputlines, expected):
expected["DATE"] = pd.to_datetime(expected["DATE"])
expected = expected.set_index(["WELL", "DATE"])
dframe = ofmvol2csv.process_volstr("\n".join(inputlines))
pd.testing.assert_frame_equal(dframe, expected)
pd.testing.assert_frame_equal(
dframe,
expected,
check_index_type=False,
)


@pytest.mark.parametrize(
Expand Down