diff --git a/ingestify/domain/models/ingestion/ingestion_job.py b/ingestify/domain/models/ingestion/ingestion_job.py index 6b3b0b5..325ce5e 100644 --- a/ingestify/domain/models/ingestion/ingestion_job.py +++ b/ingestify/domain/models/ingestion/ingestion_job.py @@ -137,7 +137,7 @@ def run(self): for file_id, file_resource in self.dataset_resource.files.items() } - self.dataset_resource.run_post_load_files(files) + self.dataset_resource.run_post_load_files(files, self.dataset) try: revision = self.store.update_dataset( diff --git a/ingestify/domain/models/resources/dataset_resource.py b/ingestify/domain/models/resources/dataset_resource.py index 9b00f72..293073e 100644 --- a/ingestify/domain/models/resources/dataset_resource.py +++ b/ingestify/domain/models/resources/dataset_resource.py @@ -3,6 +3,7 @@ from pydantic import Field from ingestify.domain.models.base import BaseModel +from ingestify.domain.models.dataset import Dataset from ingestify.domain.models.dataset.dataset_state import DatasetState from ingestify.exceptions import DuplicateFile @@ -51,17 +52,19 @@ class DatasetResource(BaseModel): state: DatasetState = Field(default_factory=lambda: DatasetState.COMPLETE) files: dict[str, FileResource] = Field(default_factory=dict) post_load_files: Optional[ - Callable[["DatasetResource", Dict[str, DraftFile]], None] + Callable[["DatasetResource", Dict[str, DraftFile], Optional[Dataset]], None] ] = None - def run_post_load_files(self, files: Dict[str, DraftFile]): + def run_post_load_files( + self, files: Dict[str, DraftFile], existing_dataset: Optional[Dataset] = None + ): """Hook to modify dataset attributes based on loaded file content. Useful for setting state based on file content, e.g., keep state=SCHEDULED when files contain '{}', change to COMPLETE when they contain actual data. """ if self.post_load_files: - self.post_load_files(self, files) + self.post_load_files(self, files, existing_dataset) def add_file( self, diff --git a/ingestify/tests/test_engine.py b/ingestify/tests/test_engine.py index f504288..a7ec4d1 100644 --- a/ingestify/tests/test_engine.py +++ b/ingestify/tests/test_engine.py @@ -460,7 +460,9 @@ def test_dev_engine(): assert datasets.first().name == "Test Dataset" -def post_load_hook(dataset_resource: DatasetResource, files: dict[str, DraftFile]): +def post_load_hook( + dataset_resource: DatasetResource, files: dict[str, DraftFile], existing_dataset +): # Change state to COMPLETE if file content is not '{}' for file in files.values(): if file.size > 2: @@ -545,3 +547,13 @@ def test_force_save_creates_revision(config_file): dataset_without_files = engine.store.get_dataset_collection(season_id=3).first() assert len(dataset_without_files.revisions) == 1 assert len(dataset_without_files.current_revision.modified_files) == 0 + + dataset_with_last_modified = engine.store.get_dataset_collection( + season_id=2 + ).first() + + dataset_without_files = engine.store.get_dataset_collection(metadata_only=True) + assert ( + dataset_without_files.metadata.last_modified + == dataset_with_last_modified.last_modified_at + )