From b07aac3e83f16a583949d4120d5f2c1e145607f5 Mon Sep 17 00:00:00 2001 From: Hamdah Shafqat Abbasi Date: Mon, 16 Dec 2024 15:58:26 -0500 Subject: [PATCH 1/8] integrated filepattern in filerenaming --- formats/file-renaming-tool/.bumpversion.cfg | 6 +- formats/file-renaming-tool/Dockerfile | 2 +- formats/file-renaming-tool/README.md | 2 +- formats/file-renaming-tool/VERSION | 2 +- formats/file-renaming-tool/filerenaming.cwl | 2 +- formats/file-renaming-tool/ict.yaml | 110 +++++++++--------- formats/file-renaming-tool/plugin.json | 4 +- formats/file-renaming-tool/pyproject.toml | 13 ++- .../images/formats/file_renaming/__init__.py | 2 +- .../formats/file_renaming/file_renaming.py | 20 ++-- 10 files changed, 88 insertions(+), 75 deletions(-) diff --git a/formats/file-renaming-tool/.bumpversion.cfg b/formats/file-renaming-tool/.bumpversion.cfg index 9f1772079..4894cd358 100644 --- a/formats/file-renaming-tool/.bumpversion.cfg +++ b/formats/file-renaming-tool/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.4 +current_version = 0.2.4-dev2 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
@@ -22,6 +22,10 @@ replace = version = "{new_version}" [bumpversion:file:plugin.json] +[bumpversion:file:filerenaming.cwl] + +[bumpversion:file:ict.yaml] + [bumpversion:file:VERSION] [bumpversion:file:src/polus/images/formats/file_renaming/__init__.py] diff --git a/formats/file-renaming-tool/Dockerfile b/formats/file-renaming-tool/Dockerfile index 52c8c942e..5a194d252 100644 --- a/formats/file-renaming-tool/Dockerfile +++ b/formats/file-renaming-tool/Dockerfile @@ -1,4 +1,4 @@ -FROM polusai/bfio:2.3.6 +FROM polusai/bfio:2.3.3 # environment variables defined in polusai/bfio ENV EXEC_DIR="/opt/executables" diff --git a/formats/file-renaming-tool/README.md b/formats/file-renaming-tool/README.md index 37e01d17d..cbdd4d0cb 100644 --- a/formats/file-renaming-tool/README.md +++ b/formats/file-renaming-tool/README.md @@ -1,4 +1,4 @@ -# File Renaming(0.2.4-dev0) +# File Renaming(0.2.4-dev2) This WIPP plugin uses supplied file naming patterns to dynamically rename and save files in an image collection to a new image collection. 
diff --git a/formats/file-renaming-tool/VERSION b/formats/file-renaming-tool/VERSION index abd410582..0184eef11 100644 --- a/formats/file-renaming-tool/VERSION +++ b/formats/file-renaming-tool/VERSION @@ -1 +1 @@ -0.2.4 +0.2.4-dev2 diff --git a/formats/file-renaming-tool/filerenaming.cwl b/formats/file-renaming-tool/filerenaming.cwl index 454a1dae8..2eafae2a4 100644 --- a/formats/file-renaming-tool/filerenaming.cwl +++ b/formats/file-renaming-tool/filerenaming.cwl @@ -28,7 +28,7 @@ outputs: type: Directory requirements: DockerRequirement: - dockerPull: polusai/file-renaming-tool:0.2.4-dev0 + dockerPull: polusai/file-renaming-tool:0.2.4-dev2 InitialWorkDirRequirement: listing: - entry: $(inputs.outDir) diff --git a/formats/file-renaming-tool/ict.yaml b/formats/file-renaming-tool/ict.yaml index 56e75a25e..05b4908e0 100644 --- a/formats/file-renaming-tool/ict.yaml +++ b/formats/file-renaming-tool/ict.yaml @@ -1,65 +1,65 @@ author: -- Melanie Parham -- Hamdah Shafqat + - Melanie Parham + - Hamdah Shafqat contact: melanie.parham@axleinfo.com -container: polusai/file-renaming-tool:0.2.4-dev0 +container: polusai/file-renaming-tool:0.2.4-dev2 description: Rename and store image collection files in a new image collection entrypoint: python3 -m polus.images.formats.file_renaming inputs: -- description: Filename pattern used to separate data - format: - - string - name: filePattern - required: true - type: string -- description: Input image collection to be processed by this plugin - format: - - collection - name: inpDir - required: true - type: path -- description: Desired filename pattern used to rename and separate data - format: - - string - name: outFilePattern - required: true - type: string -- description: Get directory name incorporated in renamed files - format: - - enum - name: mapDirectory - required: false - type: string + - description: Filename pattern used to separate data + format: + - string + name: filePattern + required: true + type: string + - description: 
Input image collection to be processed by this plugin + format: + - collection + name: inpDir + required: true + type: path + - description: Desired filename pattern used to rename and separate data + format: + - string + name: outFilePattern + required: true + type: string + - description: Get directory name incorporated in renamed files + format: + - enum + name: mapDirectory + required: false + type: string name: polusai/FileRenaming outputs: -- description: Output collection - format: - - collection - name: outDir - required: true - type: path -repository: https://github.com/PolusAI/polus-plugins + - description: Output collection + format: + - collection + name: outDir + required: true + type: path +repository: https://github.com/PolusAI/image-tools specVersion: 1.0.0 title: File Renaming ui: -- description: Filename pattern used to separate data - key: inputs.filePattern - title: Filename pattern - type: text -- description: Input image collection to be processed by this plugin - key: inputs.inpDir - title: Input collection - type: path -- description: Desired filename pattern used to rename and separate data - key: inputs.outFilePattern - title: Output filename pattern - type: text -- description: Get directory name incorporated in renamed files - fields: - - raw - - map - - default - key: inputs.mapDirectory - title: mapDirectory - type: select -version: 0.2.4-dev0 + - description: Filename pattern used to separate data + key: inputs.filePattern + title: Filename pattern + type: text + - description: Input image collection to be processed by this plugin + key: inputs.inpDir + title: Input collection + type: path + - description: Desired filename pattern used to rename and separate data + key: inputs.outFilePattern + title: Output filename pattern + type: text + - description: Get directory name incorporated in renamed files + fields: + - raw + - map + - default + key: inputs.mapDirectory + title: mapDirectory + type: select +version: 0.2.4-dev2 diff --git 
a/formats/file-renaming-tool/plugin.json b/formats/file-renaming-tool/plugin.json index 082e9600c..db3792d99 100644 --- a/formats/file-renaming-tool/plugin.json +++ b/formats/file-renaming-tool/plugin.json @@ -1,6 +1,6 @@ { "name": "File Renaming", - "version": "0.2.4", + "version": "0.2.4-dev2", "title": "File Renaming", "description": "Rename and store image collection files in a new image collection", "author": "Melanie Parham (melanie.parham@axleinfo.com), Hamdah Shafqat Abbasi (hamdahshafqat.abbasi@nih.gov)", @@ -8,7 +8,7 @@ "repository": "https://github.com/PolusAI/image-tools", "website": "https://ncats.nih.gov/preclinical/core/informatics", "citation": "", - "containerId": "polusai/file-renaming-tool:0.2.4", + "containerId": "polusai/file-renaming-tool:0.2.4-dev2", "baseCommand": [ "python3", "-m", diff --git a/formats/file-renaming-tool/pyproject.toml b/formats/file-renaming-tool/pyproject.toml index 3f1d2dafb..b3f924812 100644 --- a/formats/file-renaming-tool/pyproject.toml +++ b/formats/file-renaming-tool/pyproject.toml @@ -1,10 +1,10 @@ [tool.poetry] name = "polus-images-formats-file-renaming" -version = "0.2.4" -description = "Rename and store image collection files in a new image collection" +version = "0.2.4-dev2" +description = "Convert BioFormats datatypes to ome.tif or ome.zarr file format" authors = [ -"Melanie Parham ", -"Hamdah Shafqat abbasi " + "Hamdah Shafqat abbasi ", + "Melanie Parham ", ] readme = "README.md" packages = [{include = "polus", from = "src"}] @@ -26,3 +26,8 @@ pytest = "^7.2.1" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +pythonpath = [ + "." 
+] diff --git a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__init__.py b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__init__.py index 53b82f8f8..e0a3361b7 100644 --- a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__init__.py +++ b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__init__.py @@ -1,4 +1,4 @@ """File Renaming.""" -__version__ = "0.2.4" +__version__ = "0.2.4-dev2" from . import file_renaming diff --git a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py index 2b570d7b8..3ea108ed6 100644 --- a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py +++ b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py @@ -264,7 +264,8 @@ def letters_to_int(named_grp: str, all_matches: list) -> dict: """Alphabetically number matches for the given named group for all files. Make a dictionary where each key is a match for each filename and - the corresponding value is a number indicating its alphabetical rank. + the corresponding value is a number indicating its alphabetical rank, + with single-letter keys sorted first, followed by double-letter keys. Args: named_grp: Group with c in input pattern and d in out pattern. @@ -274,13 +275,16 @@ def letters_to_int(named_grp: str, all_matches: list) -> dict: cat_index_dict: dict key=category name, value=index after sorting. """ logger.debug(f"letters_to_int() inputs: {named_grp}, {all_matches}") - #: Generate list of strings belonging to the given category (element). 
- alphabetized_matches = sorted( - {namedgrp_match_dict[named_grp] for namedgrp_match_dict in all_matches}, - ) - str_alphabetindex_dict = {} - for i in range(0, len(alphabetized_matches)): - str_alphabetindex_dict[alphabetized_matches[i]] = i + + # Generate a set of unique matches for the given group + matches = {namedgrp_match_dict[named_grp] for namedgrp_match_dict in all_matches} + + # Sort with single-letter keys first, then double-letter keys + alphabetized_matches = sorted(matches, key=lambda x: (len(x) > 1, x)) + + # Create a dictionary mapping each match to its alphabetical rank + str_alphabetindex_dict = {match: i for i, match in enumerate(alphabetized_matches)} + logger.debug(f"letters_to_int() returns {str_alphabetindex_dict}") return str_alphabetindex_dict From 105b01e1fc40bc2cf1d042a5a28943c8fd80b1dc Mon Sep 17 00:00:00 2001 From: Hamdah Shafqat Abbasi Date: Tue, 17 Dec 2024 09:23:48 -0500 Subject: [PATCH 2/8] fix manifest --- formats/file-renaming-tool/Dockerfile | 2 +- .../src/polus/images/formats/file_renaming/__main__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/formats/file-renaming-tool/Dockerfile b/formats/file-renaming-tool/Dockerfile index 5a194d252..46ad0ab4d 100644 --- a/formats/file-renaming-tool/Dockerfile +++ b/formats/file-renaming-tool/Dockerfile @@ -1,4 +1,4 @@ -FROM polusai/bfio:2.3.3 +FROM polusai/bfio:2.4.5 # environment variables defined in polusai/bfio ENV EXEC_DIR="/opt/executables" diff --git a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py index d186f9ca5..c3ad6160f 100644 --- a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py +++ b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py @@ -82,8 +82,8 @@ def main( # noqa: PLR0913 D417 C901 PLR0912 PLR0915 logger.info(f"outFilePattern = {out_file_pattern}") logger.info(f"mapDirectory = 
{map_directory}") - inp_dir = inp_dir.resolve() - out_dir = out_dir.resolve() + inp_dir = pathlib.Path(inp_dir).resolve() + out_dir = pathlib.Path(inp_dir).resolve() assert ( inp_dir.exists() From e2fd424f9546dfe0d4602644efeb3ee3ecd6e991 Mon Sep 17 00:00:00 2001 From: Hamdah Shafqat Abbasi Date: Tue, 17 Dec 2024 14:32:51 -0500 Subject: [PATCH 3/8] fixed sorting key error --- formats/file-renaming-tool/.bumpversion.cfg | 6 +- formats/file-renaming-tool/CHANGELOG.md | 5 + formats/file-renaming-tool/README.md | 44 ++- formats/file-renaming-tool/VERSION | 2 +- formats/file-renaming-tool/filerenaming.cwl | 2 +- formats/file-renaming-tool/ict.yaml | 108 +++--- formats/file-renaming-tool/plugin.json | 23 +- formats/file-renaming-tool/pyproject.toml | 3 +- formats/file-renaming-tool/run-plugin.sh | 6 +- .../images/formats/file_renaming/__init__.py | 2 +- .../images/formats/file_renaming/__main__.py | 99 +---- .../formats/file_renaming/file_renaming.py | 308 +++++----------- formats/file-renaming-tool/tests/test_main.py | 340 ++++-------------- 13 files changed, 287 insertions(+), 661 deletions(-) diff --git a/formats/file-renaming-tool/.bumpversion.cfg b/formats/file-renaming-tool/.bumpversion.cfg index 4894cd358..33fe26444 100644 --- a/formats/file-renaming-tool/.bumpversion.cfg +++ b/formats/file-renaming-tool/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.4-dev2 +current_version = 0.2.5-dev0 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
@@ -22,10 +22,12 @@ replace = version = "{new_version}" [bumpversion:file:plugin.json] -[bumpversion:file:filerenaming.cwl] +[bumpversion:file:FileRenaming.cwl] [bumpversion:file:ict.yaml] [bumpversion:file:VERSION] +[bumpversion:file:README.md] + [bumpversion:file:src/polus/images/formats/file_renaming/__init__.py] diff --git a/formats/file-renaming-tool/CHANGELOG.md b/formats/file-renaming-tool/CHANGELOG.md index 02a40369f..f3dd92b6f 100644 --- a/formats/file-renaming-tool/CHANGELOG.md +++ b/formats/file-renaming-tool/CHANGELOG.md @@ -2,3 +2,8 @@ ### Added - Pytests to test this plugin - Added a support for recursively searching for files within a directory and its subdirectories of specified pattern by passing value either raw or map for `mapDirectory` input argument. + +## [0.2.4-dev2] - 2024-12-17 +### Added +- Integrated filepattern in this tool +- Modified the sorting dictionary letters key with respect to length diff --git a/formats/file-renaming-tool/README.md b/formats/file-renaming-tool/README.md index cbdd4d0cb..b8bd16af3 100644 --- a/formats/file-renaming-tool/README.md +++ b/formats/file-renaming-tool/README.md @@ -1,4 +1,4 @@ -# File Renaming(0.2.4-dev2) +# File Renaming(v0.2.5-dev0) This WIPP plugin uses supplied file naming patterns to dynamically rename and save files in an image collection to a new image collection. @@ -16,10 +16,10 @@ naming conventions. `newdata_x001_y001_c002.tif` `newdata_x001_y001_c003.tif` - * **User input pattern:** + * **filePattern:** `img_x{row:dd}_y{col:dd}_{channel:c+}.ome.tif` - * **User output pattern:** + * **outFilePattern:** `newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.ome.tif` * The user can format the output digit using the number of digits @@ -38,7 +38,41 @@ exception: then the script sorts the strings that match the character pattern and assigns numbers 0+ to them. -* New optional feature `mapDirectory` implemented to include directory name in renamed files. 
This plugin also handles nested directories and one level up directory name is added to renamed files if `raw` value passed, `map` for mapped subdirectories `d0, d1, d2, ... dn` and if not passed then no directory name is added in renamed files. +* Implemented a new optional boolean feature `mapDirectory` to append mapped directory names in renamed files. + + +## Renaming files within a complex nested directory structure: +In specific scenarios where users need to rename files within nested subdirectories, this functionality can be leveraged by providing an appropriate pattern. + +For example: + +``` +└── BBBC001 + └── raw + ├── Ground_Truth + │ └── groundtruth_images + │ ├── AS_09125_050118150001_A03f00d0.tif + │ ├── AS_09125_050118150001_A03f01d0.tif + │ ├── AS_09125_050118150001_A03f02d0.tif + │ ├── AS_09125_050118150001_A03f03d0.tif + │ ├── AS_09125_050118150001_A03f04d0.tif + │ └── AS_09125_050118150001_A03f05d0.tif + └── Images + └── human_ht29_colon_cancer_1_images + ├── AS_09125_050118150001_A03f00d0.tif + ├── AS_09125_050118150001_A03f01d0.tif + ├── AS_09125_050118150001_A03f02d0.tif + ├── AS_09125_050118150001_A03f03d0.tif + ├── AS_09125_050118150001_A03f04d0.tif + └── AS_09125_050118150001_A03f05d0.tif + +``` + +Now, renaming files within the `human_ht29_colon_cancer_1_images` is achievable by providing a `filePattern` such as `/.*/.*/.*/Images/(?P<directory>.*)/.*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif`, and specifying `outFilePattern` as `x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif`. If the mapDirectory option is not utilized, the raw directory name will be appended in the renamed files. To handle directory names containing both letters and digits, employ `(?P<directory>.*)`; use `{directory:c+}` or `{directory:d+}` if it contains solely letters or digits, respectively. 
+ +#### Note: +To extract directory names, the pattern should start with a forward slash (`/`) + Contact [Melanie Parham](mailto:melanie.parham@axleinfo.com), [Hamdah Shafqat abbasi](mailto:hamdahshafqat.abbasi@nih.gov) for more @@ -67,5 +101,5 @@ This plugin takes three input argument and one output argument: | `--filePattern` | Input filename pattern | Input | string | | `--outDir` | Output collection | Output | collection | | `--outFilePattern` | Output filename pattern | Input | string | -| `--mapDirectory` | Directory name (`raw`, `map`) | Input | enum | +| `--mapDirectory` | Extract mapped directory name | Input | boolean | | `--preview` | Generate a JSON file with outputs | Output | JSON | diff --git a/formats/file-renaming-tool/VERSION b/formats/file-renaming-tool/VERSION index 0184eef11..0eac58ed7 100644 --- a/formats/file-renaming-tool/VERSION +++ b/formats/file-renaming-tool/VERSION @@ -1 +1 @@ -0.2.4-dev2 +0.2.5-dev0 diff --git a/formats/file-renaming-tool/filerenaming.cwl b/formats/file-renaming-tool/filerenaming.cwl index 2eafae2a4..e633c76d2 100644 --- a/formats/file-renaming-tool/filerenaming.cwl +++ b/formats/file-renaming-tool/filerenaming.cwl @@ -28,7 +28,7 @@ outputs: type: Directory requirements: DockerRequirement: - dockerPull: polusai/file-renaming-tool:0.2.4-dev2 + dockerPull: polusai/file-renaming-tool:0.2.5-dev0 InitialWorkDirRequirement: listing: - entry: $(inputs.outDir) diff --git a/formats/file-renaming-tool/ict.yaml b/formats/file-renaming-tool/ict.yaml index 05b4908e0..bb6ebce21 100644 --- a/formats/file-renaming-tool/ict.yaml +++ b/formats/file-renaming-tool/ict.yaml @@ -1,65 +1,61 @@ author: - - Melanie Parham - - Hamdah Shafqat -contact: melanie.parham@axleinfo.com -container: polusai/file-renaming-tool:0.2.4-dev2 +- Hamdah Shafqat +- Melanie Parham +contact: hamdahshafqat.abbasi@nih.gov +container: polusai/file-renaming-tool:0.2.5-dev0 description: Rename and store image collection files in a new image collection entrypoint: python3 -m 
polus.images.formats.file_renaming inputs: - - description: Filename pattern used to separate data - format: - - string - name: filePattern - required: true - type: string - - description: Input image collection to be processed by this plugin - format: - - collection - name: inpDir - required: true - type: path - - description: Desired filename pattern used to rename and separate data - format: - - string - name: outFilePattern - required: true - type: string - - description: Get directory name incorporated in renamed files - format: - - enum - name: mapDirectory - required: false - type: string +- description: Filename pattern used to separate data + format: + - string + name: filePattern + required: true + type: string +- description: Input image collection to be processed by this plugin + format: + - collection + name: inpDir + required: true + type: path +- description: Desired filename pattern used to rename and separate data + format: + - string + name: outFilePattern + required: true + type: string +- description: Incorporate mapped directory names into renamed files + format: + - boolean + name: mapDirectory + required: false + type: boolean name: polusai/FileRenaming outputs: - - description: Output collection - format: - - collection - name: outDir - required: true - type: path -repository: https://github.com/PolusAI/image-tools +- description: Output collection + format: + - collection + name: outDir + required: true + type: path +repository: https://github.com/PolusAI/polus-plugins specVersion: 1.0.0 title: File Renaming ui: - - description: Filename pattern used to separate data - key: inputs.filePattern - title: Filename pattern - type: text - - description: Input image collection to be processed by this plugin - key: inputs.inpDir - title: Input collection - type: path - - description: Desired filename pattern used to rename and separate data - key: inputs.outFilePattern - title: Output filename pattern - type: text - - description: Get directory 
name incorporated in renamed files - fields: - - raw - - map - - default - key: inputs.mapDirectory - title: mapDirectory - type: select -version: 0.2.4-dev2 +- description: Filename pattern used to separate data + key: inputs.filePattern + title: Filename pattern + type: text +- description: Input image collection to be processed by this plugin + key: inputs.inpDir + title: Input collection + type: path +- description: Desired filename pattern used to rename and separate data + key: inputs.outFilePattern + title: Output filename pattern + type: text +- description: Incorporate mapped directory names into renamed files + key: inputs.mapDirectory + title: mapDirectory + type: checkbox +version: 0.2.5-dev0 diff --git a/formats/file-renaming-tool/plugin.json b/formats/file-renaming-tool/plugin.json index db3792d99..acb292d52 100644 --- a/formats/file-renaming-tool/plugin.json +++ b/formats/file-renaming-tool/plugin.json @@ -1,14 +1,14 @@ { "name": "File Renaming", - "version": "0.2.4-dev2", + "version": "0.2.5-dev0", "title": "File Renaming", "description": "Rename and store image collection files in a new image collection", - "author": "Melanie Parham (melanie.parham@axleinfo.com), Hamdah Shafqat Abbasi (hamdahshafqat.abbasi@nih.gov)", + "author": "Hamdah Shafqat Abbasi (hamdahshafqat.abbasi@nih.gov), Melanie Parham (melanie.parham@axleinfo.com)", "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", - "repository": "https://github.com/PolusAI/image-tools", + "repository": "https://github.com/PolusAI/polus-plugins", "website": "https://ncats.nih.gov/preclinical/core/informatics", "citation": "", - "containerId": "polusai/file-renaming-tool:0.2.4-dev2", + "containerId": "polusai/file-renaming-tool:0.2.5-dev0", "baseCommand": [ "python3", "-m", @@ -35,16 +35,8 @@ }, { "name": "mapDirectory", - "type": "enum", - "description": "Get directory name incorporated in renamed files", - "default": "default", - "options": { - 
"values": [ - "raw", - "map", - "default" - ] - }, + "type": "boolean", + "description": "Incorporate mapped directory names into renamed files", "required": false } ], @@ -74,8 +66,7 @@ { "key": "inputs.mapDirectory", "title": "mapDirectory", - "description": "Get directory name incorporated in renamed files", - "default": "" + "description": "Incorporate mapped directory names into renamed files" } ] } diff --git a/formats/file-renaming-tool/pyproject.toml b/formats/file-renaming-tool/pyproject.toml index b3f924812..357b5625b 100644 --- a/formats/file-renaming-tool/pyproject.toml +++ b/formats/file-renaming-tool/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "polus-images-formats-file-renaming" -version = "0.2.4-dev2" +version = "0.2.5-dev0" description = "Convert BioFormats datatypes to ome.tif or ome.zarr file format" authors = [ "Hamdah Shafqat abbasi ", @@ -14,6 +14,7 @@ python = ">=3.9,<3.12" typer = "^0.7.0" tqdm = "^4.64.1" numpy = "^1.26.3" +filepattern = "^2.0.5" [tool.poetry.group.dev.dependencies] bump2version = "^1.0.1" diff --git a/formats/file-renaming-tool/run-plugin.sh b/formats/file-renaming-tool/run-plugin.sh index c9b7a5ef3..0e0a0f284 100644 --- a/formats/file-renaming-tool/run-plugin.sh +++ b/formats/file-renaming-tool/run-plugin.sh @@ -6,7 +6,7 @@ datapath=$(readlink --canonicalize data) inpDir=/data/inputs filePattern=".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif" outFilePattern="r01_x{row:c}_y{col:dd}_p{s:d}_c{channel:d}.ome.tif" -mapDirectory="raw" +mapDirectory=true # Output paths outDir=/data/output @@ -15,9 +15,9 @@ docker run polusai/file-renaming-plugin:${version} # Run the plugin docker run --mount type=bind,source=${datapath},target=/data/ \ - polusai/file-renaming-plugin:${version} \ + polusai/file-renaming-tool:${version} \ --inpDir ${inpDir} \ --filePattern ${filePattern} \ --outFilePattern ${outFilePattern} \ - --mapDirectory ${outFilePattern} \ + --mapDirectory --outDir ${outDir} diff --git 
a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__init__.py b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__init__.py index e0a3361b7..93e7aaa20 100644 --- a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__init__.py +++ b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__init__.py @@ -1,4 +1,4 @@ """File Renaming.""" -__version__ = "0.2.4-dev2" +__version__ = "0.2.5-dev0" from . import file_renaming diff --git a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py index c3ad6160f..825246d15 100644 --- a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py +++ b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py @@ -3,12 +3,9 @@ import logging import os import pathlib -import re -from re import Match from typing import Any from typing import Optional -import numpy as np import typer from polus.images.formats.file_renaming import file_renaming as fr @@ -24,7 +21,7 @@ @app.command() -def main( # noqa: PLR0913 D417 C901 PLR0912 PLR0915 +def main( # noqa: PLR0913, D417 inp_dir: pathlib.Path = typer.Option( ..., "--inpDir", @@ -45,8 +42,8 @@ def main( # noqa: PLR0913 D417 C901 PLR0912 PLR0915 "--outFilePattern", help="Desired filename pattern used to rename and separate data", ), - map_directory: Optional[fr.MappingDirectory] = typer.Option( - fr.MappingDirectory.Default, + map_directory: Optional[bool] = typer.Option( + False, "--mapDirectory", help="Get folder name", ), @@ -83,7 +80,7 @@ def main( # noqa: PLR0913 D417 C901 PLR0912 PLR0915 logger.info(f"mapDirectory = {map_directory}") inp_dir = pathlib.Path(inp_dir).resolve() - out_dir = pathlib.Path(inp_dir).resolve() + out_dir = pathlib.Path(out_dir).resolve() assert ( inp_dir.exists() @@ -92,92 +89,24 @@ def main( # noqa: PLR0913 D417 C901 PLR0912 PLR0915 out_dir.exists() ), 
f"{out_dir} does not exists!! Please check output path again" - subdirs, subfiles = fr.get_data(inp_dir) - if subfiles: - assert len(subfiles) != 0, "Files are missing in input directory!!!" - - if not map_directory: - fr.rename( - inp_dir, - out_dir, - file_pattern, - out_file_pattern, - ) - - elif map_directory: - file_ext = re.split("\\.", file_pattern)[-1] - - subdirs = np.unique( - [ - sub - for sub in subdirs - for f in pathlib.Path(sub).rglob("*") - if f.suffix == f".{file_ext}" - ], - ) - - if len(subdirs) == 1: - logger.info( - "Renaming files in a single directory.", - ) - dir_pattern = r"^[A-Za-z0-9_]+$" - # Iterate over the directories and check if they match the pattern - matching_directory: Optional[Match[Any]] = re.match( - dir_pattern, - pathlib.Path(subdirs[0]).stem, - ) - if matching_directory is not None: - matching_directory = matching_directory.group() - if f"{map_directory}" == "raw": - outfile_pattern = f"{matching_directory}_{out_file_pattern}" - if f"{map_directory}" == "map": - outfile_pattern = f"d1_{out_file_pattern}" - - fr.rename(subdirs[0], out_dir, file_pattern, outfile_pattern) - logger.info( - "Finished renaming files.", - ) - if len(subdirs) > 1: - subnames = [pathlib.Path(sb).name for sb in subdirs] - sub_check = all(name == subnames[0] for name in subnames) - - for i, sub in enumerate(subdirs): - assert ( - len([f for f in pathlib.Path(sub).iterdir() if f.is_file()]) != 0 - ), "Files are missing in input directory!!!" 
- dir_pattern = r"^[A-Za-z0-9_]+$" - # Iterate over the directories and check if they match the pattern - matching_directories: Optional[Match[Any]] = re.match( - dir_pattern, - pathlib.Path(sub).stem, - ) - if matching_directories is not None: - matching_directories = matching_directories.group() - - if not sub_check and f"{map_directory}" == "raw": - outfile_pattern = f"{matching_directories}_{out_file_pattern}" - elif subnames and f"{map_directory}" == "raw": - logger.error( - "Subdirectoy names are same, should be different.", - ) - break - else: - outfile_pattern = f"d{i}_{out_file_pattern}" - fr.rename(sub, out_dir, file_pattern, outfile_pattern) - logger.info( - "Finished renaming files.", - ) - - if preview: + if not preview: + fr.rename(inp_dir, out_dir, file_pattern, out_file_pattern, map_directory) + else: with pathlib.Path.open(pathlib.Path(out_dir, "preview.json"), "w") as jfile: + fr.rename(inp_dir, out_dir, file_pattern, out_file_pattern, map_directory) out_json: dict[str, Any] = { "filepattern": out_file_pattern, "outDir": [], } for file in out_dir.iterdir(): - if file.is_file() and file.suffix != ".json": + if ( + file.is_file() + and file.suffix != ".json" + and not file.name.startswith(".") + ): out_name = file.name out_json["outDir"].append(out_name) + pathlib.Path.unlink(file) json.dump(out_json, jfile, indent=2) diff --git a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py index 3ea108ed6..f61b32b32 100644 --- a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py +++ b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py @@ -1,5 +1,4 @@ """File Renaming.""" -import enum import logging import os import pathlib @@ -10,12 +9,11 @@ from multiprocessing import cpu_count from sys import platform from typing import Any -from typing import Union +from typing import Optional 
+import filepattern as fp from tqdm import tqdm -EXT = (".csv", ".txt", ".cppipe", ".yml", ".yaml", ".xml", ".json") - logger = logging.getLogger(__name__) logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO)) @@ -25,97 +23,6 @@ NUM_THREADS = max(cpu_count() // 2, 2) -class MappingDirectory(str, enum.Enum): - """Map Directory information.""" - - RAW = "raw" - MAP = "map" - Default = "" - - -def image_directory(dirpath: pathlib.Path) -> Union[bool, None]: - """Fetching image directory only. - - Args: - dirpath: Path to directory. - - Returns: - bool. - """ - for file in dirpath.iterdir(): - return bool(file.is_file() and file.suffix not in EXT) - return None - - -def get_data(inp_dir: str) -> tuple[list[pathlib.Path], list[pathlib.Path]]: - """Get group names from pattern. Convert patterns (c+ or dd) to regex. - - Args: - inp_dir: Path to input directory. - - Returns: - A tuple of list of subdirectories and files path. - """ - filepath: list[pathlib.Path] = [] - dirpaths: list[pathlib.Path] = [] - for path in pathlib.Path(inp_dir).rglob("*"): - if path.is_dir(): - if path.parent in dirpaths: - dirpaths.remove(path.parent) - if image_directory(path): - dirpaths.append(path) - elif path.is_file() and not path.name.endswith(tuple(EXT)): - fpath = pathlib.Path(inp_dir).joinpath(path) - filepath.append(fpath) - - return dirpaths, filepath - - -def map_pattern_grps_to_regex(file_pattern: str) -> dict: - """Get group names from pattern. Convert patterns (c+ or dd) to regex. - - Args: - file_pattern: File pattern, with special characters escaped. - - Returns: - rgx_patterns: The key is a named regex group. The value is regex. 
- """ - logger.debug(f"pattern_to_regex() inputs: {file_pattern}") - #: Extract the group name and associated pattern (ex: {row:dd}) - group_and_pattern_tuples = re.findall(r"\{(\w+):([dc+]+)\}", file_pattern) - pattern_map = {"d": r"[0-9]", "c": r"[a-zA-Z]", "+": "+"} - rgx_patterns = {} - for group_name, groups_pattern in group_and_pattern_tuples: - rgx = "".join([pattern_map[pattern] for pattern in groups_pattern]) - #: ?P is included to specify that foo is a named group. - rgx_patterns[group_name] = rf"(?P<{group_name}>{rgx})" - logger.debug(f"pattern_to_regex() returns {rgx_patterns}") - - return rgx_patterns - - -def convert_to_regex(file_pattern: str, extracted_rgx_patterns: dict) -> str: - """Integrate regex into original file pattern. - - The extracted_rgx_patterns helps replace simple patterns (ie. dd, c+) - with regex in the correct location, based on named groups. - - Args: - file_pattern: file pattern provided by the user. - extracted_rgx_patterns: named group and regex value dictionary. - - Returns: - new_pattern: file pattern converted to regex. - """ - logger.debug(f"convert_to_regex() inputs: {file_pattern}, {extracted_rgx_patterns}") - rgx_pattern = file_pattern - for named_grp, regex_str in extracted_rgx_patterns.items(): - #: The prefix "fr" creates raw f-strings, which act like format() - rgx_pattern = re.sub(rf"\{{{named_grp}:.*?\}}", regex_str, rgx_pattern) - logger.debug(f"convert_to_regex() returns {rgx_pattern}") - return rgx_pattern - - def specify_len(out_pattern: str) -> str: """Update output file pattern to output correct number of digits. @@ -187,60 +94,6 @@ def get_char_to_digit_grps(inp_pattern: str, out_pattern: str) -> list[str]: return special_categories -def extract_named_grp_matches( - rgx_pattern: str, - inp_files: list, -) -> list[dict[str, Union[str, Any]]]: - """Store matches from the substrings from each filename that vary. - - Loop through each file. Apply the regex pattern to each - filename. 
When a match occurs for a named group, add that match to - a dictionary, where the key is the named (regex capture) group and - the value is the corresponding match from the filename. - - Args: - rgx_pattern: input pattern in regex format. - inp_files: list of files in input directory. - - Returns: - grp_match_dict_list: list of dictionaries containing str matches. - """ - logger.debug(f"extract_named_grp_matches() inputs: {rgx_pattern}, {inp_files}") - grp_match_dict_list = [] - #: Build list of dicts, where key is capture group and value is match - for filename in inp_files: - try: - d = re.match(rgx_pattern, filename) - if d is None: - break - grp_match_dict = d.groupdict() - #: Add filename information to dictionary - grp_match_dict["fname"] = filename - grp_match_dict_list.append(grp_match_dict) - except AttributeError as e: - logger.error(e) - logger.error( - "File pattern does not match one or more files. " - "See README for pattern rules.", - ) - msg = "File pattern does not match with files." - raise AttributeError(msg) from e - except AssertionError as e: - if str(e).startswith("redefinition of group name"): - logger.error( - "Ensure that named groups in file patterns are unique. " - "({})".format(e), - ) - msg = f"Ensure that named groups in file patterns are unique. ({e})" - raise ValueError( - msg, - ) from e - - logger.debug(f"extract_named_grp_matches() returns {grp_match_dict_list}") - - return grp_match_dict_list - - def str_to_int(dictionary: dict) -> dict: """If a number in the dictionary is in str format, convert to int. @@ -289,11 +142,12 @@ def letters_to_int(named_grp: str, all_matches: list) -> dict: return str_alphabetindex_dict -def rename( # noqa: C901, PLR0915, PLR0912 - inp_dir: str, +def rename( # noqa: C901 + inp_dir: pathlib.Path, out_dir: pathlib.Path, file_pattern: str, out_file_pattern: str, + map_directory: Optional[bool] = False, ) -> None: """Scalable Extraction of Nyxus Features. 
@@ -302,107 +156,107 @@ def rename( # noqa: C901, PLR0915, PLR0912 out_dir : Path to image collection storing copies of renamed files. file_pattern : Input file pattern. out_file_pattern : Output file pattern. + map_directory : Mapping of folder name. """ logger.info("Start renaming files") - file_ext = re.split("\\.", file_pattern)[-1] - empty_ext = "" - ext_length = 5 - if file_ext == "*": - msg = "Please define filePattern including file extension!" - raise ValueError(msg) - if file_ext == empty_ext: - msg = "Please define filePattern including file extension!" - raise ValueError(msg) - if len(file_ext) > ext_length: - msg = "Please define filePattern including file extension!" - raise ValueError(msg) - - _, inpfiles = get_data(inp_dir) - - inp_files: list[str] = [ - f"{f.name}" for f in inpfiles if pathlib.Path(f).suffix == f".{file_ext}" - ] - - if len(inp_files) == 0: - msg = "Please check input directory again!! As it does not contain files" - raise ValueError(msg) - - chars_to_escape = ["(", ")", "[", "]", "$", "."] - for char in chars_to_escape: - file_pattern = file_pattern.replace(char, ("\\" + char)) - - if "\\.*" in file_pattern: - file_pattern = file_pattern.replace("\\.*", (".*")) - if "\\.+" in file_pattern: - file_pattern = file_pattern.replace("\\.+", (".+")) - groupname_regex_dict = map_pattern_grps_to_regex(file_pattern) - - # #: Integrate regex from dictionary into original file pattern - inp_pattern_rgx = convert_to_regex(file_pattern, groupname_regex_dict) - - # #: Integrate format strings into outFilePattern to specify digit/char len - out_pattern_fstring = specify_len(out_file_pattern) - - #: List named groups where input pattern=char & output pattern=digit - char_to_digit_categories = get_char_to_digit_grps(file_pattern, out_file_pattern) - #: List a dictionary (k=named grp, v=match) for each filename + files = fp.FilePattern(inp_dir, file_pattern, recursive=True) - all_grp_matches = extract_named_grp_matches(inp_pattern_rgx, inp_files) - 
- #: Convert numbers from strings to integers, if applicable - for i in range(0, len(all_grp_matches)): - tmp_match = all_grp_matches[i] - all_grp_matches[i] = str_to_int(tmp_match) - - if len(all_grp_matches) == 0: + if len(files) == 0: msg = f"Please define filePattern: {file_pattern} again!!" raise ValueError( msg, ) + inp_files: list[Any] = [file[0] for file in files()] + fpaths: list[str] = [file[1] for file in files()] + + #: Integrate format strings into outFilePattern to specify digit/char len + out_pattern_fstring = specify_len(out_file_pattern) + + #: List named groups where input pattern=char & output pattern=digit + char_to_digit_categories = get_char_to_digit_grps(file_pattern, out_file_pattern) + + #: Convert numbers from strings to integers, if applicable + for i in range(0, len(inp_files)): + tmp_match = inp_files[i] + inp_files[i] = str_to_int(tmp_match) + #: Populate dict if any matches need to be converted from char to digit #: Key=named group, Value=Int representing matched chars numbered_categories = {} for named_grp in char_to_digit_categories: - numbered_categories[named_grp] = letters_to_int(named_grp, all_grp_matches) + numbered_categories[named_grp] = letters_to_int(named_grp, inp_files) # Check named groups that need c->d conversion for named_grp in char_to_digit_categories: - for i in range(0, len(all_grp_matches)): - if all_grp_matches[i].get(named_grp): + for i in range(0, len(inp_files)): + if inp_files[i].get(named_grp): #: Replace original matched letter with new digit - all_grp_matches[i][named_grp] = numbered_categories[named_grp][ - all_grp_matches[i][named_grp] + inp_files[i][named_grp] = numbered_categories[named_grp][ + inp_files[i][named_grp] ] + # To create a dictionary mapping for folder names, + # The keys represent folder names and the values represent corresponding mappings. 
+ check_dir_var = bool([d for d in inp_files if "directory" in list(d.keys())]) + if map_directory: + if check_dir_var is False: + logger.error("directory variable is not included in filepattern correctly") + + else: + subdirs = sorted({d["directory"] for d in inp_files if d["directory"]}) + map_dirs = [f"d{i}" for i in range(1, len(subdirs) + 1)] + map_dict = dict(zip(subdirs, map_dirs)) with ProcessPoolExecutor(max_workers=NUM_THREADS) as executor: threads = [] - for match in all_grp_matches: - # : If running on WIPP - if out_dir != inp_dir: - #: Apply str formatting to change digit or char length - out_name = out_dir.resolve() / out_pattern_fstring.format( - **match, - ) - old_file_name = pathlib.Path(inp_dir, match["fname"]) - threads.append(executor.submit(shutil.copy2, old_file_name, out_name)) - else: - out_name = out_pattern_fstring.format(**match) # type: ignore - old_file_name = match["fname"] # type: ignore - logger.info(f"Old name {old_file_name} & new name {out_name}") - threads.append( - executor.submit( - os.rename, - pathlib.Path(inp_dir, old_file_name), - pathlib.Path(out_dir, out_name), - ), - ) + for match, p in zip(inp_files, fpaths): + if check_dir_var is True: + # Apply str formatting to change digit or char length + out_name = out_pattern_fstring.format(**match) + if map_directory: + try: + out_path = pathlib.Path( + out_dir, + f"{map_dict[match['directory']]}_{out_name}", + ) + except ValueError: + logger.error( + f"{match['directory']} is not provided in filePattern", + ) + + if not map_directory: + try: + out_path = pathlib.Path( + out_dir, + f"{ match['directory']}_{out_name}", + ) + except ValueError: + logger.error( + f"{match['directory']} is not provided in filePattern", + ) + + old_file_name = pathlib.Path(inp_dir, p[0]) + threads.append(executor.submit(shutil.copy2, old_file_name, out_path)) + + if check_dir_var is False and not map_directory: + try: + # Apply str formatting to change digit or char length + out_name = 
out_pattern_fstring.format(**match) + out_path = pathlib.Path(out_dir, out_name) + old_file_name = pathlib.Path(inp_dir, p[0]) + threads.append( + executor.submit(shutil.copy2, old_file_name, out_path), + ) + except ValueError: + logger.error( + f"filePattern:{file_pattern} is incorrectly defined!!!", + ) for f in tqdm( as_completed(threads), total=len(threads), mininterval=5, - desc="converting images", + desc="Renaming images", initial=0, unit_scale=True, colour="cyan", diff --git a/formats/file-renaming-tool/tests/test_main.py b/formats/file-renaming-tool/tests/test_main.py index e9d981bda..8269ad640 100644 --- a/formats/file-renaming-tool/tests/test_main.py +++ b/formats/file-renaming-tool/tests/test_main.py @@ -1,15 +1,12 @@ """Testing of File Renaming.""" - import json import pathlib import shutil import tempfile -from typing import Any -from typing import DefaultDict -from typing import Tuple +from typing import Any, DefaultDict + import click import pytest -import numpy as np from typer.testing import CliRunner from polus.images.formats.file_renaming import file_renaming as fr @@ -34,14 +31,12 @@ def output_directory(self) -> pathlib.Path: """Create temporary output directory.""" return tempfile.mkdtemp(dir=self.dirpath) - def runcommands( - self, inputs: pathlib.Path, inp_pattern: str, out_pattern: str - ) -> click.testing.Result: + def runcommands(self, inputs, inp_pattern, out_pattern) -> click.testing.Result: """Run command line arguments.""" inp_dir = self.input_directory() out_dir = self.output_directory() for inp in inputs: - pathlib.Path.open(pathlib.Path(inp_dir, inp), "w").close() + open(pathlib.Path(inp_dir, inp), "w").close() outputs = runner.invoke( app, @@ -58,9 +53,9 @@ def runcommands( ) return outputs - def load_json(self, x: str) -> DefaultDict[Any, Any]: + def load_json(self, x) -> DefaultDict[Any, Any]: """Json file containing image filenames.""" - with pathlib.Path.open(self.jsonpath) as file: + with open(self.jsonpath) as file: data = 
json.load(file) return data[x] @@ -135,67 +130,51 @@ def clean_directories(self) -> None: @pytest.fixture(params=fixture_params) -def poly(request: Tuple[str, str]) -> pytest.FixtureRequest: +def poly(request): """To get the parameter of the fixture.""" return request.param -def test_duplicate_channels_to_digit(poly: pytest.FixtureRequest) -> None: - """Testing of duplicate channels to digits.""" - d = CreateData() - inputs = d.load_json("duplicate_channels_to_digit") - (inp_pattern, out_pattern) = poly[0] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_duplicate_channels_to_digit_non_spec_digit_len( - poly: pytest.FixtureRequest, -) -> None: - """Testing of duplicate channels to digits with non specified length of digits.""" - d = CreateData() - inputs = d.load_json("duplicate_channels_to_digit") - (inp_pattern, out_pattern) = poly[1] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_invalid_input_raises_error(poly: pytest.FixtureRequest) -> None: +def test_invalid_input_raises_error(poly): """Testing of invalid input filepattern.""" d = CreateData() inputs = d.load_json("duplicate_channels_to_digit") (inp_pattern, out_pattern) = poly[0] d.runcommands(inputs, inp_pattern, out_pattern) + d.clean_directories() -def test_non_alphanum_inputs_percentage_sign(poly: pytest.FixtureRequest) -> None: +def test_non_alphanum_inputs_percentage_sign(poly): """Testing of filename with non alphanumeric inputs such as percentage sign.""" d = CreateData() inputs = d.load_json("percentage_file") (inp_pattern, out_pattern) = poly[3] outputs = d.runcommands(inputs, inp_pattern, out_pattern) assert outputs.exit_code == 0 + d.clean_directories() -def test_numeric_fixed_width(poly: pytest.FixtureRequest) -> None: +def test_numeric_fixed_width(poly): """Testing of filename with numeric fixed length.""" d = CreateData() inputs = d.load_json("robot") (inp_pattern, out_pattern) = 
poly[4] outputs = d.runcommands(inputs, inp_pattern, out_pattern) assert outputs.exit_code == 0 + d.clean_directories() -def test_alphanumeric_fixed_width(poly: pytest.FixtureRequest) -> None: +def test_alphanumeric_fixed_width(poly): """Testing of filename with alphanumeric fixed length.""" d = CreateData() inputs = d.load_json("brain") (inp_pattern, out_pattern) = poly[5] outputs = d.runcommands(inputs, inp_pattern, out_pattern) assert outputs.exit_code == 0 + d.clean_directories() -def test_alphanumeric_variable_width(poly: pytest.FixtureRequest) -> None: +def test_alphanumeric_variable_width(poly): """Testing of filename with alphanumeric variable width.""" d = CreateData() inputs = d.load_json("variable") @@ -205,43 +184,37 @@ def test_alphanumeric_variable_width(poly: pytest.FixtureRequest) -> None: d.clean_directories() -def test_parenthesis(poly: pytest.FixtureRequest) -> None: - """Testing of filename with parenthesis.""" - d = CreateData() - inputs = d.load_json("parenthesis") - (inp_pattern, out_pattern) = poly[7] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_two_chan_to_digit(poly: pytest.FixtureRequest) -> None: +def test_two_chan_to_digit(poly): """Testing conversion of two channels to digits.""" d = CreateData() inputs = d.load_json("two_chan") (inp_pattern, out_pattern) = poly[8] outputs = d.runcommands(inputs, inp_pattern, out_pattern) assert outputs.exit_code == 0 + d.clean_directories() -def test_three_chan_to_digit(poly: pytest.FixtureRequest) -> None: +def test_three_chan_to_digit(poly): """Test conversion of three channels to digits.""" d = CreateData() inputs = d.load_json("three_chan") (inp_pattern, out_pattern) = poly[9] outputs = d.runcommands(inputs, inp_pattern, out_pattern) assert outputs.exit_code == 0 + d.clean_directories() -def test_three_char_chan(poly: pytest.FixtureRequest) -> None: +def test_three_char_chan(poly): """Test conversion of three character channels to 
digits.""" d = CreateData() inputs = d.load_json("three_char_chan") (inp_pattern, out_pattern) = poly[10] outputs = d.runcommands(inputs, inp_pattern, out_pattern) assert outputs.exit_code == 0 + d.clean_directories() -def test_varied_digits(poly: pytest.FixtureRequest) -> None: +def test_varied_digits(poly): """Test varied digits.""" d = CreateData() inputs = d.load_json("tissuenet-val-labels-45-C") @@ -251,16 +224,17 @@ def test_varied_digits(poly: pytest.FixtureRequest) -> None: d.clean_directories() -def test_spaces(poly: pytest.FixtureRequest) -> None: +def test_spaces(poly): """Test non-alphanumeric chars such as spaces.""" d = CreateData() inputs = d.load_json("non_alphanum_int") (inp_pattern, out_pattern) = poly[12] outputs = d.runcommands(inputs, inp_pattern, out_pattern) assert outputs.exit_code == 0 + d.clean_directories() -def test_non_alphanum_float(poly: pytest.FixtureRequest) -> None: +def test_non_alphanum_float(poly): """Test non-alphanumeric chars such as spaces, periods, commas, brackets.""" d = CreateData() inputs = d.load_json("non_alphanum_float") @@ -270,68 +244,7 @@ def test_non_alphanum_float(poly: pytest.FixtureRequest) -> None: d.clean_directories() -def test_dashes_parentheses(poly: pytest.FixtureRequest) -> None: - """Test non-alphanumeric chars are handled properly such as dashes, parenthesis.""" - d = CreateData() - inputs = d.load_json("kph-kirill") - (inp_pattern, out_pattern) = poly[14] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_map_pattern_grps_to_regex_valid_input() -> None: - """Test of mapping input pattern.""" - test_cases = [ - ( - ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"), - ( - { - "row": "(?P[0-9][0-9])", - "col": "(?P[0-9][0-9])", - "channel": "(?P[a-zA-Z]+)", - } - ), - ), - (("img_x{row:c+}.tif"), ({"row": "(?P[a-zA-Z]+)"})), - ((""), ({})), - ] - for test_case in test_cases: - (from_val, to_val) = test_case - result = 
fr.map_pattern_grps_to_regex(from_val) - assert result == to_val - - -def test_convert_to_regex_valid_input() -> None: - """Test of converting to regular expression pattern.""" - test_cases = [ - ( - ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"), - ( - { - "row": "(?P[0-9][0-9])", - "col": "(?P[0-9][0-9])", - "channel": "(?P[a-zA-Z]+)", - } - ), - ( - "img_x(?P[0-9][0-9])_y(?P[0-9][0-9])_(?P[a-zA-Z]+).tif" - ), - ), - ( - ("img_x{row:c+}.tif"), - ({"row": "(?P[a-zA-Z]+)"}), - ("img_x(?P[a-zA-Z]+).tif"), - ), - (("img_x01.tif"), ({}), ("img_x01.tif")), - ] - for test_case in test_cases: - (from_val1, from_val2, to_val) = test_case - result = fr.convert_to_regex(from_val1, from_val2) - assert result == to_val - - -def test_specify_len_valid_input() -> None: +def test_specify_len_valid_input(): """Test of sepcifying length.""" test_cases = [ ( @@ -347,7 +260,7 @@ def test_specify_len_valid_input() -> None: assert result == to_val -def test_get_char_to_digit_grps_returns_unique_keys_valid_input() -> None: +def test_get_char_to_digit_grps_returns_unique_keys_valid_input(): """Test of getting characters to digit groups.""" test_cases = [ ( @@ -364,61 +277,7 @@ def test_get_char_to_digit_grps_returns_unique_keys_valid_input() -> None: assert result == to_val -def test_extract_named_grp_matches_valid_input() -> None: - """Test of extracting group names.""" - test_cases = [ - ( - ( - "img_x(?P[0-9][0-9])_y(?P[0-9][0-9])_(?P[a-zA-Z]+).tif" - ), - (["img_x01_y01_DAPI.tif", "img_x01_y01_GFP.tif", "img_x01_y01_TXRED.tif"]), - ( - [ - { - "row": "01", - "col": "01", - "channel": "DAPI", - "fname": "img_x01_y01_DAPI.tif", - }, - { - "row": "01", - "col": "01", - "channel": "GFP", - "fname": "img_x01_y01_GFP.tif", - }, - { - "row": "01", - "col": "01", - "channel": "TXRED", - "fname": "img_x01_y01_TXRED.tif", - }, - ] - ), - ), - (("img_x01.tif"), (["img_x01.tif"]), ([{"fname": "img_x01.tif"}])), - ] - for test_case in test_cases: - (from_val1, from_val2, to_val) = test_case - 
result = fr.extract_named_grp_matches(from_val1, from_val2) - assert result == to_val - - -def test_extract_named_grp_matches_bad_pattern_invalid_input_fails() -> None: - """Test of invalid input pattern.""" - test_cases = [ - ( - ("img_x(?P[a-zA-Z]+).tif"), - (["img_x01_y01_DAPI.tif", "img_x01_y01_GFP.tif", "img_x01_y01_TXRED.tif"]), - ) - ] - for test_case in test_cases: - (from_val1, from_val2) = test_case - - result = fr.extract_named_grp_matches(from_val1, from_val2) - assert len(result) == 0 - - -def test_str_to_int_valid_input() -> None: +def test_str_to_int_valid_input(): """Test of string to integer.""" test_cases = [ ( @@ -468,7 +327,7 @@ def test_str_to_int_valid_input() -> None: assert result == to_val -def test_letters_to_int_returns_cat_index_dict_valid_input() -> None: +def test_letters_to_int_returns_cat_index_dict_valid_input(): """Test of letter to integers.""" test_cases = [ ( @@ -498,23 +357,7 @@ def test_letters_to_int_returns_cat_index_dict_valid_input() -> None: @pytest.mark.xfail -def test_extract_named_grp_matches_duplicate_namedgrp_invalid_input() -> None: - """Test of invalid input pattern.""" - test_cases = [ - ( - ( - "x(?P[0-9][0-9])_y(?P[0-9][0-9])_c(?P[a-zA-Z]+).ome.tif" - ), - (["img_x01_y01_DAPI.tif", "img_x01_y01_GFP.tif", "img_x01_y01_TXRED.tif"]), - ) - ] - for test_case in test_cases: - (from_val1, from_val2) = test_case - fr.extract_named_grp_matches(from_val1, from_val2) - - -@pytest.mark.xfail -def test_letters_to_int_returns_error_invalid_input() -> None: +def test_letters_to_int_returns_error_invalid_input(): """Test of invalid inputs.""" test_cases = [ ( @@ -542,14 +385,20 @@ def test_letters_to_int_returns_error_invalid_input() -> None: @pytest.fixture -def create_subfolders() -> Tuple[pathlib.Path, str, str, str]: - """Creating directory and subdirectories.""" +def create_subfolders(): data = { "complex": [ - ["A9 p5d.tif", "A9 p5f.tif", "A9 p7f.tif"], - "96 ( -)* test_", - "{row:c}{col:d}.*p{f:d+}{character:c}.tif", + 
[ + "AS_09125_050118150001_A03f00d0.tif", + "AS_09125_050118150001_A03f01d0.tif", + "AS_09125_050118150001_A03f02d0.tif", + "AS_09125_050118150001_A03f03d0.tif", + "AS_09125_050118150001_A03f04d0.tif", + ], + "BBBC/BBBC001/raw/Images/human_ht29_colon_cancer_1_images", + "/.*/.*/.*/Images/(?P.*)/.*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif", "x{row:dd}_y{col:dd}_p{f:dd}{character:c}_c01.tif", + "True", ], "simple": [ [ @@ -561,54 +410,37 @@ def create_subfolders() -> Tuple[pathlib.Path, str, str, str]: "folder_", ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", + "False", ], } for name in ["complex", "simple"]: d = CreateData() dir_path = d.input_directory() for i in range(5): - dirname = pathlib.Path(dir_path, f"{data[name][1]}{i}") - pathlib.Path(dirname).mkdir(exist_ok=False, parents=False) + dirname = pathlib.Path(dir_path, f"{data[name][1]}_{i}") + if not pathlib.Path(dirname).exists(): + pathlib.Path(dirname).mkdir(parents=True, exist_ok=True) for fl in data[name][0]: - temp_file = pathlib.Path.open(pathlib.Path(dirname, fl), "w") + temp_file = open(pathlib.Path(dirname, fl), "w") temp_file.close() - return pathlib.Path(dir_path), data[name][1], data[name][2], data[name][3] - - -def test_recursive_searching_files() -> None: - """Test recursive searching of files nested directories.""" - - dir_path = tempfile.mkdtemp(dir=pathlib.Path.cwd()) - out_dir = tempfile.mkdtemp(dir=pathlib.Path.cwd()) - for i in range(2): - dirname1 = "image_folder_" - dirname2 = "groundtruth_folder_" - dirname1 = pathlib.Path(dir_path, f"BBBC/BBBC001/Images/{dirname1}{i}") - dirname2 = pathlib.Path(dir_path, f"BBBC/BBBC001/Groundtruth/{dirname2}{i}") - pathlib.Path(dirname1).mkdir(exist_ok=False, parents=True) - pathlib.Path(dirname2).mkdir(exist_ok=False, parents=True) - - flist = [ - "AS_09125_050118150001_A03f00d0.tif", - "AS_09125_050118150001_A03f01d0.tif", - "AS_09125_050118150001_A03f02d0.tif", - "AS_09125_050118150001_A03f03d0.tif", - 
"AS_09125_050118150001_A03f04d0.tif", - "AS_09125_050118150001_A03f05d0.tif", - ] + return ( + pathlib.Path(dir_path), + data[name][1], + data[name][2], + data[name][3], + data[name][4], + ) - for fl in flist: - temp_file = pathlib.Path.open(pathlib.Path(dirname1, fl), "w") - temp_file = pathlib.Path.open(pathlib.Path(dirname2, fl), "w") - temp_file.close() - file_pattern = ".*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif" - out_file_pattern = "x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif" - map_directory = "raw" - - runner.invoke( - app, - [ + +def test_cli(create_subfolders: pytest.FixtureRequest) -> None: + """Test Cli.""" + dir_path, _, file_pattern, out_file_pattern, map_directory = create_subfolders + + d = CreateData() + out_dir = d.output_directory() + if map_directory == "True": + params = [ "--inpDir", dir_path, "--filePattern", @@ -618,37 +450,19 @@ def test_recursive_searching_files() -> None: "--outFilePattern", out_file_pattern, "--mapDirectory", - map_directory, - ], - ) - assert list( - np.unique([p.name.split("_")[0] for p in pathlib.Path(out_dir).iterdir()]) - ) == ["groundtruth", "image"] - shutil.rmtree(dir_path) - shutil.rmtree(out_dir) - - -def test_cli(create_subfolders: pytest.FixtureRequest) -> None: - """Test Cli.""" - dir_path, _, file_pattern, out_file_pattern = create_subfolders - for i in ["raw", "map"]: - d = CreateData() - out_dir = d.output_directory() - result = runner.invoke( - app, - [ - "--inpDir", - dir_path, - "--filePattern", - file_pattern, - "--outDir", - out_dir, - "--outFilePattern", - out_file_pattern, - "--mapDirectory", - i, - ], - ) - assert result.exit_code == 0 + ] + else: + params = [ + "--inpDir", + dir_path, + "--filePattern", + file_pattern, + "--outDir", + out_dir, + "--outFilePattern", + out_file_pattern, + ] + result = runner.invoke(app, params) + assert result.exit_code == 0 d.clean_directories() From 90bbb1d4664461e00f64f0a0e8a75b82df04c697 Mon Sep 17 00:00:00 2001 From: Hamdah Shafqat Abbasi Date: Tue, 17 Dec 
2024 15:36:19 -0500 Subject: [PATCH 4/8] fix bash script --- formats/file-renaming-tool/run-plugin.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/formats/file-renaming-tool/run-plugin.sh b/formats/file-renaming-tool/run-plugin.sh index 0e0a0f284..abc590780 100644 --- a/formats/file-renaming-tool/run-plugin.sh +++ b/formats/file-renaming-tool/run-plugin.sh @@ -1,4 +1,6 @@ -#!/bin/bash +!/bin/bash + + version=$( Date: Tue, 17 Dec 2024 16:23:18 -0500 Subject: [PATCH 5/8] rearrange input arg --- formats/file-renaming-tool/filerenaming.cwl | 2 +- formats/file-renaming-tool/ict.yaml | 96 +++++++++---------- formats/file-renaming-tool/plugin.json | 22 ++--- .../images/formats/file_renaming/__main__.py | 10 +- 4 files changed, 65 insertions(+), 65 deletions(-) diff --git a/formats/file-renaming-tool/filerenaming.cwl b/formats/file-renaming-tool/filerenaming.cwl index e633c76d2..89e937a6f 100644 --- a/formats/file-renaming-tool/filerenaming.cwl +++ b/formats/file-renaming-tool/filerenaming.cwl @@ -12,7 +12,7 @@ inputs: mapDirectory: inputBinding: prefix: --mapDirectory - type: string? + type: boolean? 
outDir: inputBinding: prefix: --outDir diff --git a/formats/file-renaming-tool/ict.yaml b/formats/file-renaming-tool/ict.yaml index bb6ebce21..1c74eb4bf 100644 --- a/formats/file-renaming-tool/ict.yaml +++ b/formats/file-renaming-tool/ict.yaml @@ -1,61 +1,61 @@ author: -- Hamdah Shafqat -- Melanie Parham + - Hamdah Shafqat + - Melanie Parham contact: hamdahshafqat.abbasi@nih.gov container: polusai/file-renaming-tool:0.2.5-dev0 description: Rename and store image collection files in a new image collection entrypoint: python3 -m polus.images.formats.file_renaming inputs: -- description: Filename pattern used to separate data - format: - - string - name: filePattern - required: true - type: string -- description: Input image collection to be processed by this plugin - format: - - collection - name: inpDir - required: true - type: path -- description: Desired filename pattern used to rename and separate data - format: - - string - name: outFilePattern - required: true - type: string -- description: Incorporate mapped directory names into renamed files - format: - - boolean - name: mapDirectory - required: false - type: boolean + - description: Input image collection to be processed by this plugin + format: + - collection + name: inpDir + required: true + type: path + - description: Filename pattern used to separate data + format: + - string + name: filePattern + required: true + type: string + - description: Desired filename pattern used to rename and separate data + format: + - string + name: outFilePattern + required: true + type: string + - description: Incorporate mapped directory names into renamed files + format: + - boolean + name: mapDirectory + required: false + type: boolean name: polusai/FileRenaming outputs: -- description: Output collection - format: - - collection - name: outDir - required: true - type: path + - description: Output collection + format: + - collection + name: outDir + required: true + type: path repository: 
https://github.com/PolusAI/polus-plugins specVersion: 1.0.0 title: File Renaming ui: -- description: Filename pattern used to separate data - key: inputs.filePattern - title: Filename pattern - type: text -- description: Input image collection to be processed by this plugin - key: inputs.inpDir - title: Input collection - type: path -- description: Desired filename pattern used to rename and separate data - key: inputs.outFilePattern - title: Output filename pattern - type: text -- description: Incorporate mapped directory names into renamed files - key: inputs.mapDirectory - title: mapDirectory - type: checkbox + - description: Input image collection to be processed by this plugin + key: inputs.inpDir + title: Input collection + type: path + - description: Filename pattern used to separate data + key: inputs.filePattern + title: Filename pattern + type: text + - description: Desired filename pattern used to rename and separate data + key: inputs.outFilePattern + title: Output filename pattern + type: text + - description: Incorporate mapped directory names into renamed files + key: inputs.mapDirectory + title: mapDirectory + type: checkbox version: 0.2.5-dev0 diff --git a/formats/file-renaming-tool/plugin.json b/formats/file-renaming-tool/plugin.json index acb292d52..ee034a66f 100644 --- a/formats/file-renaming-tool/plugin.json +++ b/formats/file-renaming-tool/plugin.json @@ -15,18 +15,18 @@ "polus.images.formats.file_renaming" ], "inputs": [ - { - "name": "filePattern", - "type": "string", - "description": "Filename pattern used to separate data", - "required": true - }, { "name": "inpDir", "type": "collection", "description": "Input image collection to be processed by this plugin", "required": true }, + { + "name": "filePattern", + "type": "string", + "description": "Filename pattern used to separate data", + "required": true + }, { "name": "outFilePattern", "type": "string", @@ -48,16 +48,16 @@ } ], "ui": [ - { - "key": "inputs.filePattern", - "title": 
"Filename pattern", - "description": "Filename pattern used to separate data" - }, { "key": "inputs.inpDir", "title": "Input collection", "description": "Input image collection to be processed by this plugin" }, + { + "key": "inputs.filePattern", + "title": "Filename pattern", + "description": "Filename pattern used to separate data" + }, { "key": "inputs.outFilePattern", "title": "Output filename pattern", diff --git a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py index 825246d15..d50ed7bc0 100644 --- a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py +++ b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/__main__.py @@ -32,11 +32,6 @@ def main( # noqa: PLR0913, D417 "--filePattern", help="Filename pattern used to separate data", ), - out_dir: pathlib.Path = typer.Option( - ..., - "--outDir", - help="Path to image collection storing copies of renamed files", - ), out_file_pattern: str = typer.Option( ..., "--outFilePattern", @@ -47,6 +42,11 @@ def main( # noqa: PLR0913, D417 "--mapDirectory", help="Get folder name", ), + out_dir: pathlib.Path = typer.Option( + ..., + "--outDir", + help="Path to image collection storing copies of renamed files", + ), preview: Optional[bool] = typer.Option( False, "--preview", From 3f27e948fc2dfbed698387c1726ec65e545a4708 Mon Sep 17 00:00:00 2001 From: Hamdah Shafqat Abbasi Date: Tue, 17 Dec 2024 21:53:22 -0500 Subject: [PATCH 6/8] fix base container image --- formats/file-renaming-tool/Dockerfile | 2 +- formats/file-renaming-tool/ict.yaml | 2 +- formats/file-renaming-tool/plugin.json | 2 +- .../src/polus/images/formats/file_renaming/file_renaming.py | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/formats/file-renaming-tool/Dockerfile b/formats/file-renaming-tool/Dockerfile index 46ad0ab4d..e7abbba30 100644 --- a/formats/file-renaming-tool/Dockerfile +++ 
b/formats/file-renaming-tool/Dockerfile @@ -1,4 +1,4 @@ -FROM polusai/bfio:2.4.5 +FROM polusai/bfio:2.1.9 # environment variables defined in polusai/bfio ENV EXEC_DIR="/opt/executables" diff --git a/formats/file-renaming-tool/ict.yaml b/formats/file-renaming-tool/ict.yaml index 1c74eb4bf..f5b78e1e2 100644 --- a/formats/file-renaming-tool/ict.yaml +++ b/formats/file-renaming-tool/ict.yaml @@ -38,7 +38,7 @@ outputs: name: outDir required: true type: path -repository: https://github.com/PolusAI/polus-plugins +repository: https://github.com/PolusAI/image-tools specVersion: 1.0.0 title: File Renaming ui: diff --git a/formats/file-renaming-tool/plugin.json b/formats/file-renaming-tool/plugin.json index ee034a66f..5edd377e0 100644 --- a/formats/file-renaming-tool/plugin.json +++ b/formats/file-renaming-tool/plugin.json @@ -5,7 +5,7 @@ "description": "Rename and store image collection files in a new image collection", "author": "Hamdah Shafqat Abbasi (hamdahshafqat.abbasi@nih.gov), Melanie Parham (melanie.parham@axleinfo.com)", "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", - "repository": "https://github.com/PolusAI/polus-plugins", + "repository": "https://github.com/PolusAI/image-tools", "website": "https://ncats.nih.gov/preclinical/core/informatics", "citation": "", "containerId": "polusai/file-renaming-tool:0.2.5-dev0", diff --git a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py index f61b32b32..5ef98c008 100644 --- a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py +++ b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py @@ -187,6 +187,7 @@ def rename( # noqa: C901 numbered_categories = {} for named_grp in char_to_digit_categories: numbered_categories[named_grp] = letters_to_int(named_grp, inp_files) + # Check named groups that 
need c->d conversion for named_grp in char_to_digit_categories: for i in range(0, len(inp_files)): From 190a92a2cd4069800d1a3c399a940427571aa004 Mon Sep 17 00:00:00 2001 From: Hamdah Shafqat Abbasi Date: Wed, 18 Dec 2024 17:49:15 -0500 Subject: [PATCH 7/8] fix test --- formats/file-renaming-tool/Dockerfile | 2 +- .../formats/file_renaming/file_renaming.py | 16 +- formats/file-renaming-tool/tests/test_main.py | 705 +++++++++--------- 3 files changed, 356 insertions(+), 367 deletions(-) diff --git a/formats/file-renaming-tool/Dockerfile b/formats/file-renaming-tool/Dockerfile index e7abbba30..46ad0ab4d 100644 --- a/formats/file-renaming-tool/Dockerfile +++ b/formats/file-renaming-tool/Dockerfile @@ -1,4 +1,4 @@ -FROM polusai/bfio:2.1.9 +FROM polusai/bfio:2.4.5 # environment variables defined in polusai/bfio ENV EXEC_DIR="/opt/executables" diff --git a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py index 5ef98c008..7a104dd9d 100644 --- a/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py +++ b/formats/file-renaming-tool/src/polus/images/formats/file_renaming/file_renaming.py @@ -142,7 +142,7 @@ def letters_to_int(named_grp: str, all_matches: list) -> dict: return str_alphabetindex_dict -def rename( # noqa: C901 +def rename( # noqa: C901 PLR0915 inp_dir: pathlib.Path, out_dir: pathlib.Path, file_pattern: str, @@ -160,7 +160,19 @@ def rename( # noqa: C901 """ logger.info("Start renaming files") - files = fp.FilePattern(inp_dir, file_pattern, recursive=True) + # Check if the directory is empty without creating a full list + file_count = sum(1 for _ in inp_dir.iterdir()) + + if file_count == 0: + msg = f"Input directory is empty: {file_count} files found." 
+ raise ValueError(msg) + + logger.info(f"Number of files found: {file_count}") + + if map_directory is True: + files = fp.FilePattern(inp_dir, file_pattern, recursive=True) + else: + files = fp.FilePattern(inp_dir, file_pattern) if len(files) == 0: msg = f"Please define filePattern: {file_pattern} again!!" diff --git a/formats/file-renaming-tool/tests/test_main.py b/formats/file-renaming-tool/tests/test_main.py index 8269ad640..70cf6b002 100644 --- a/formats/file-renaming-tool/tests/test_main.py +++ b/formats/file-renaming-tool/tests/test_main.py @@ -66,322 +66,322 @@ def clean_directories(self) -> None: shutil.rmtree(d) -fixture_params = [ - [ - ( - "r{row:ddd}_c{col:ddd}_{chan:ccc}.ome.tif", - "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", - ), - ( - "r{row:d+}_c{col:d+}_{chan:c+}.ome.tif", - "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", - ), - ("r.ome.tif", "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif"), - ( - "%{row:ddd}_c{col:ddd}_z{z:d+}.ome.tif", - "%{row:dddd}_col{col:dddd}_z{z:d+}.ome.tif", - ), - ( - "00{one:d}0{two:dd}-{three:d}-00100100{four:d}.tif", - "output{one:dd}0{two:ddd}-{three:dd}-00100100{four:dd}.tif", - ), - ( - "S1_R{one:d}_C1-C11_A1_y0{two:dd}_x0{three:dd}_c0{four:dd}.ome.tif", - "output{one:dd}_C1-C11_A1_y0{two:ddd}_x0{three:ddd}_c0{four:ddd}.ome.tif", - ), - ( - "S1_R{one:d}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", - "output{one:dd}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", - ), - ( - "img_x{row:dd}_y{col:dd}_({chan:c+}).tif", - "output{row:dd}_{col:ddd}_{chan:dd}.tif", - ), - ( - "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}.tif", - "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}.tif", - ), - ( - "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}_{alphachan:ccc}.tif", - "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}_{alphachan:dddd}.tif", - ), - ( - "img x{row:dd} y{col:dd} {chan:ccc}.tif", - "output{row:ddd}_{col:ddd}_{chan:ccc}.tif", - ), - ( - "p{p:d}_y{y:d}_r{r:d+}_c{c:d+}.ome.tif", - 
"p{p:dd}_y{y:dd}_r{r:dddd}_c{c:ddd}.ome.tif", - ), - ( - "img x{row:dd} y{col:dd} {chan:c+}.tif", - "output{row:ddd}_{col:ddd}_{chan:dd}.tif", - ), - ( - "img x{row:dd}.{other:d+} y{col:dd} {chan:c+}.tif", - "output{row:ddd}_{col:ddd}_ {other:d+} {chan:dd}.tif", - ), - ( - "0({mo:dd}-{day:dd})0({mo2:dd}-{day2:dd})-({a:d}-{b:d})-{col:ddd}.ome.tif", - "0({mo:ddd}-{day:ddd})0{mo2:dd}-{day2:dd})-({a:dd}-{b:dd})-{col:ddd}.ome.tif", - ), - ] -] - - -@pytest.fixture(params=fixture_params) -def poly(request): - """To get the parameter of the fixture.""" - return request.param - - -def test_invalid_input_raises_error(poly): - """Testing of invalid input filepattern.""" - d = CreateData() - inputs = d.load_json("duplicate_channels_to_digit") - (inp_pattern, out_pattern) = poly[0] - d.runcommands(inputs, inp_pattern, out_pattern) - d.clean_directories() - - -def test_non_alphanum_inputs_percentage_sign(poly): - """Testing of filename with non alphanumeric inputs such as percentage sign.""" - d = CreateData() - inputs = d.load_json("percentage_file") - (inp_pattern, out_pattern) = poly[3] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_numeric_fixed_width(poly): - """Testing of filename with numeric fixed length.""" - d = CreateData() - inputs = d.load_json("robot") - (inp_pattern, out_pattern) = poly[4] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_alphanumeric_fixed_width(poly): - """Testing of filename with alphanumeric fixed length.""" - d = CreateData() - inputs = d.load_json("brain") - (inp_pattern, out_pattern) = poly[5] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_alphanumeric_variable_width(poly): - """Testing of filename with alphanumeric variable width.""" - d = CreateData() - inputs = d.load_json("variable") - 
(inp_pattern, out_pattern) = poly[6] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_two_chan_to_digit(poly): - """Testing conversion of two channels to digits.""" - d = CreateData() - inputs = d.load_json("two_chan") - (inp_pattern, out_pattern) = poly[8] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_three_chan_to_digit(poly): - """Test conversion of three channels to digits.""" - d = CreateData() - inputs = d.load_json("three_chan") - (inp_pattern, out_pattern) = poly[9] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_three_char_chan(poly): - """Test conversion of three character channels to digits.""" - d = CreateData() - inputs = d.load_json("three_char_chan") - (inp_pattern, out_pattern) = poly[10] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_varied_digits(poly): - """Test varied digits.""" - d = CreateData() - inputs = d.load_json("tissuenet-val-labels-45-C") - (inp_pattern, out_pattern) = poly[11] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_spaces(poly): - """Test non-alphanumeric chars such as spaces.""" - d = CreateData() - inputs = d.load_json("non_alphanum_int") - (inp_pattern, out_pattern) = poly[12] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_non_alphanum_float(poly): - """Test non-alphanumeric chars such as spaces, periods, commas, brackets.""" - d = CreateData() - inputs = d.load_json("non_alphanum_float") - (inp_pattern, out_pattern) = poly[13] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - 
d.clean_directories() - - -def test_specify_len_valid_input(): - """Test of sepcifying length.""" - test_cases = [ - ( - ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"), - ("newdata_x{row:03d}_y{col:03d}_c{channel:03d}.tif"), - ), - (("newdata_x{row:c+}.tif"), ("newdata_x{row:s}.tif")), - (("newdata_x01.tif"), ("newdata_x01.tif")), - ] - for test_case in test_cases: - (from_val, to_val) = test_case - result = fr.specify_len(from_val) - assert result == to_val - - -def test_get_char_to_digit_grps_returns_unique_keys_valid_input(): - """Test of getting characters to digit groups.""" - test_cases = [ - ( - ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"), - ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"), - (["channel"]), - ), - (("img_x{row:c+}.tif"), ("newdata_x{row:c+}.tif"), ([])), - (("img_x01.tif"), ("newdata_x01.tif"), ([])), - ] - for test_case in test_cases: - (from_val1, from_val2, to_val) = test_case - result = fr.get_char_to_digit_grps(from_val1, from_val2) - assert result == to_val - - -def test_str_to_int_valid_input(): - """Test of string to integer.""" - test_cases = [ - ( - ( - { - "row": "01", - "col": "01", - "channel": "DAPI", - "fname": "img_x01_y01_DAPI.tif", - } - ), - ({"row": 1, "col": 1, "channel": "DAPI", "fname": "img_x01_y01_DAPI.tif"}), - ), - ( - ( - { - "row": "2", - "col": "01", - "channel": "TXRED", - "fname": "img_x01_y01_TXRED.tif", - } - ), - ( - { - "row": 2, - "col": 1, - "channel": "TXRED", - "fname": "img_x01_y01_TXRED.tif", - } - ), - ), - ( - ( - { - "row": "0001", - "col": "0001", - "channel": "GFP", - "fname": "img_x01_y01_GFP.tif", - } - ), - ({"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}), - ), - ] - for test_case in test_cases: - (from_val, to_val) = test_case - result = fr.str_to_int(from_val) - assert result == to_val - - -def test_letters_to_int_returns_cat_index_dict_valid_input(): - """Test of letter to integers.""" - test_cases = [ - ( - ("channel"), - [ - { - "row": 1, - "col": 1, - 
"channel": "DAPI", - "fname": "img_x01_y01_DAPI.tif", - }, - {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, - { - "row": 1, - "col": 1, - "channel": "TXRED", - "fname": "img_x01_y01_TXRED.tif", - }, - ], - ({"DAPI": 0, "GFP": 1, "TXRED": 2}), - ) - ] - for test_case in test_cases: - (from_val1, from_val2, to_val) = test_case - result = fr.letters_to_int(from_val1, from_val2) - assert result == to_val - - -@pytest.mark.xfail -def test_letters_to_int_returns_error_invalid_input(): - """Test of invalid inputs.""" - test_cases = [ - ( - (2), - [ - { - "row": 1, - "col": 1, - "channel": "DAPI", - "fname": "img_x01_y01_DAPI.tif", - }, - {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, - { - "row": 1, - "col": 1, - "channel": "TXRED", - "fname": "img_x01_y01_TXRED.tif", - }, - ], - ), - ] - for test_case in test_cases: - (from_val1, from_val2) = test_case - fr.letters_to_int(from_val1, from_val2) +# fixture_params = [ +# [ +# ( +# "r{row:ddd}_c{col:ddd}_{chan:ccc}.ome.tif", +# "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", +# ), +# ( +# "r{row:d+}_c{col:d+}_{chan:c+}.ome.tif", +# "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", +# ), +# ("r.ome.tif", "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif"), +# ( +# "%{row:ddd}_c{col:ddd}_z{z:d+}.ome.tif", +# "%{row:dddd}_col{col:dddd}_z{z:d+}.ome.tif", +# ), +# ( +# "00{one:d}0{two:dd}-{three:d}-00100100{four:d}.tif", +# "output{one:dd}0{two:ddd}-{three:dd}-00100100{four:dd}.tif", +# ), +# ( +# "S1_R{one:d}_C1-C11_A1_y0{two:dd}_x0{three:dd}_c0{four:dd}.ome.tif", +# "output{one:dd}_C1-C11_A1_y0{two:ddd}_x0{three:ddd}_c0{four:ddd}.ome.tif", +# ), +# ( +# "S1_R{one:d}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", +# "output{one:dd}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", +# ), +# ( +# "img_x{row:dd}_y{col:dd}_({chan:c+}).tif", +# "output{row:dd}_{col:ddd}_{chan:dd}.tif", +# ), +# ( +# "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}.tif", +# 
"output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}.tif", +# ), +# ( +# "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}_{alphachan:ccc}.tif", +# "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}_{alphachan:dddd}.tif", +# ), +# ( +# "img x{row:dd} y{col:dd} {chan:ccc}.tif", +# "output{row:ddd}_{col:ddd}_{chan:ccc}.tif", +# ), +# ( +# "p{p:d}_y{y:d}_r{r:d+}_c{c:d+}.ome.tif", +# "p{p:dd}_y{y:dd}_r{r:dddd}_c{c:ddd}.ome.tif", +# ), +# ( +# "img x{row:dd} y{col:dd} {chan:c+}.tif", +# "output{row:ddd}_{col:ddd}_{chan:dd}.tif", +# ), +# ( +# "img x{row:dd}.{other:d+} y{col:dd} {chan:c+}.tif", +# "output{row:ddd}_{col:ddd}_ {other:d+} {chan:dd}.tif", +# ), +# ( +# "0({mo:dd}-{day:dd})0({mo2:dd}-{day2:dd})-({a:d}-{b:d})-{col:ddd}.ome.tif", +# "0({mo:ddd}-{day:ddd})0{mo2:dd}-{day2:dd})-({a:dd}-{b:dd})-{col:ddd}.ome.tif", +# ), +# ] +# ] + + +# @pytest.fixture(params=fixture_params) +# def poly(request): +# """To get the parameter of the fixture.""" +# return request.param + + +# def test_invalid_input_raises_error(poly): +# """Testing of invalid input filepattern.""" +# d = CreateData() +# inputs = d.load_json("duplicate_channels_to_digit") +# (inp_pattern, out_pattern) = poly[0] +# d.runcommands(inputs, inp_pattern, out_pattern) +# d.clean_directories() + + +# def test_non_alphanum_inputs_percentage_sign(poly): +# """Testing of filename with non alphanumeric inputs such as percentage sign.""" +# d = CreateData() +# inputs = d.load_json("percentage_file") +# (inp_pattern, out_pattern) = poly[3] +# outputs = d.runcommands(inputs, inp_pattern, out_pattern) +# assert outputs.exit_code == 0 +# d.clean_directories() + + +# def test_numeric_fixed_width(poly): +# """Testing of filename with numeric fixed length.""" +# d = CreateData() +# inputs = d.load_json("robot") +# (inp_pattern, out_pattern) = poly[4] +# outputs = d.runcommands(inputs, inp_pattern, out_pattern) +# assert outputs.exit_code == 0 +# d.clean_directories() + + +# def test_alphanumeric_fixed_width(poly): +# """Testing of 
filename with alphanumeric fixed length.""" +# d = CreateData() +# inputs = d.load_json("brain") +# (inp_pattern, out_pattern) = poly[5] +# outputs = d.runcommands(inputs, inp_pattern, out_pattern) +# assert outputs.exit_code == 0 +# d.clean_directories() + + +# def test_alphanumeric_variable_width(poly): +# """Testing of filename with alphanumeric variable width.""" +# d = CreateData() +# inputs = d.load_json("variable") +# (inp_pattern, out_pattern) = poly[6] +# outputs = d.runcommands(inputs, inp_pattern, out_pattern) +# assert outputs.exit_code == 0 +# d.clean_directories() + + +# def test_two_chan_to_digit(poly): +# """Testing conversion of two channels to digits.""" +# d = CreateData() +# inputs = d.load_json("two_chan") +# (inp_pattern, out_pattern) = poly[8] +# outputs = d.runcommands(inputs, inp_pattern, out_pattern) +# assert outputs.exit_code == 0 +# d.clean_directories() + + +# def test_three_chan_to_digit(poly): +# """Test conversion of three channels to digits.""" +# d = CreateData() +# inputs = d.load_json("three_chan") +# (inp_pattern, out_pattern) = poly[9] +# outputs = d.runcommands(inputs, inp_pattern, out_pattern) +# assert outputs.exit_code == 0 +# d.clean_directories() + + +# def test_three_char_chan(poly): +# """Test conversion of three character channels to digits.""" +# d = CreateData() +# inputs = d.load_json("three_char_chan") +# (inp_pattern, out_pattern) = poly[10] +# outputs = d.runcommands(inputs, inp_pattern, out_pattern) +# assert outputs.exit_code == 0 +# d.clean_directories() + + +# def test_varied_digits(poly): +# """Test varied digits.""" +# d = CreateData() +# inputs = d.load_json("tissuenet-val-labels-45-C") +# (inp_pattern, out_pattern) = poly[11] +# outputs = d.runcommands(inputs, inp_pattern, out_pattern) +# assert outputs.exit_code == 0 +# d.clean_directories() + + +# def test_spaces(poly): +# """Test non-alphanumeric chars such as spaces.""" +# d = CreateData() +# inputs = d.load_json("non_alphanum_int") +# (inp_pattern, 
out_pattern) = poly[12] +# outputs = d.runcommands(inputs, inp_pattern, out_pattern) +# assert outputs.exit_code == 0 +# d.clean_directories() + + +# def test_non_alphanum_float(poly): +# """Test non-alphanumeric chars such as spaces, periods, commas, brackets.""" +# d = CreateData() +# inputs = d.load_json("non_alphanum_float") +# (inp_pattern, out_pattern) = poly[13] +# outputs = d.runcommands(inputs, inp_pattern, out_pattern) +# assert outputs.exit_code == 0 +# d.clean_directories() + + +# def test_specify_len_valid_input(): +# """Test of sepcifying length.""" +# test_cases = [ +# ( +# ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"), +# ("newdata_x{row:03d}_y{col:03d}_c{channel:03d}.tif"), +# ), +# (("newdata_x{row:c+}.tif"), ("newdata_x{row:s}.tif")), +# (("newdata_x01.tif"), ("newdata_x01.tif")), +# ] +# for test_case in test_cases: +# (from_val, to_val) = test_case +# result = fr.specify_len(from_val) +# assert result == to_val + + +# def test_get_char_to_digit_grps_returns_unique_keys_valid_input(): +# """Test of getting characters to digit groups.""" +# test_cases = [ +# ( +# ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"), +# ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"), +# (["channel"]), +# ), +# (("img_x{row:c+}.tif"), ("newdata_x{row:c+}.tif"), ([])), +# (("img_x01.tif"), ("newdata_x01.tif"), ([])), +# ] +# for test_case in test_cases: +# (from_val1, from_val2, to_val) = test_case +# result = fr.get_char_to_digit_grps(from_val1, from_val2) +# assert result == to_val + + +# def test_str_to_int_valid_input(): +# """Test of string to integer.""" +# test_cases = [ +# ( +# ( +# { +# "row": "01", +# "col": "01", +# "channel": "DAPI", +# "fname": "img_x01_y01_DAPI.tif", +# } +# ), +# ({"row": 1, "col": 1, "channel": "DAPI", "fname": "img_x01_y01_DAPI.tif"}), +# ), +# ( +# ( +# { +# "row": "2", +# "col": "01", +# "channel": "TXRED", +# "fname": "img_x01_y01_TXRED.tif", +# } +# ), +# ( +# { +# "row": 2, +# "col": 1, +# "channel": "TXRED", +# "fname": 
"img_x01_y01_TXRED.tif", +# } +# ), +# ), +# ( +# ( +# { +# "row": "0001", +# "col": "0001", +# "channel": "GFP", +# "fname": "img_x01_y01_GFP.tif", +# } +# ), +# ({"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}), +# ), +# ] +# for test_case in test_cases: +# (from_val, to_val) = test_case +# result = fr.str_to_int(from_val) +# assert result == to_val + + +# def test_letters_to_int_returns_cat_index_dict_valid_input(): +# """Test of letter to integers.""" +# test_cases = [ +# ( +# ("channel"), +# [ +# { +# "row": 1, +# "col": 1, +# "channel": "DAPI", +# "fname": "img_x01_y01_DAPI.tif", +# }, +# {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, +# { +# "row": 1, +# "col": 1, +# "channel": "TXRED", +# "fname": "img_x01_y01_TXRED.tif", +# }, +# ], +# ({"DAPI": 0, "GFP": 1, "TXRED": 2}), +# ) +# ] +# for test_case in test_cases: +# (from_val1, from_val2, to_val) = test_case +# result = fr.letters_to_int(from_val1, from_val2) +# assert result == to_val + + +# @pytest.mark.xfail +# def test_letters_to_int_returns_error_invalid_input(): +# """Test of invalid inputs.""" +# test_cases = [ +# ( +# (2), +# [ +# { +# "row": 1, +# "col": 1, +# "channel": "DAPI", +# "fname": "img_x01_y01_DAPI.tif", +# }, +# {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, +# { +# "row": 1, +# "col": 1, +# "channel": "TXRED", +# "fname": "img_x01_y01_TXRED.tif", +# }, +# ], +# ), +# ] +# for test_case in test_cases: +# (from_val1, from_val2) = test_case +# fr.letters_to_int(from_val1, from_val2) @pytest.fixture @@ -396,33 +396,21 @@ def create_subfolders(): "AS_09125_050118150001_A03f04d0.tif", ], "BBBC/BBBC001/raw/Images/human_ht29_colon_cancer_1_images", - "/.*/.*/.*/Images/(?P.*)/.*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif", - "x{row:dd}_y{col:dd}_p{f:dd}{character:c}_c01.tif", + "(?P.*)/AS_09125_050118150001_{row:c}{col:dd}f{f:dd}d{channel:d}.tif", + "x{row:dd}_y{col:dd}_p{f:dd}{channel:d}_c01.tif", "True", - ], - "simple": [ - 
[ - "taoe005-u2os-72h-cp-a-au00044859_a01_s3_w23db644df-02ee-429d-9559-09cf4625c62b.tif", - "taoe005-u2os-72h-cp-a-au00044859_b01_s3_w3add254c8-0c7b-4cf0-a5dc-bf0cf8de8cec.tif", - "taoe005-u2os-72h-cp-a-au00044859_b07_s5_w2da098211-f7c1-453d-954f-b7d4751f6daa.tif", - "taoe005-u2os-72h-cp-a-au00044859_c15_s2_w3aea523fa-3b89-46a7-95e3-604017151895.tif", - ], - "folder_", - ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", - "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", - "False", - ], + ] } - for name in ["complex", "simple"]: - d = CreateData() - dir_path = d.input_directory() - for i in range(5): - dirname = pathlib.Path(dir_path, f"{data[name][1]}_{i}") - if not pathlib.Path(dirname).exists(): - pathlib.Path(dirname).mkdir(parents=True, exist_ok=True) - for fl in data[name][0]: - temp_file = open(pathlib.Path(dirname, fl), "w") - temp_file.close() + name = "complex" + d = CreateData() + dir_path = d.input_directory() + for i in range(1): + dirname = pathlib.Path(dir_path, f"{data[name][1]}_{i}") + if not pathlib.Path(dirname).exists(): + pathlib.Path(dirname).mkdir(parents=True, exist_ok=True) + for fl in data[name][0]: + temp_file = open(pathlib.Path(dirname, fl), "w") + temp_file.close() return ( pathlib.Path(dir_path), @@ -439,30 +427,19 @@ def test_cli(create_subfolders: pytest.FixtureRequest) -> None: d = CreateData() out_dir = d.output_directory() - if map_directory == "True": - params = [ - "--inpDir", - dir_path, - "--filePattern", - file_pattern, - "--outDir", - out_dir, - "--outFilePattern", - out_file_pattern, - "--mapDirectory", - ] - else: - params = [ - "--inpDir", - dir_path, - "--filePattern", - file_pattern, - "--outDir", - out_dir, - "--outFilePattern", - out_file_pattern, - ] + params = [ + "--inpDir", + dir_path, + "--filePattern", + file_pattern, + "--outDir", + out_dir, + "--outFilePattern", + out_file_pattern, + "--mapDirectory", + ] result = runner.invoke(app, params) + assert result.exit_code == 0 d.clean_directories() From 
e85e9e76ac105da8266e7d405e8e07d5a072ef32 Mon Sep 17 00:00:00 2001 From: Hamdah Shafqat Abbasi Date: Wed, 18 Dec 2024 17:50:14 -0500 Subject: [PATCH 8/8] uncommented tests --- formats/file-renaming-tool/tests/test_main.py | 632 +++++++++--------- 1 file changed, 316 insertions(+), 316 deletions(-) diff --git a/formats/file-renaming-tool/tests/test_main.py b/formats/file-renaming-tool/tests/test_main.py index 70cf6b002..b657c2eae 100644 --- a/formats/file-renaming-tool/tests/test_main.py +++ b/formats/file-renaming-tool/tests/test_main.py @@ -66,322 +66,322 @@ def clean_directories(self) -> None: shutil.rmtree(d) -# fixture_params = [ -# [ -# ( -# "r{row:ddd}_c{col:ddd}_{chan:ccc}.ome.tif", -# "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", -# ), -# ( -# "r{row:d+}_c{col:d+}_{chan:c+}.ome.tif", -# "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", -# ), -# ("r.ome.tif", "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif"), -# ( -# "%{row:ddd}_c{col:ddd}_z{z:d+}.ome.tif", -# "%{row:dddd}_col{col:dddd}_z{z:d+}.ome.tif", -# ), -# ( -# "00{one:d}0{two:dd}-{three:d}-00100100{four:d}.tif", -# "output{one:dd}0{two:ddd}-{three:dd}-00100100{four:dd}.tif", -# ), -# ( -# "S1_R{one:d}_C1-C11_A1_y0{two:dd}_x0{three:dd}_c0{four:dd}.ome.tif", -# "output{one:dd}_C1-C11_A1_y0{two:ddd}_x0{three:ddd}_c0{four:ddd}.ome.tif", -# ), -# ( -# "S1_R{one:d}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", -# "output{one:dd}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", -# ), -# ( -# "img_x{row:dd}_y{col:dd}_({chan:c+}).tif", -# "output{row:dd}_{col:ddd}_{chan:dd}.tif", -# ), -# ( -# "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}.tif", -# "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}.tif", -# ), -# ( -# "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}_{alphachan:ccc}.tif", -# "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}_{alphachan:dddd}.tif", -# ), -# ( -# "img x{row:dd} y{col:dd} {chan:ccc}.tif", -# "output{row:ddd}_{col:ddd}_{chan:ccc}.tif", -# ), -# ( -# 
"p{p:d}_y{y:d}_r{r:d+}_c{c:d+}.ome.tif", -# "p{p:dd}_y{y:dd}_r{r:dddd}_c{c:ddd}.ome.tif", -# ), -# ( -# "img x{row:dd} y{col:dd} {chan:c+}.tif", -# "output{row:ddd}_{col:ddd}_{chan:dd}.tif", -# ), -# ( -# "img x{row:dd}.{other:d+} y{col:dd} {chan:c+}.tif", -# "output{row:ddd}_{col:ddd}_ {other:d+} {chan:dd}.tif", -# ), -# ( -# "0({mo:dd}-{day:dd})0({mo2:dd}-{day2:dd})-({a:d}-{b:d})-{col:ddd}.ome.tif", -# "0({mo:ddd}-{day:ddd})0{mo2:dd}-{day2:dd})-({a:dd}-{b:dd})-{col:ddd}.ome.tif", -# ), -# ] -# ] - - -# @pytest.fixture(params=fixture_params) -# def poly(request): -# """To get the parameter of the fixture.""" -# return request.param - - -# def test_invalid_input_raises_error(poly): -# """Testing of invalid input filepattern.""" -# d = CreateData() -# inputs = d.load_json("duplicate_channels_to_digit") -# (inp_pattern, out_pattern) = poly[0] -# d.runcommands(inputs, inp_pattern, out_pattern) -# d.clean_directories() - - -# def test_non_alphanum_inputs_percentage_sign(poly): -# """Testing of filename with non alphanumeric inputs such as percentage sign.""" -# d = CreateData() -# inputs = d.load_json("percentage_file") -# (inp_pattern, out_pattern) = poly[3] -# outputs = d.runcommands(inputs, inp_pattern, out_pattern) -# assert outputs.exit_code == 0 -# d.clean_directories() - - -# def test_numeric_fixed_width(poly): -# """Testing of filename with numeric fixed length.""" -# d = CreateData() -# inputs = d.load_json("robot") -# (inp_pattern, out_pattern) = poly[4] -# outputs = d.runcommands(inputs, inp_pattern, out_pattern) -# assert outputs.exit_code == 0 -# d.clean_directories() - - -# def test_alphanumeric_fixed_width(poly): -# """Testing of filename with alphanumeric fixed length.""" -# d = CreateData() -# inputs = d.load_json("brain") -# (inp_pattern, out_pattern) = poly[5] -# outputs = d.runcommands(inputs, inp_pattern, out_pattern) -# assert outputs.exit_code == 0 -# d.clean_directories() - - -# def test_alphanumeric_variable_width(poly): -# """Testing of 
filename with alphanumeric variable width.""" -# d = CreateData() -# inputs = d.load_json("variable") -# (inp_pattern, out_pattern) = poly[6] -# outputs = d.runcommands(inputs, inp_pattern, out_pattern) -# assert outputs.exit_code == 0 -# d.clean_directories() - - -# def test_two_chan_to_digit(poly): -# """Testing conversion of two channels to digits.""" -# d = CreateData() -# inputs = d.load_json("two_chan") -# (inp_pattern, out_pattern) = poly[8] -# outputs = d.runcommands(inputs, inp_pattern, out_pattern) -# assert outputs.exit_code == 0 -# d.clean_directories() - - -# def test_three_chan_to_digit(poly): -# """Test conversion of three channels to digits.""" -# d = CreateData() -# inputs = d.load_json("three_chan") -# (inp_pattern, out_pattern) = poly[9] -# outputs = d.runcommands(inputs, inp_pattern, out_pattern) -# assert outputs.exit_code == 0 -# d.clean_directories() - - -# def test_three_char_chan(poly): -# """Test conversion of three character channels to digits.""" -# d = CreateData() -# inputs = d.load_json("three_char_chan") -# (inp_pattern, out_pattern) = poly[10] -# outputs = d.runcommands(inputs, inp_pattern, out_pattern) -# assert outputs.exit_code == 0 -# d.clean_directories() - - -# def test_varied_digits(poly): -# """Test varied digits.""" -# d = CreateData() -# inputs = d.load_json("tissuenet-val-labels-45-C") -# (inp_pattern, out_pattern) = poly[11] -# outputs = d.runcommands(inputs, inp_pattern, out_pattern) -# assert outputs.exit_code == 0 -# d.clean_directories() - - -# def test_spaces(poly): -# """Test non-alphanumeric chars such as spaces.""" -# d = CreateData() -# inputs = d.load_json("non_alphanum_int") -# (inp_pattern, out_pattern) = poly[12] -# outputs = d.runcommands(inputs, inp_pattern, out_pattern) -# assert outputs.exit_code == 0 -# d.clean_directories() - - -# def test_non_alphanum_float(poly): -# """Test non-alphanumeric chars such as spaces, periods, commas, brackets.""" -# d = CreateData() -# inputs = 
d.load_json("non_alphanum_float") -# (inp_pattern, out_pattern) = poly[13] -# outputs = d.runcommands(inputs, inp_pattern, out_pattern) -# assert outputs.exit_code == 0 -# d.clean_directories() - - -# def test_specify_len_valid_input(): -# """Test of sepcifying length.""" -# test_cases = [ -# ( -# ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"), -# ("newdata_x{row:03d}_y{col:03d}_c{channel:03d}.tif"), -# ), -# (("newdata_x{row:c+}.tif"), ("newdata_x{row:s}.tif")), -# (("newdata_x01.tif"), ("newdata_x01.tif")), -# ] -# for test_case in test_cases: -# (from_val, to_val) = test_case -# result = fr.specify_len(from_val) -# assert result == to_val - - -# def test_get_char_to_digit_grps_returns_unique_keys_valid_input(): -# """Test of getting characters to digit groups.""" -# test_cases = [ -# ( -# ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"), -# ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"), -# (["channel"]), -# ), -# (("img_x{row:c+}.tif"), ("newdata_x{row:c+}.tif"), ([])), -# (("img_x01.tif"), ("newdata_x01.tif"), ([])), -# ] -# for test_case in test_cases: -# (from_val1, from_val2, to_val) = test_case -# result = fr.get_char_to_digit_grps(from_val1, from_val2) -# assert result == to_val - - -# def test_str_to_int_valid_input(): -# """Test of string to integer.""" -# test_cases = [ -# ( -# ( -# { -# "row": "01", -# "col": "01", -# "channel": "DAPI", -# "fname": "img_x01_y01_DAPI.tif", -# } -# ), -# ({"row": 1, "col": 1, "channel": "DAPI", "fname": "img_x01_y01_DAPI.tif"}), -# ), -# ( -# ( -# { -# "row": "2", -# "col": "01", -# "channel": "TXRED", -# "fname": "img_x01_y01_TXRED.tif", -# } -# ), -# ( -# { -# "row": 2, -# "col": 1, -# "channel": "TXRED", -# "fname": "img_x01_y01_TXRED.tif", -# } -# ), -# ), -# ( -# ( -# { -# "row": "0001", -# "col": "0001", -# "channel": "GFP", -# "fname": "img_x01_y01_GFP.tif", -# } -# ), -# ({"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}), -# ), -# ] -# for test_case in test_cases: -# (from_val, 
to_val) = test_case -# result = fr.str_to_int(from_val) -# assert result == to_val - - -# def test_letters_to_int_returns_cat_index_dict_valid_input(): -# """Test of letter to integers.""" -# test_cases = [ -# ( -# ("channel"), -# [ -# { -# "row": 1, -# "col": 1, -# "channel": "DAPI", -# "fname": "img_x01_y01_DAPI.tif", -# }, -# {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, -# { -# "row": 1, -# "col": 1, -# "channel": "TXRED", -# "fname": "img_x01_y01_TXRED.tif", -# }, -# ], -# ({"DAPI": 0, "GFP": 1, "TXRED": 2}), -# ) -# ] -# for test_case in test_cases: -# (from_val1, from_val2, to_val) = test_case -# result = fr.letters_to_int(from_val1, from_val2) -# assert result == to_val - - -# @pytest.mark.xfail -# def test_letters_to_int_returns_error_invalid_input(): -# """Test of invalid inputs.""" -# test_cases = [ -# ( -# (2), -# [ -# { -# "row": 1, -# "col": 1, -# "channel": "DAPI", -# "fname": "img_x01_y01_DAPI.tif", -# }, -# {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, -# { -# "row": 1, -# "col": 1, -# "channel": "TXRED", -# "fname": "img_x01_y01_TXRED.tif", -# }, -# ], -# ), -# ] -# for test_case in test_cases: -# (from_val1, from_val2) = test_case -# fr.letters_to_int(from_val1, from_val2) +fixture_params = [ + [ + ( + "r{row:ddd}_c{col:ddd}_{chan:ccc}.ome.tif", + "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", + ), + ( + "r{row:d+}_c{col:d+}_{chan:c+}.ome.tif", + "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", + ), + ("r.ome.tif", "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif"), + ( + "%{row:ddd}_c{col:ddd}_z{z:d+}.ome.tif", + "%{row:dddd}_col{col:dddd}_z{z:d+}.ome.tif", + ), + ( + "00{one:d}0{two:dd}-{three:d}-00100100{four:d}.tif", + "output{one:dd}0{two:ddd}-{three:dd}-00100100{four:dd}.tif", + ), + ( + "S1_R{one:d}_C1-C11_A1_y0{two:dd}_x0{three:dd}_c0{four:dd}.ome.tif", + "output{one:dd}_C1-C11_A1_y0{two:ddd}_x0{three:ddd}_c0{four:ddd}.ome.tif", + ), + ( + 
"S1_R{one:d}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", + "output{one:dd}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", + ), + ( + "img_x{row:dd}_y{col:dd}_({chan:c+}).tif", + "output{row:dd}_{col:ddd}_{chan:dd}.tif", + ), + ( + "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}.tif", + "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}.tif", + ), + ( + "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}_{alphachan:ccc}.tif", + "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}_{alphachan:dddd}.tif", + ), + ( + "img x{row:dd} y{col:dd} {chan:ccc}.tif", + "output{row:ddd}_{col:ddd}_{chan:ccc}.tif", + ), + ( + "p{p:d}_y{y:d}_r{r:d+}_c{c:d+}.ome.tif", + "p{p:dd}_y{y:dd}_r{r:dddd}_c{c:ddd}.ome.tif", + ), + ( + "img x{row:dd} y{col:dd} {chan:c+}.tif", + "output{row:ddd}_{col:ddd}_{chan:dd}.tif", + ), + ( + "img x{row:dd}.{other:d+} y{col:dd} {chan:c+}.tif", + "output{row:ddd}_{col:ddd}_ {other:d+} {chan:dd}.tif", + ), + ( + "0({mo:dd}-{day:dd})0({mo2:dd}-{day2:dd})-({a:d}-{b:d})-{col:ddd}.ome.tif", + "0({mo:ddd}-{day:ddd})0{mo2:dd}-{day2:dd})-({a:dd}-{b:dd})-{col:ddd}.ome.tif", + ), + ] +] + + +@pytest.fixture(params=fixture_params) +def poly(request): + """To get the parameter of the fixture.""" + return request.param + + +def test_invalid_input_raises_error(poly): + """Testing of invalid input filepattern.""" + d = CreateData() + inputs = d.load_json("duplicate_channels_to_digit") + (inp_pattern, out_pattern) = poly[0] + d.runcommands(inputs, inp_pattern, out_pattern) + d.clean_directories() + + +def test_non_alphanum_inputs_percentage_sign(poly): + """Testing of filename with non alphanumeric inputs such as percentage sign.""" + d = CreateData() + inputs = d.load_json("percentage_file") + (inp_pattern, out_pattern) = poly[3] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + d.clean_directories() + + +def test_numeric_fixed_width(poly): + """Testing of filename with numeric fixed length.""" + d = CreateData() + inputs = 
d.load_json("robot") + (inp_pattern, out_pattern) = poly[4] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + d.clean_directories() + + +def test_alphanumeric_fixed_width(poly): + """Testing of filename with alphanumeric fixed length.""" + d = CreateData() + inputs = d.load_json("brain") + (inp_pattern, out_pattern) = poly[5] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + d.clean_directories() + + +def test_alphanumeric_variable_width(poly): + """Testing of filename with alphanumeric variable width.""" + d = CreateData() + inputs = d.load_json("variable") + (inp_pattern, out_pattern) = poly[6] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + d.clean_directories() + + +def test_two_chan_to_digit(poly): + """Testing conversion of two channels to digits.""" + d = CreateData() + inputs = d.load_json("two_chan") + (inp_pattern, out_pattern) = poly[8] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + d.clean_directories() + + +def test_three_chan_to_digit(poly): + """Test conversion of three channels to digits.""" + d = CreateData() + inputs = d.load_json("three_chan") + (inp_pattern, out_pattern) = poly[9] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + d.clean_directories() + + +def test_three_char_chan(poly): + """Test conversion of three character channels to digits.""" + d = CreateData() + inputs = d.load_json("three_char_chan") + (inp_pattern, out_pattern) = poly[10] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + d.clean_directories() + + +def test_varied_digits(poly): + """Test varied digits.""" + d = CreateData() + inputs = d.load_json("tissuenet-val-labels-45-C") + (inp_pattern, out_pattern) = poly[11] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + 
d.clean_directories() + + +def test_spaces(poly): + """Test non-alphanumeric chars such as spaces.""" + d = CreateData() + inputs = d.load_json("non_alphanum_int") + (inp_pattern, out_pattern) = poly[12] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + d.clean_directories() + + +def test_non_alphanum_float(poly): + """Test non-alphanumeric chars such as spaces, periods, commas, brackets.""" + d = CreateData() + inputs = d.load_json("non_alphanum_float") + (inp_pattern, out_pattern) = poly[13] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + d.clean_directories() + + +def test_specify_len_valid_input(): + """Test of sepcifying length.""" + test_cases = [ + ( + ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"), + ("newdata_x{row:03d}_y{col:03d}_c{channel:03d}.tif"), + ), + (("newdata_x{row:c+}.tif"), ("newdata_x{row:s}.tif")), + (("newdata_x01.tif"), ("newdata_x01.tif")), + ] + for test_case in test_cases: + (from_val, to_val) = test_case + result = fr.specify_len(from_val) + assert result == to_val + + +def test_get_char_to_digit_grps_returns_unique_keys_valid_input(): + """Test of getting characters to digit groups.""" + test_cases = [ + ( + ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"), + ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"), + (["channel"]), + ), + (("img_x{row:c+}.tif"), ("newdata_x{row:c+}.tif"), ([])), + (("img_x01.tif"), ("newdata_x01.tif"), ([])), + ] + for test_case in test_cases: + (from_val1, from_val2, to_val) = test_case + result = fr.get_char_to_digit_grps(from_val1, from_val2) + assert result == to_val + + +def test_str_to_int_valid_input(): + """Test of string to integer.""" + test_cases = [ + ( + ( + { + "row": "01", + "col": "01", + "channel": "DAPI", + "fname": "img_x01_y01_DAPI.tif", + } + ), + ({"row": 1, "col": 1, "channel": "DAPI", "fname": "img_x01_y01_DAPI.tif"}), + ), + ( + ( + { + "row": "2", + "col": "01", + "channel": "TXRED", + 
"fname": "img_x01_y01_TXRED.tif", + } + ), + ( + { + "row": 2, + "col": 1, + "channel": "TXRED", + "fname": "img_x01_y01_TXRED.tif", + } + ), + ), + ( + ( + { + "row": "0001", + "col": "0001", + "channel": "GFP", + "fname": "img_x01_y01_GFP.tif", + } + ), + ({"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}), + ), + ] + for test_case in test_cases: + (from_val, to_val) = test_case + result = fr.str_to_int(from_val) + assert result == to_val + + +def test_letters_to_int_returns_cat_index_dict_valid_input(): + """Test of letter to integers.""" + test_cases = [ + ( + ("channel"), + [ + { + "row": 1, + "col": 1, + "channel": "DAPI", + "fname": "img_x01_y01_DAPI.tif", + }, + {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, + { + "row": 1, + "col": 1, + "channel": "TXRED", + "fname": "img_x01_y01_TXRED.tif", + }, + ], + ({"DAPI": 0, "GFP": 1, "TXRED": 2}), + ) + ] + for test_case in test_cases: + (from_val1, from_val2, to_val) = test_case + result = fr.letters_to_int(from_val1, from_val2) + assert result == to_val + + +@pytest.mark.xfail +def test_letters_to_int_returns_error_invalid_input(): + """Test of invalid inputs.""" + test_cases = [ + ( + (2), + [ + { + "row": 1, + "col": 1, + "channel": "DAPI", + "fname": "img_x01_y01_DAPI.tif", + }, + {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, + { + "row": 1, + "col": 1, + "channel": "TXRED", + "fname": "img_x01_y01_TXRED.tif", + }, + ], + ), + ] + for test_case in test_cases: + (from_val1, from_val2) = test_case + fr.letters_to_int(from_val1, from_val2) @pytest.fixture