From bc91664817c26ba23e66029ca136dd8b710b65d2 Mon Sep 17 00:00:00 2001
From: Zhiyuan Liu
Date: Thu, 19 Dec 2024 17:22:05 -0500
Subject: [PATCH 01/10] chore: create folder for autosegmentation plugin

---
 .../.bumpversion.cfg                          |  27 +
 .../zarr-autosegmentation-tool/CHANGELOG.md   |   4 +
 .../zarr-autosegmentation-tool/Dockerfile     |  25 +
 .../zarr-autosegmentation-tool/README.md      |  71 ++
 .../zarr-autosegmentation-tool/VERSION        |   1 +
 .../build-docker.sh                           |  22 +
 .../filerenaming.cwl                          |  36 +
 .../zarr-autosegmentation-tool/ict.yaml       |  65 ++
 .../zarr-autosegmentation-tool/plugin.json    |  81 +++
 .../zarr-autosegmentation-tool/pyproject.toml |  28 +
 .../zarr-autosegmentation-tool/run-plugin.sh  |  23 +
 .../zarr_autosegmentation/__init__.py         |   4 +
 .../zarr_autosegmentation/__main__.py         | 184 +++++
 .../zarr_autosegmentation.py                  | 406 +++++++++++
 .../tests/__init__.py                         |   1 +
 .../tests/file_rename_test.json               | 103 +++
 .../tests/test_main.py                        | 654 ++++++++++++++++++
 17 files changed, 1735 insertions(+)
 create mode 100644 segmentation/zarr-autosegmentation-tool/.bumpversion.cfg
 create mode 100644 segmentation/zarr-autosegmentation-tool/CHANGELOG.md
 create mode 100644 segmentation/zarr-autosegmentation-tool/Dockerfile
 create mode 100644 segmentation/zarr-autosegmentation-tool/README.md
 create mode 100644 segmentation/zarr-autosegmentation-tool/VERSION
 create mode 100644 segmentation/zarr-autosegmentation-tool/build-docker.sh
 create mode 100644 segmentation/zarr-autosegmentation-tool/filerenaming.cwl
 create mode 100644 segmentation/zarr-autosegmentation-tool/ict.yaml
 create mode 100644 segmentation/zarr-autosegmentation-tool/plugin.json
 create mode 100644 segmentation/zarr-autosegmentation-tool/pyproject.toml
 create mode 100644 segmentation/zarr-autosegmentation-tool/run-plugin.sh
 create mode 100644 segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/__init__.py
 create mode 100644 segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/__main__.py
 create mode 100644 segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/zarr_autosegmentation.py
 create mode 100644 segmentation/zarr-autosegmentation-tool/tests/__init__.py
 create mode 100644 segmentation/zarr-autosegmentation-tool/tests/file_rename_test.json
 create mode 100644 segmentation/zarr-autosegmentation-tool/tests/test_main.py

diff --git a/segmentation/zarr-autosegmentation-tool/.bumpversion.cfg b/segmentation/zarr-autosegmentation-tool/.bumpversion.cfg
new file mode 100644
index 000000000..9f1772079
--- /dev/null
+++ b/segmentation/zarr-autosegmentation-tool/.bumpversion.cfg
@@ -0,0 +1,27 @@
+[bumpversion]
+current_version = 0.2.4
+commit = True
+tag = False
+parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
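+# parse captures major/minor/patch plus an optional -<release><dev> suffix
+# (e.g. 0.2.4-dev0); serialize below writes versions back in the same two forms.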
+serialize =
+	{major}.{minor}.{patch}-{release}{dev}
+	{major}.{minor}.{patch}
+
+[bumpversion:part:release]
+optional_value = _
+first_value = dev
+values =
+	dev
+	_
+
+[bumpversion:part:dev]
+
+[bumpversion:file:pyproject.toml]
+search = version = "{current_version}"
+replace = version = "{new_version}"
+
+[bumpversion:file:plugin.json]
+
+[bumpversion:file:VERSION]
+
+[bumpversion:file:src/polus/images/formats/file_renaming/__init__.py]
diff --git a/segmentation/zarr-autosegmentation-tool/CHANGELOG.md b/segmentation/zarr-autosegmentation-tool/CHANGELOG.md
new file mode 100644
index 000000000..02a40369f
--- /dev/null
+++ b/segmentation/zarr-autosegmentation-tool/CHANGELOG.md
@@ -0,0 +1,4 @@
+## [0.2.4-dev0] - 2024-01-17
+### Added
+- Pytests to test this plugin
+- Added support for recursively searching a directory and its subdirectories for files matching the specified pattern by passing either `raw` or `map` for the `mapDirectory` input argument.
diff --git a/segmentation/zarr-autosegmentation-tool/Dockerfile b/segmentation/zarr-autosegmentation-tool/Dockerfile
new file mode 100644
index 000000000..2ff053e1e
--- /dev/null
+++ b/segmentation/zarr-autosegmentation-tool/Dockerfile
@@ -0,0 +1,25 @@
+FROM python:3.13-slim AS builder
+
+# environment variables defined in polusai/bfio
+ENV EXEC_DIR="/opt/executables"
+ENV POLUS_IMG_EXT=".ome.tif"
+ENV POLUS_TAB_EXT=".csv"
+ENV POLUS_LOG="INFO"
+
+# Work directory defined in the base container
+WORKDIR ${EXEC_DIR}
+
+# TODO: Change the tool_dir to the tool directory
+ENV TOOL_DIR="formats/zarr-flatten-tool"
+
+# Copy the repository into the container
+RUN mkdir image-tools
+COPY . ${EXEC_DIR}/image-tools
+
+# Install the tool
+RUN pip3 install "${EXEC_DIR}/image-tools/${TOOL_DIR}" --no-cache-dir
+
+# Set the entrypoint
+# TODO: Change the entrypoint to the tool entrypoint
+ENTRYPOINT ["python3", "-m", "polus.images.formats.file_renaming"]
+CMD ["--help"]
diff --git a/segmentation/zarr-autosegmentation-tool/README.md b/segmentation/zarr-autosegmentation-tool/README.md
new file mode 100644
index 000000000..37e01d17d
--- /dev/null
+++ b/segmentation/zarr-autosegmentation-tool/README.md
@@ -0,0 +1,71 @@
+# File Renaming (0.2.4-dev0)
+This WIPP plugin uses supplied file naming patterns to dynamically
+rename and save files in an image collection to a new image collection.
+
+## Example Usage
+* The user can upload an image collection where all files follow similar
+naming conventions.
+
+  * **Input collection:**
+`img_x01_y01_DAPI.tif`
+`img_x01_y01_GFP.tif`
+`img_x01_y01_TXRED.tif`
+
+  * **Output collection:**
+`newdata_x001_y001_c001.tif`
+`newdata_x001_y001_c002.tif`
+`newdata_x001_y001_c003.tif`
+
+  * **User input pattern:**
+`img_x{row:dd}_y{col:dd}_{channel:c+}.ome.tif`
+
+  * **User output pattern:**
+`newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.ome.tif`
+
+* The user can format the output digits using the number of digits
+specified in the output format.
+  * `d` represents a *digit*.
+  * `c` represents a *character*.
+
+* Note that `c+` only matches letters of the alphabet, not symbols or numbers.
+
+* If the output formats have plus signs (+), then the number of output
+digits/characters is not fixed.
+
+* Finally, the input and output pattern data types *must* agree, with one
+exception:
+  * If the input is a character and the output is a digit,
+the script sorts the strings that match the character pattern and
+assigns numbers starting at 0 to them.
+
+* A new optional `mapDirectory` input includes the directory name in the renamed files.
It also handles nested directories: when `raw` is passed, the name of the
directory one level up is prepended to the renamed files; when `map` is
passed, subdirectories are mapped to `d0, d1, d2, ... dn`; when the argument
is omitted, no directory name is added to the renamed files.
+
+
+Contact [Melanie Parham](mailto:melanie.parham@axleinfo.com), [Hamdah Shafqat abbasi](mailto:hamdahshafqat.abbasi@nih.gov) for more
+information.
+
+For more information on WIPP, visit the
+[official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp).
+
+## Building
+
+To build the Docker image for the conversion plugin, run
+`./build-docker.sh`.
+
+## Install WIPP Plugin
+
+If WIPP is running, navigate to the plugins page and add a new plugin.
+Paste the contents of `plugin.json` into the pop-up window and submit.
+
+## Options
+
+This plugin takes the following input and output arguments:
+
+| Name               | Description                       | I/O    | Type       |
+|--------------------|-----------------------------------|--------|------------|
+| `--inpDir`         | Input image collection            | Input  | collection |
+| `--filePattern`    | Input filename pattern            | Input  | string     |
+| `--outDir`         | Output collection                 | Output | collection |
+| `--outFilePattern` | Output filename pattern           | Input  | string     |
+| `--mapDirectory`   | Directory name (`raw`, `map`)     | Input  | enum       |
+| `--preview`        | Generate a JSON file with outputs | Output | JSON       |
diff --git a/segmentation/zarr-autosegmentation-tool/VERSION b/segmentation/zarr-autosegmentation-tool/VERSION
new file mode 100644
index 000000000..abd410582
--- /dev/null
+++ b/segmentation/zarr-autosegmentation-tool/VERSION
@@ -0,0 +1 @@
+0.2.4
diff --git a/segmentation/zarr-autosegmentation-tool/build-docker.sh b/segmentation/zarr-autosegmentation-tool/build-docker.sh
new file mode 100644
index 000000000..ebe00add2
--- /dev/null
+++ b/segmentation/zarr-autosegmentation-tool/build-docker.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Change the name of the tool here
+tool_dir="formats"
+tool_name="file-renaming-tool"
+
+# The version is read from the VERSION file
+version=$(",
+"Hamdah Shafqat abbasi <hamdahshafqat.abbasi@nih.gov>"
+]
+readme = "README.md"
+packages = [{include = "polus", from = "src"}]
+
+[tool.poetry.dependencies]
+python = ">=3.9,<3.12"
+typer = "^0.7.0"
+tqdm = "^4.64.1"
+numpy = "^1.26.3"
+
+[tool.poetry.group.dev.dependencies]
+bump2version = "^1.0.1"
+pre-commit = "^3.1.0"
+black = "^23.1.0"
+flake8 = "^6.0.0"
+mypy = "^1.0.1"
+pytest = "^7.2.1"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/segmentation/zarr-autosegmentation-tool/run-plugin.sh b/segmentation/zarr-autosegmentation-tool/run-plugin.sh
new file mode 100644
index 000000000..c9b7a5ef3
--- /dev/null
+++ b/segmentation/zarr-autosegmentation-tool/run-plugin.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+version=$( None:
+    """Takes a pyramidal dataset of OME-Zarr data and flattens it into
+    high-resolution slices, usually along the z axis.
+
+    See README for pattern rules.
+
+    Args:
+        inpDir: Path or URL to an OME-Zarr database
+        filePattern: Input file pattern
+        outDir: Path to image collection storing copies of renamed files
+        outFilePattern: Output file pattern
+        mapDirectory: Include the folder name in the renamed files
+
+    Returns:
+        output_dict: Dictionary of in to out file names, for testing
+
+    """  # noqa: D205
+    logger.info(f"inpDir = {inp_dir}")
+    logger.info(f"filePattern = {file_pattern}")
+    logger.info(f"outDir = {out_dir}")
+    logger.info(f"outFilePattern = {out_file_pattern}")
+    logger.info(f"mapDirectory = {map_directory}")
+
+    inp_dir = inp_dir.resolve()
+    out_dir = out_dir.resolve()
+
+    assert (
+        inp_dir.exists()
+    ), f"{inp_dir} does not exist! Please check the input path again"
+    assert (
+        out_dir.exists()
+    ), f"{out_dir} does not exist! Please check the output path again"
+
+    subdirs, subfiles = fr.get_data(inp_dir)
+    assert len(subfiles) != 0, "Files are missing in input directory!!!"
+
+    if not map_directory:
+        fr.rename(
+            inp_dir,
+            out_dir,
+            file_pattern,
+            out_file_pattern,
+        )
+
+    elif map_directory:
+        file_ext = re.split("\\.", file_pattern)[-1]
+
+        subdirs = np.unique(
+            [
+                sub
+                for sub in subdirs
+                for f in pathlib.Path(sub).rglob("*")
+                if f.suffix == f".{file_ext}"
+            ],
+        )
+
+        if len(subdirs) == 1:
+            logger.info(
+                "Renaming files in a single directory.",
+            )
+            dir_pattern = r"^[A-Za-z0-9_]+$"
+            # Check whether the directory name matches the pattern
+            matching_directory: Optional[Match[Any]] = re.match(
+                dir_pattern,
+                pathlib.Path(subdirs[0]).stem,
+            )
+            if matching_directory is not None:
+                matching_directory = matching_directory.group()
+            if f"{map_directory}" == "raw":
+                outfile_pattern = f"{matching_directory}_{out_file_pattern}"
+            if f"{map_directory}" == "map":
+                outfile_pattern = f"d1_{out_file_pattern}"
+
+            fr.rename(subdirs[0], out_dir, file_pattern, outfile_pattern)
+            logger.info(
+                "Finished renaming files.",
+            )
+        if len(subdirs) > 1:
+            subnames = [pathlib.Path(sb).name for sb in subdirs]
+            sub_check = all(name == subnames[0] for name in subnames)
+
+            for i, sub in enumerate(subdirs):
+                assert (
+                    len([f for f in pathlib.Path(sub).iterdir() if f.is_file()]) != 0
+                ), "Files are missing in input directory!!!"
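+                # Keep the subdirectory-name prefix only when the name is purely
+                # alphanumeric/underscore; the regex below rejects anything else.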
+                dir_pattern = r"^[A-Za-z0-9_]+$"
+                # Iterate over the directories and check if they match the pattern
+                matching_directories: Optional[Match[Any]] = re.match(
+                    dir_pattern,
+                    pathlib.Path(sub).stem,
+                )
+                if matching_directories is not None:
+                    matching_directories = matching_directories.group()
+
+                if not sub_check and f"{map_directory}" == "raw":
+                    outfile_pattern = f"{matching_directories}_{out_file_pattern}"
+                elif sub_check and f"{map_directory}" == "raw":
+                    logger.error(
+                        "Subdirectory names are the same; they should be different.",
+                    )
+                    break
+                else:
+                    outfile_pattern = f"d{i}_{out_file_pattern}"
+                fr.rename(sub, out_dir, file_pattern, outfile_pattern)
+                logger.info(
+                    "Finished renaming files.",
+                )
+
+    if preview:
+        with pathlib.Path.open(pathlib.Path(out_dir, "preview.json"), "w") as jfile:
+            out_json: dict[str, Any] = {
+                "filepattern": out_file_pattern,
+                "outDir": [],
+            }
+            for file in out_dir.iterdir():
+                if file.is_file() and file.suffix != ".json":
+                    out_name = file.name
+                    out_json["outDir"].append(out_name)
+            json.dump(out_json, jfile, indent=2)
+
+
+if __name__ == "__main__":
+    app()
diff --git a/segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/zarr_autosegmentation.py b/segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/zarr_autosegmentation.py
new file mode 100644
index 000000000..2b570d7b8
--- /dev/null
+++ b/segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/zarr_autosegmentation.py
@@ -0,0 +1,406 @@
+"""File Renaming."""
+import enum
+import logging
+import os
+import pathlib
+import re
+import shutil
+from concurrent.futures import ProcessPoolExecutor
+from concurrent.futures import as_completed
+from multiprocessing import cpu_count
+from sys import platform
+from typing import Any
+from typing import Union
+
+from tqdm import tqdm
+
+EXT = (".csv", ".txt", ".cppipe", ".yml", ".yaml", ".xml", ".json")
+
+logger = logging.getLogger(__name__)
+logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO))
+
+if platform == "linux" or platform == "linux2":
+    NUM_THREADS = len(os.sched_getaffinity(0))  # type: ignore
+else:
+    NUM_THREADS = max(cpu_count() // 2, 2)
+
+
+class MappingDirectory(str, enum.Enum):
+    """Map Directory information."""
+
+    RAW = "raw"
+    MAP = "map"
+    Default = ""
+
+
+def image_directory(dirpath: pathlib.Path) -> Union[bool, None]:
+    """Check whether a directory directly contains image files.
+
+    Only the first entry is inspected; metadata extensions in EXT are
+    not counted as images.
+
+    Args:
+        dirpath: Path to directory.
+
+    Returns:
+        True if the first entry is an image file, False if it is not,
+        None if the directory is empty.
+    """
+    for file in dirpath.iterdir():
+        return bool(file.is_file() and file.suffix not in EXT)
+    return None
+
+
+def get_data(inp_dir: str) -> tuple[list[pathlib.Path], list[pathlib.Path]]:
+    """Collect image subdirectories and image file paths from the input directory.
+
+    Args:
+        inp_dir: Path to input directory.
+
+    Returns:
+        A tuple of a list of subdirectory paths and a list of file paths.
+    """
+    filepath: list[pathlib.Path] = []
+    dirpaths: list[pathlib.Path] = []
+    for path in pathlib.Path(inp_dir).rglob("*"):
+        if path.is_dir():
+            if path.parent in dirpaths:
+                dirpaths.remove(path.parent)
+            if image_directory(path):
+                dirpaths.append(path)
+        elif path.is_file() and not path.name.endswith(tuple(EXT)):
+            fpath = pathlib.Path(inp_dir).joinpath(path)
+            filepath.append(fpath)
+
+    return dirpaths, filepath
+
+
+def map_pattern_grps_to_regex(file_pattern: str) -> dict:
+    """Get group names from pattern. Convert patterns (c+ or dd) to regex.
+
+    Args:
+        file_pattern: File pattern, with special characters escaped.
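+            (ex: img_x{row:dd}_y{col:dd}_{channel:c+}\.ome\.tif)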
+
+    Returns:
+        rgx_patterns: The key is a named regex group. The value is regex.
+    """
+    logger.debug(f"map_pattern_grps_to_regex() inputs: {file_pattern}")
+    #: Extract the group name and associated pattern (ex: {row:dd})
+    group_and_pattern_tuples = re.findall(r"\{(\w+):([dc+]+)\}", file_pattern)
+    pattern_map = {"d": r"[0-9]", "c": r"[a-zA-Z]", "+": "+"}
+    rgx_patterns = {}
+    for group_name, groups_pattern in group_and_pattern_tuples:
+        rgx = "".join([pattern_map[pattern] for pattern in groups_pattern])
+        #: ?P<foo> is included to specify that foo is a named group.
+        rgx_patterns[group_name] = rf"(?P<{group_name}>{rgx})"
+    logger.debug(f"map_pattern_grps_to_regex() returns {rgx_patterns}")
+
+    return rgx_patterns
+
+
+def convert_to_regex(file_pattern: str, extracted_rgx_patterns: dict) -> str:
+    """Integrate regex into original file pattern.
+
+    The extracted_rgx_patterns helps replace simple patterns (ie. dd, c+)
+    with regex in the correct location, based on named groups.
+
+    Args:
+        file_pattern: file pattern provided by the user.
+        extracted_rgx_patterns: named group and regex value dictionary.
+
+    Returns:
+        new_pattern: file pattern converted to regex.
+    """
+    logger.debug(f"convert_to_regex() inputs: {file_pattern}, {extracted_rgx_patterns}")
+    rgx_pattern = file_pattern
+    for named_grp, regex_str in extracted_rgx_patterns.items():
+        #: The prefix "fr" creates raw f-strings, which act like format()
+        rgx_pattern = re.sub(rf"\{{{named_grp}:.*?\}}", regex_str, rgx_pattern)
+    logger.debug(f"convert_to_regex() returns {rgx_pattern}")
+    return rgx_pattern
+
+
+def specify_len(out_pattern: str) -> str:
+    """Update output file pattern to output correct number of digits.
+
+    After extracting group names and associated patterns from the
+    outFilePattern, integrate format strings into the file pattern to
+    produce the requested number of digits.
+
+    Example:
+        "newdata_x{row:ddd}" becomes "newdata_x{row:03d}".
+
+    Args:
+        out_pattern: output file pattern provided by the user.
+
+    Returns:
+        new_out_pattern: file pattern converted to format string.
+    """
+    logger.debug(f"specify_len() inputs: {out_pattern}")
+    #: Extract the group name and associated pattern (ex: {row:dd})
+    group_and_pattern_tuples = re.findall(r"\{(\w+):([dc+]+)\}", out_pattern)
+    grp_rgx_dict = {}
+    #: Convert simple file patterns to format strings (ex: ddd becomes :03d).
+    for group_name, groups_pattern in group_and_pattern_tuples:
+        # Get the length of the string if not variable width
+        s_len = "" if "+" in groups_pattern else str(len(groups_pattern))
+        # Set the formatting value
+        temp_pattern = "s" if groups_pattern[0] == "c" else "d"
+        # Prepend a 0 for padding digit format
+        if temp_pattern == "d":
+            s_len = "0" + s_len
+        grp_rgx_dict[group_name] = "{" + group_name + ":" + s_len + temp_pattern + "}"
+    new_out_pattern = out_pattern
+    for named_group, format_str in grp_rgx_dict.items():
+        new_out_pattern = re.sub(
+            rf"\{{{named_group}:.*?\}}",
+            format_str,
+            new_out_pattern,
+        )
+    logger.debug(f"specify_len() returns {new_out_pattern}")
+
+    return new_out_pattern
+
+
+def get_char_to_digit_grps(inp_pattern: str, out_pattern: str) -> list[str]:
+    """Return group names where input and output datatypes differ.
+
+    If the input pattern is a character and the output pattern is a
+    digit, return the named group associated with those patterns.
+
+    Args:
+        inp_pattern: Original input pattern.
+        out_pattern: Original output pattern.
+
+    Returns:
+        special_categories: Named groups with c to d conversion, or an empty list.
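+
+    Example:
+        inp_pattern "{channel:c+}" with out_pattern "{channel:dd}" returns ["channel"].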
+    """
+    logger.debug(f"get_char_to_digit_grps() inputs: {inp_pattern}, {out_pattern}")
+    #: Extract the group name and associated pattern (ex: {row:dd})
+    ingrp_and_pattern_tuples = re.findall(r"\{(\w+):([dc+]+)\}", inp_pattern)
+    outgrp_and_pattern_tuples = re.findall(r"\{(\w+):([dc+]+)\}", out_pattern)
+
+    #: Get group names where input pattern is c and output pattern is d
+    special_categories = []
+    for out_grp_name in dict(outgrp_and_pattern_tuples):
+        if dict(ingrp_and_pattern_tuples)[out_grp_name].startswith("c") and dict(
+            outgrp_and_pattern_tuples,
+        )[out_grp_name].startswith("d"):
+            special_categories.append(out_grp_name)
+    logger.debug(f"get_char_to_digit_grps() returns {special_categories}")
+    return special_categories
+
+
+def extract_named_grp_matches(
+    rgx_pattern: str,
+    inp_files: list,
+) -> list[dict[str, Union[str, Any]]]:
+    """Store matches from the substrings from each filename that vary.
+
+    Loop through each file. Apply the regex pattern to each
+    filename. When a match occurs for a named group, add that match to
+    a dictionary, where the key is the named (regex capture) group and
+    the value is the corresponding match from the filename.
+
+    Args:
+        rgx_pattern: input pattern in regex format.
+        inp_files: list of files in input directory.
+
+    Returns:
+        grp_match_dict_list: list of dictionaries containing str matches.
+    """
+    logger.debug(f"extract_named_grp_matches() inputs: {rgx_pattern}, {inp_files}")
+    grp_match_dict_list = []
+    #: Build list of dicts, where key is capture group and value is match
+    for filename in inp_files:
+        try:
+            d = re.match(rgx_pattern, filename)
+            if d is None:
+                #: Skip files that do not match the pattern
+                continue
+            grp_match_dict = d.groupdict()
+            #: Add filename information to dictionary
+            grp_match_dict["fname"] = filename
+            grp_match_dict_list.append(grp_match_dict)
+        except AttributeError as e:
+            logger.error(e)
+            logger.error(
+                "File pattern does not match one or more files. "
+                "See README for pattern rules.",
+            )
+            msg = "File pattern does not match with files."
+            raise AttributeError(msg) from e
+        except AssertionError as e:
+            if str(e).startswith("redefinition of group name"):
+                logger.error(
+                    "Ensure that named groups in file patterns are unique. "
+                    "({})".format(e),
+                )
+                msg = f"Ensure that named groups in file patterns are unique. ({e})"
+                raise ValueError(
+                    msg,
+                ) from e
+
+    logger.debug(f"extract_named_grp_matches() returns {grp_match_dict_list}")
+
+    return grp_match_dict_list
+
+
+def str_to_int(dictionary: dict) -> dict:
+    """If a number in the dictionary is in str format, convert to int.
+
+    Args:
+        dictionary: contains group, match, and filename info.
+
+    Returns:
+        fixed_dictionary: input dict, with numeric str values converted to int.
+    """
+    fixed_dictionary = {}
+    for key, value in dictionary.items():
+        try:
+            fixed_dictionary[key] = int(value)
+        except Exception:  # noqa: BLE001
+            fixed_dictionary[key] = value
+    logger.debug(f"str_to_int() returns {fixed_dictionary}")
+    return fixed_dictionary
+
+
+def letters_to_int(named_grp: str, all_matches: list) -> dict:
+    """Alphabetically number matches for the given named group for all files.
+
+    Make a dictionary where each key is a match for each filename and
+    the corresponding value is a number indicating its alphabetical rank.
+
+    Args:
+        named_grp: Group with c in input pattern and d in out pattern.
+        all_matches: list of dicts, k=grps, v=match, last item=file name.
+
+    Returns:
+        str_alphabetindex_dict: dict key=matched string, value=alphabetical index.
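+
+    Example:
+        Matches DAPI, GFP, TXRED map to {"DAPI": 0, "GFP": 1, "TXRED": 2}.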
+    """
+    logger.debug(f"letters_to_int() inputs: {named_grp}, {all_matches}")
+    #: Generate list of strings belonging to the given category (element).
+    alphabetized_matches = sorted(
+        {namedgrp_match_dict[named_grp] for namedgrp_match_dict in all_matches},
+    )
+    str_alphabetindex_dict = {}
+    for i in range(0, len(alphabetized_matches)):
+        str_alphabetindex_dict[alphabetized_matches[i]] = i
+    logger.debug(f"letters_to_int() returns {str_alphabetindex_dict}")
+    return str_alphabetindex_dict
+
+
+def rename(  # noqa: C901, PLR0915, PLR0912
+    inp_dir: str,
+    out_dir: pathlib.Path,
+    file_pattern: str,
+    out_file_pattern: str,
+) -> None:
+    """Rename files that match the input pattern and copy them to the output directory.
+
+    Args:
+        inp_dir : Path to image collection.
+        out_dir : Path to image collection storing copies of renamed files.
+        file_pattern : Input file pattern.
+        out_file_pattern : Output file pattern.
+    """
+    logger.info("Start renaming files")
+    file_ext = re.split("\\.", file_pattern)[-1]
+    empty_ext = ""
+    ext_length = 5
+    if file_ext == "*" or file_ext == empty_ext or len(file_ext) > ext_length:
+        msg = "Please define filePattern including file extension!"
+        raise ValueError(msg)
+
+    _, inpfiles = get_data(inp_dir)
+
+    inp_files: list[str] = [
+        f"{f.name}" for f in inpfiles if pathlib.Path(f).suffix == f".{file_ext}"
+    ]
+
+    if len(inp_files) == 0:
+        msg = "Input directory does not contain any matching files; please check it again!"
+        raise ValueError(msg)
+
+    chars_to_escape = ["(", ")", "[", "]", "$", "."]
+    for char in chars_to_escape:
+        file_pattern = file_pattern.replace(char, ("\\" + char))
+
+    if "\\.*" in file_pattern:
+        file_pattern = file_pattern.replace("\\.*", (".*"))
+    if "\\.+" in file_pattern:
+        file_pattern = file_pattern.replace("\\.+", (".+"))
+    groupname_regex_dict = map_pattern_grps_to_regex(file_pattern)
+
+    #: Integrate regex from dictionary into original file pattern
+    inp_pattern_rgx = convert_to_regex(file_pattern, groupname_regex_dict)
+
+    #: Integrate format strings into outFilePattern to specify digit/char len
+    out_pattern_fstring = specify_len(out_file_pattern)
+
+    #: List named groups where input pattern=char & output pattern=digit
+    char_to_digit_categories = get_char_to_digit_grps(file_pattern, out_file_pattern)
+
+    #: List a dictionary (k=named grp, v=match) for each filename
+
+    all_grp_matches = extract_named_grp_matches(inp_pattern_rgx, inp_files)
+
+    #: Convert numbers from strings to integers, if applicable
+    for i in range(0, len(all_grp_matches)):
+        tmp_match = all_grp_matches[i]
+        all_grp_matches[i] = str_to_int(tmp_match)
+
+    if len(all_grp_matches) == 0:
+        msg = f"The filePattern {file_pattern} did not match any files; please define it again!"
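+        #: Zero matches means the compiled regex fit none of the input files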
+        raise ValueError(
+            msg,
+        )
+
+    #: Populate dict if any matches need to be converted from char to digit
+    #: Key=named group, Value=Int representing matched chars
+    numbered_categories = {}
+    for named_grp in char_to_digit_categories:
+        numbered_categories[named_grp] = letters_to_int(named_grp, all_grp_matches)
+    # Check named groups that need c->d conversion
+    for named_grp in char_to_digit_categories:
+        for i in range(0, len(all_grp_matches)):
+            if all_grp_matches[i].get(named_grp):
+                #: Replace original matched letter with new digit
+                all_grp_matches[i][named_grp] = numbered_categories[named_grp][
+                    all_grp_matches[i][named_grp]
+                ]
+
+    with ProcessPoolExecutor(max_workers=NUM_THREADS) as executor:
+        threads = []
+        for match in all_grp_matches:
+            #: If running on WIPP
+            if out_dir != inp_dir:
+                #: Apply str formatting to change digit or char length
+                out_name = out_dir.resolve() / out_pattern_fstring.format(
+                    **match,
+                )
+                old_file_name = pathlib.Path(inp_dir, match["fname"])
+                threads.append(executor.submit(shutil.copy2, old_file_name, out_name))
+            else:
+                out_name = out_pattern_fstring.format(**match)  # type: ignore
+                old_file_name = match["fname"]  # type: ignore
+                logger.info(f"Old name {old_file_name} & new name {out_name}")
+                threads.append(
+                    executor.submit(
+                        os.rename,
+                        pathlib.Path(inp_dir, old_file_name),
+                        pathlib.Path(out_dir, out_name),
+                    ),
+                )
+
+        for f in tqdm(
+            as_completed(threads),
+            total=len(threads),
+            mininterval=5,
+            desc="renaming files",
+            initial=0,
+            unit_scale=True,
+            colour="cyan",
+        ):
+            f.result()
diff --git a/segmentation/zarr-autosegmentation-tool/tests/__init__.py b/segmentation/zarr-autosegmentation-tool/tests/__init__.py
new file mode 100644
index 000000000..abf20f391
--- /dev/null
+++ b/segmentation/zarr-autosegmentation-tool/tests/__init__.py
@@ -0,0 +1 @@
+"""File Renaming."""
diff --git a/segmentation/zarr-autosegmentation-tool/tests/file_rename_test.json b/segmentation/zarr-autosegmentation-tool/tests/file_rename_test.json
new file mode 100644
index 000000000..5c8a5109f
--- /dev/null
+++ b/segmentation/zarr-autosegmentation-tool/tests/file_rename_test.json
@@ -0,0 +1,103 @@
+{
+    "bleed_through_estimation_fixed_chars": [
+        "r001_c001_z000.ome.tif",
+        "r002_c001_z001.ome.tif",
+        "r001_c001_z000.ome.tif"
+    ],
+    "duplicate_channels_to_digit": [
+        "r001_c001_GFP.ome.tif",
+        "r002_c001_GFP.ome.tif",
+        "r001_c001_YFP.ome.tif"
+    ],
+    "percentage_file": [
+        "%033_c333_z5050.ome.tif",
+        "%444_c123_z50493.ome.tif"
+    ],
+    "robot": [
+        "003011-3-001001002.tif",
+        "005008-4-001001001.tif",
+        "006003-2-001001002.tif",
+        "006005-2-001001001.tif",
+        "007005-1-001001001.tif",
+        "007011-3-001001002.tif",
+        "006008-4-001001002.tif",
+        "006004-1-001001001.tif",
+        "007010-2-001001001.tif"
+    ],
+    "brain": [
+        "S1_R3_C1-C11_A1_y001_x010_c005.ome.tif",
+        "S1_R3_C1-C11_A1_y011_x021_c005.ome.tif",
+        "S1_R1_C1-C11_A1_y013_x007_c003.ome.tif",
+        "S1_R2_C1-C11_A1_y007_x006_c004.ome.tif",
+        "S1_R2_C1-C11_A1_y005_x012_c003.ome.tif",
+        "S1_R5_C1-C11_A1_y009_x016_c005.ome.tif",
+        "S1_R2_C1-C11_A1_y002_x018_c000.ome.tif"
+    ],
+    "variable": [
+        "S1_R4_C1-C11_A1_y2_x011_c6.ome.tif",
+        "S1_R4_C1-C11_A1_y1_x4_c4.ome.tif",
+        "S1_R1_C1-C11_A1_y7_x0_c0.ome.tif",
+        "S1_R2_C1-C11_A1_y011_x7_c8.ome.tif",
+        "S1_R5_C1-C11_A1_y012_x3_c4.ome.tif",
+        "S1_R2_C1-C11_A1_y3_x0_c6.ome.tif",
+        "S1_R2_C1-C11_A1_y7_x5_c3.ome.tif",
+        "S1_R2_C1-C11_A1_y011_x4_c6.ome.tif",
+        "S1_R2_C1-C11_A1_y2_x018_c0.ome.tif"
+    ],
+    "parenthesis": [
+        "img_x01_y01_(TXRED).tif",
+        "img_x01_y01_(GFP).tif",
"img_x01_y01_(DAPI).tif" + ], + "two_chan": [ + "img_x01_y01_TXRED_TXYELLOW.tif", + "img_x01_y02_GFP_YFP.tif", + "img_x03_y03_DAPI_DAPIYELLOW.tif" + ], + "three_chan": [ + "img_x01_y01_TXRED_TXYELLOW_ABC.tif", + "img_x01_y02_GFP_YFP_DEF.tif", + "img_x03_y03_DAPI_DAPIYELLOW_GHI.tif" + ], + "non_alphanum_int": [ + "img x01 y01 TXRED.tif", + "img x01 y02 GFP.tif", + "img x03 y03 DAPI.tif" + ], + "non_alphanum_float": [ + "img x01.22 y01 TXRED.tif", + "img x01.33 y02 GFP.tif", + "img x03.44 y03 DAPI.tif" + ], + "kph-kirill": [ + "0(01-16)0(01-24)-(1-4)-002.ome.tif", + "0(01-16)0(01-24)-(1-4)-001.ome.tif", + "0(01-16)0(01-24)-(1-4)-003.ome.tif" + ], + "three_char_chan": [ + "img x01 y01 GFP.tif", + "img x02 y02 YFP.tif", + "img x03 y02 ABC.tif", + "img x04 y00 DEF.tif" + ], + "tissuenet-val-labels-45-C": [ + "p0_y4_r730_c1.ome.tif", + "p2_y0_r36_c1.ome.tif", + "p4_y1_r232_c1.ome.tif", + "p0_y4_r731_c1.ome.tif", + "p2_y0_r37_c1.ome.tif", + "p4_y1_r233_c1.ome.tif", + "p0_y4_r732_c1.ome.tif", + "p2_y0_r38_c1.ome.tif", + "p4_y1_r234_c1.ome.tif", + "p1_y3_r365_c1.ome.tif", + "p3_y5_r110_c1.ome.tif", + "p5_y4_r18_c1.ome.tif", + "p1_y3_r366_c1.ome.tif", + "p3_y5_r111_c1.ome.tif", + "p5_y4_r19_c1.ome.tif", + "p1_y3_r367_c1.ome.tif", + "p3_y5_r112_c1.ome.tif", + "p5_y4_r20_c1.ome.tif" + ] +} diff --git a/segmentation/zarr-autosegmentation-tool/tests/test_main.py b/segmentation/zarr-autosegmentation-tool/tests/test_main.py new file mode 100644 index 000000000..e9d981bda --- /dev/null +++ b/segmentation/zarr-autosegmentation-tool/tests/test_main.py @@ -0,0 +1,654 @@ +"""Testing of File Renaming.""" + +import json +import pathlib +import shutil +import tempfile +from typing import Any +from typing import DefaultDict +from typing import Tuple +import click +import pytest +import numpy as np +from typer.testing import CliRunner + +from polus.images.formats.file_renaming import file_renaming as fr +from polus.images.formats.file_renaming.__main__ import app as app + +runner = CliRunner() + + +class CreateData: + """Generate tabular data with several different file format.""" + + def __init__(self): + """Define instance attributes.""" + self.dirpath = pathlib.Path(__file__).parent + self.jsonpath = self.dirpath.joinpath("file_rename_test.json") + + def input_directory(self) -> pathlib.Path: + """Create temporary input directory.""" + return tempfile.mkdtemp(dir=self.dirpath) + + def output_directory(self) -> pathlib.Path: + """Create temporary output directory.""" + return tempfile.mkdtemp(dir=self.dirpath) + + def runcommands( + self, inputs: pathlib.Path, inp_pattern: str, out_pattern: str + ) -> click.testing.Result: + """Run command line arguments.""" + inp_dir = self.input_directory() + out_dir = self.output_directory() + for inp in inputs: + pathlib.Path.open(pathlib.Path(inp_dir, inp), "w").close() + + outputs = runner.invoke( + app, + [ + "--inpDir", + str(inp_dir), + "--filePattern", + inp_pattern, + "--outDir", + str(out_dir), + "--outFilePattern", + out_pattern, + ], + ) + return outputs + + def load_json(self, x: str) -> DefaultDict[Any, Any]: + """Json file containing image filenames.""" + with pathlib.Path.open(self.jsonpath) as file: + data = json.load(file) + return data[x] + + def clean_directories(self) -> None: + """Remove files.""" + for d in self.dirpath.iterdir(): + if d.is_dir() and d.name.startswith("tmp"): + shutil.rmtree(d) + + +fixture_params = [ + [ + ( + "r{row:ddd}_c{col:ddd}_{chan:ccc}.ome.tif", + "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", + ), + ( + 
"r{row:d+}_c{col:d+}_{chan:c+}.ome.tif", + "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", + ), + ("r.ome.tif", "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif"), + ( + "%{row:ddd}_c{col:ddd}_z{z:d+}.ome.tif", + "%{row:dddd}_col{col:dddd}_z{z:d+}.ome.tif", + ), + ( + "00{one:d}0{two:dd}-{three:d}-00100100{four:d}.tif", + "output{one:dd}0{two:ddd}-{three:dd}-00100100{four:dd}.tif", + ), + ( + "S1_R{one:d}_C1-C11_A1_y0{two:dd}_x0{three:dd}_c0{four:dd}.ome.tif", + "output{one:dd}_C1-C11_A1_y0{two:ddd}_x0{three:ddd}_c0{four:ddd}.ome.tif", + ), + ( + "S1_R{one:d}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", + "output{one:dd}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", + ), + ( + "img_x{row:dd}_y{col:dd}_({chan:c+}).tif", + "output{row:dd}_{col:ddd}_{chan:dd}.tif", + ), + ( + "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}.tif", + "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}.tif", + ), + ( + "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}_{alphachan:ccc}.tif", + "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}_{alphachan:dddd}.tif", + ), + ( + "img x{row:dd} y{col:dd} {chan:ccc}.tif", + "output{row:ddd}_{col:ddd}_{chan:ccc}.tif", + ), + ( + "p{p:d}_y{y:d}_r{r:d+}_c{c:d+}.ome.tif", + "p{p:dd}_y{y:dd}_r{r:dddd}_c{c:ddd}.ome.tif", + ), + ( + "img x{row:dd} y{col:dd} {chan:c+}.tif", + "output{row:ddd}_{col:ddd}_{chan:dd}.tif", + ), + ( + "img x{row:dd}.{other:d+} y{col:dd} {chan:c+}.tif", + "output{row:ddd}_{col:ddd}_ {other:d+} {chan:dd}.tif", + ), + ( + "0({mo:dd}-{day:dd})0({mo2:dd}-{day2:dd})-({a:d}-{b:d})-{col:ddd}.ome.tif", + "0({mo:ddd}-{day:ddd})0{mo2:dd}-{day2:dd})-({a:dd}-{b:dd})-{col:ddd}.ome.tif", + ), + ] +] + + +@pytest.fixture(params=fixture_params) +def poly(request: Tuple[str, str]) -> pytest.FixtureRequest: + """To get the parameter of the fixture.""" + return request.param + + +def test_duplicate_channels_to_digit(poly: pytest.FixtureRequest) -> None: + """Testing of duplicate channels to digits.""" + d = CreateData() + inputs = d.load_json("duplicate_channels_to_digit") + (inp_pattern, out_pattern) = poly[0] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + + +def test_duplicate_channels_to_digit_non_spec_digit_len( + poly: pytest.FixtureRequest, +) -> None: + """Testing of duplicate channels to digits with non specified length of digits.""" + d = CreateData() + inputs = d.load_json("duplicate_channels_to_digit") + (inp_pattern, out_pattern) = poly[1] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + + +def test_invalid_input_raises_error(poly: pytest.FixtureRequest) -> None: + """Testing of invalid input filepattern.""" + d = CreateData() + inputs = d.load_json("duplicate_channels_to_digit") + (inp_pattern, out_pattern) = poly[0] + d.runcommands(inputs, inp_pattern, out_pattern) + + +def test_non_alphanum_inputs_percentage_sign(poly: pytest.FixtureRequest) -> None: + """Testing of filename with non alphanumeric inputs such as percentage sign.""" + d = CreateData() + inputs = d.load_json("percentage_file") + (inp_pattern, out_pattern) = poly[3] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + + +def test_numeric_fixed_width(poly: pytest.FixtureRequest) -> None: + """Testing of filename with numeric fixed length.""" + d = CreateData() + inputs = d.load_json("robot") + (inp_pattern, out_pattern) = poly[4] + outputs = d.runcommands(inputs, inp_pattern, out_pattern) + assert outputs.exit_code == 0 + + +def 
test_alphanumeric_fixed_width(poly: pytest.FixtureRequest) -> None:
+    """Testing of filename with alphanumeric fixed length."""
+    d = CreateData()
+    inputs = d.load_json("brain")
+    (inp_pattern, out_pattern) = poly[5]
+    outputs = d.runcommands(inputs, inp_pattern, out_pattern)
+    assert outputs.exit_code == 0
+
+
+def test_alphanumeric_variable_width(poly: pytest.FixtureRequest) -> None:
+    """Testing of filename with alphanumeric variable width."""
+    d = CreateData()
+    inputs = d.load_json("variable")
+    (inp_pattern, out_pattern) = poly[6]
+    outputs = d.runcommands(inputs, inp_pattern, out_pattern)
+    assert outputs.exit_code == 0
+    d.clean_directories()
+
+
+def test_parenthesis(poly: pytest.FixtureRequest) -> None:
+    """Testing of filename with parenthesis."""
+    d = CreateData()
+    inputs = d.load_json("parenthesis")
+    (inp_pattern, out_pattern) = poly[7]
+    outputs = d.runcommands(inputs, inp_pattern, out_pattern)
+    assert outputs.exit_code == 0
+
+
+def test_two_chan_to_digit(poly: pytest.FixtureRequest) -> None:
+    """Testing conversion of two channels to digits."""
+    d = CreateData()
+    inputs = d.load_json("two_chan")
+    (inp_pattern, out_pattern) = poly[8]
+    outputs = d.runcommands(inputs, inp_pattern, out_pattern)
+    assert outputs.exit_code == 0
+
+
+def test_three_chan_to_digit(poly: pytest.FixtureRequest) -> None:
+    """Test conversion of three channels to digits."""
+    d = CreateData()
+    inputs = d.load_json("three_chan")
+    (inp_pattern, out_pattern) = poly[9]
+    outputs = d.runcommands(inputs, inp_pattern, out_pattern)
+    assert outputs.exit_code == 0
+
+
+def test_three_char_chan(poly: pytest.FixtureRequest) -> None:
+    """Test conversion of three character channels to digits."""
+    d = CreateData()
+    inputs = d.load_json("three_char_chan")
+    (inp_pattern, out_pattern) = poly[10]
+    outputs = d.runcommands(inputs, inp_pattern, out_pattern)
+    assert outputs.exit_code == 0
+
+
+def test_varied_digits(poly: pytest.FixtureRequest) -> None:
+    """Test varied digits."""
+    d = CreateData()
+    inputs = d.load_json("tissuenet-val-labels-45-C")
+    (inp_pattern, out_pattern) = poly[11]
+    outputs = d.runcommands(inputs, inp_pattern, out_pattern)
+    assert outputs.exit_code == 0
+    d.clean_directories()
+
+
+def test_spaces(poly: pytest.FixtureRequest) -> None:
+    """Test non-alphanumeric chars such as spaces."""
+    d = CreateData()
+    inputs = d.load_json("non_alphanum_int")
+    (inp_pattern, out_pattern) = poly[12]
+    outputs = d.runcommands(inputs, inp_pattern, out_pattern)
+    assert outputs.exit_code == 0
+
+
+def test_non_alphanum_float(poly: pytest.FixtureRequest) -> None:
+    """Test non-alphanumeric chars such as spaces, periods, commas, brackets."""
+    d = CreateData()
+    inputs = d.load_json("non_alphanum_float")
+    (inp_pattern, out_pattern) = poly[13]
+    outputs = d.runcommands(inputs, inp_pattern, out_pattern)
+    assert outputs.exit_code == 0
+    d.clean_directories()
+
+
+def test_dashes_parentheses(poly: pytest.FixtureRequest) -> None:
+    """Test that non-alphanumeric chars such as dashes and parentheses are handled properly."""
+    d = CreateData()
+    inputs = d.load_json("kph-kirill")
+    (inp_pattern, out_pattern) = poly[14]
+    outputs = d.runcommands(inputs, inp_pattern, out_pattern)
+    assert outputs.exit_code == 0
+    d.clean_directories()
+
+
+def test_map_pattern_grps_to_regex_valid_input() -> None:
+    """Test of mapping input pattern."""
+    test_cases = [
+        (
+            ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"),
+            (
+                {
+                    "row": "(?P<row>[0-9][0-9])",
+                    "col": "(?P<col>[0-9][0-9])",
+                    "channel": "(?P<channel>[a-zA-Z]+)",
+                }
+            ),
+        ),
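+        # A lone c+ group and an empty pattern are edge cases handled below.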
+        (("img_x{row:c+}.tif"), ({"row": "(?P<row>[a-zA-Z]+)"})),
+        ((""), ({})),
+    ]
+    for test_case in test_cases:
+        (from_val, to_val) = test_case
+        result = fr.map_pattern_grps_to_regex(from_val)
+        assert result == to_val
+
+
+def test_convert_to_regex_valid_input() -> None:
+    """Test of converting to regular expression pattern."""
+    test_cases = [
+        (
+            ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"),
+            (
+                {
+                    "row": "(?P<row>[0-9][0-9])",
+                    "col": "(?P<col>[0-9][0-9])",
+                    "channel": "(?P<channel>[a-zA-Z]+)",
+                }
+            ),
+            (
+                "img_x(?P<row>[0-9][0-9])_y(?P<col>[0-9][0-9])_(?P<channel>[a-zA-Z]+).tif"
+            ),
+        ),
+        (
+            ("img_x{row:c+}.tif"),
+            ({"row": "(?P<row>[a-zA-Z]+)"}),
+            ("img_x(?P<row>[a-zA-Z]+).tif"),
+        ),
+        (("img_x01.tif"), ({}), ("img_x01.tif")),
+    ]
+    for test_case in test_cases:
+        (from_val1, from_val2, to_val) = test_case
+        result = fr.convert_to_regex(from_val1, from_val2)
+        assert result == to_val
+
+
+def test_specify_len_valid_input() -> None:
+    """Test of specifying length."""
+    test_cases = [
+        (
+            ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"),
+            ("newdata_x{row:03d}_y{col:03d}_c{channel:03d}.tif"),
+        ),
+        (("newdata_x{row:c+}.tif"), ("newdata_x{row:s}.tif")),
+        (("newdata_x01.tif"), ("newdata_x01.tif")),
+    ]
+    for test_case in test_cases:
+        (from_val, to_val) = test_case
+        result = fr.specify_len(from_val)
+        assert result == to_val
+
+
+def test_get_char_to_digit_grps_returns_unique_keys_valid_input() -> None:
+    """Test of getting characters to digit groups."""
+    test_cases = [
+        (
+            ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"),
+            ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"),
+            (["channel"]),
+        ),
+        (("img_x{row:c+}.tif"), ("newdata_x{row:c+}.tif"), ([])),
+        (("img_x01.tif"), ("newdata_x01.tif"), ([])),
+    ]
+    for test_case in test_cases:
+        (from_val1, from_val2, to_val) = test_case
+        result = fr.get_char_to_digit_grps(from_val1, from_val2)
+        assert result == to_val
+
+
+def test_extract_named_grp_matches_valid_input() -> None:
+    """Test of extracting group names."""
+    test_cases = [
+        (
+            (
+                "img_x(?P<row>[0-9][0-9])_y(?P<col>[0-9][0-9])_(?P<channel>[a-zA-Z]+).tif"
+            ),
+            (["img_x01_y01_DAPI.tif", "img_x01_y01_GFP.tif", "img_x01_y01_TXRED.tif"]),
+            (
+                [
+                    {
+                        "row": "01",
+                        "col": "01",
+                        "channel": "DAPI",
+                        "fname": "img_x01_y01_DAPI.tif",
+                    },
+                    {
+                        "row": "01",
+                        "col": "01",
+                        "channel": "GFP",
+                        "fname": "img_x01_y01_GFP.tif",
+                    },
+                    {
+                        "row": "01",
+                        "col": "01",
+                        "channel": "TXRED",
+                        "fname": "img_x01_y01_TXRED.tif",
+                    },
+                ]
+            ),
+        ),
+        (("img_x01.tif"), (["img_x01.tif"]), ([{"fname": "img_x01.tif"}])),
+    ]
+    for test_case in test_cases:
+        (from_val1, from_val2, to_val) = test_case
+        result = fr.extract_named_grp_matches(from_val1, from_val2)
+        assert result == to_val
+
+
+def test_extract_named_grp_matches_bad_pattern_invalid_input_fails() -> None:
+    """Test of invalid input pattern."""
+    test_cases = [
+        (
+            ("img_x(?P<row>[a-zA-Z]+).tif"),
+            (["img_x01_y01_DAPI.tif", "img_x01_y01_GFP.tif", "img_x01_y01_TXRED.tif"]),
+        )
+    ]
+    for test_case in test_cases:
+        (from_val1, from_val2) = test_case
+
+        result = fr.extract_named_grp_matches(from_val1, from_val2)
+        assert len(result) == 0
+
+
+def test_str_to_int_valid_input() -> None:
+    """Test of string to integer."""
+    test_cases = [
+        (
+            (
+                {
+                    "row": "01",
+                    "col": "01",
+                    "channel": "DAPI",
+                    "fname": "img_x01_y01_DAPI.tif",
+                }
+            ),
+            ({"row": 1, "col": 1, "channel": "DAPI", "fname": "img_x01_y01_DAPI.tif"}),
+        ),
+        (
+            (
+                {
+                    "row": "2",
+                    "col": "01",
+                    "channel": "TXRED",
+ "fname": "img_x01_y01_TXRED.tif", + } + ), + ), + ( + ( + { + "row": "0001", + "col": "0001", + "channel": "GFP", + "fname": "img_x01_y01_GFP.tif", + } + ), + ({"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}), + ), + ] + for test_case in test_cases: + (from_val, to_val) = test_case + result = fr.str_to_int(from_val) + assert result == to_val + + +def test_letters_to_int_returns_cat_index_dict_valid_input() -> None: + """Test of letter to integers.""" + test_cases = [ + ( + ("channel"), + [ + { + "row": 1, + "col": 1, + "channel": "DAPI", + "fname": "img_x01_y01_DAPI.tif", + }, + {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, + { + "row": 1, + "col": 1, + "channel": "TXRED", + "fname": "img_x01_y01_TXRED.tif", + }, + ], + ({"DAPI": 0, "GFP": 1, "TXRED": 2}), + ) + ] + for test_case in test_cases: + (from_val1, from_val2, to_val) = test_case + result = fr.letters_to_int(from_val1, from_val2) + assert result == to_val + + +@pytest.mark.xfail +def test_extract_named_grp_matches_duplicate_namedgrp_invalid_input() -> None: + """Test of invalid input pattern.""" + test_cases = [ + ( + ( + "x(?P[0-9][0-9])_y(?P[0-9][0-9])_c(?P[a-zA-Z]+).ome.tif" + ), + (["img_x01_y01_DAPI.tif", "img_x01_y01_GFP.tif", "img_x01_y01_TXRED.tif"]), + ) + ] + for test_case in test_cases: + (from_val1, from_val2) = test_case + fr.extract_named_grp_matches(from_val1, from_val2) + + +@pytest.mark.xfail +def test_letters_to_int_returns_error_invalid_input() -> None: + """Test of invalid inputs.""" + test_cases = [ + ( + (2), + [ + { + "row": 1, + "col": 1, + "channel": "DAPI", + "fname": "img_x01_y01_DAPI.tif", + }, + {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, + { + "row": 1, + "col": 1, + "channel": "TXRED", + "fname": "img_x01_y01_TXRED.tif", + }, + ], + ), + ] + for test_case in test_cases: + (from_val1, from_val2) = test_case + fr.letters_to_int(from_val1, from_val2) + + +@pytest.fixture +def create_subfolders() -> Tuple[pathlib.Path, str, str, str]: + """Creating directory and subdirectories.""" + data = { + "complex": [ + ["A9 p5d.tif", "A9 p5f.tif", "A9 p7f.tif"], + "96 ( -)* test_", + "{row:c}{col:d}.*p{f:d+}{character:c}.tif", + "x{row:dd}_y{col:dd}_p{f:dd}{character:c}_c01.tif", + ], + "simple": [ + [ + "taoe005-u2os-72h-cp-a-au00044859_a01_s3_w23db644df-02ee-429d-9559-09cf4625c62b.tif", + "taoe005-u2os-72h-cp-a-au00044859_b01_s3_w3add254c8-0c7b-4cf0-a5dc-bf0cf8de8cec.tif", + "taoe005-u2os-72h-cp-a-au00044859_b07_s5_w2da098211-f7c1-453d-954f-b7d4751f6daa.tif", + "taoe005-u2os-72h-cp-a-au00044859_c15_s2_w3aea523fa-3b89-46a7-95e3-604017151895.tif", + ], + "folder_", + ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", + "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", + ], + } + for name in ["complex", "simple"]: + d = CreateData() + dir_path = d.input_directory() + for i in range(5): + dirname = pathlib.Path(dir_path, f"{data[name][1]}{i}") + pathlib.Path(dirname).mkdir(exist_ok=False, parents=False) + for fl in data[name][0]: + temp_file = pathlib.Path.open(pathlib.Path(dirname, fl), "w") + temp_file.close() + + return pathlib.Path(dir_path), data[name][1], data[name][2], data[name][3] + + +def test_recursive_searching_files() -> None: + """Test recursive searching of files nested directories.""" + + dir_path = tempfile.mkdtemp(dir=pathlib.Path.cwd()) + out_dir = tempfile.mkdtemp(dir=pathlib.Path.cwd()) + for i in range(2): + dirname1 = "image_folder_" + dirname2 = "groundtruth_folder_" + dirname1 = pathlib.Path(dir_path, 
f"BBBC/BBBC001/Images/{dirname1}{i}") + dirname2 = pathlib.Path(dir_path, f"BBBC/BBBC001/Groundtruth/{dirname2}{i}") + pathlib.Path(dirname1).mkdir(exist_ok=False, parents=True) + pathlib.Path(dirname2).mkdir(exist_ok=False, parents=True) + + flist = [ + "AS_09125_050118150001_A03f00d0.tif", + "AS_09125_050118150001_A03f01d0.tif", + "AS_09125_050118150001_A03f02d0.tif", + "AS_09125_050118150001_A03f03d0.tif", + "AS_09125_050118150001_A03f04d0.tif", + "AS_09125_050118150001_A03f05d0.tif", + ] + + for fl in flist: + temp_file = pathlib.Path.open(pathlib.Path(dirname1, fl), "w") + temp_file = pathlib.Path.open(pathlib.Path(dirname2, fl), "w") + temp_file.close() + file_pattern = ".*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif" + out_file_pattern = "x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif" + map_directory = "raw" + + runner.invoke( + app, + [ + "--inpDir", + dir_path, + "--filePattern", + file_pattern, + "--outDir", + out_dir, + "--outFilePattern", + out_file_pattern, + "--mapDirectory", + map_directory, + ], + ) + assert list( + np.unique([p.name.split("_")[0] for p in pathlib.Path(out_dir).iterdir()]) + ) == ["groundtruth", "image"] + shutil.rmtree(dir_path) + shutil.rmtree(out_dir) + + +def test_cli(create_subfolders: pytest.FixtureRequest) -> None: + """Test Cli.""" + dir_path, _, file_pattern, out_file_pattern = create_subfolders + for i in ["raw", "map"]: + d = CreateData() + out_dir = d.output_directory() + result = runner.invoke( + app, + [ + "--inpDir", + dir_path, + "--filePattern", + file_pattern, + "--outDir", + out_dir, + "--outFilePattern", + out_file_pattern, + "--mapDirectory", + i, + ], + ) + assert result.exit_code == 0 + + d.clean_directories() From 3b46d70bee8482adb17cb298e8e0dedbf669baa9 Mon Sep 17 00:00:00 2001 From: David Liu Date: Fri, 3 Jan 2025 13:05:24 -0500 Subject: [PATCH 02/10] feat: new plugin and docker. 
need sophios working

---
 .../.bumpversion.cfg                          |  29 ++
 .../.dockerignore                             |   4 +
 .../.gitignore                                |   4 +
 .../.python-version                           |   1 +
 .../CHANGELOG.md                              |   5 +
 .../Dockerfile                                |  35 +++
 .../README.md                                 |  23 ++
 .../ome-zarr-autosegmentation-plugin/VERSION  |   1 +
 .../build-docker.sh                           |   4 +
 .../ome-zarr-autosegmentation-plugin/ict.yaml |  29 ++
 .../plugin.json                               |  63 +++++
 .../pyproject.toml                            |  32 +++
 .../run-plugin.sh                             |  17 ++
 .../ome_zarr_autosegmentation/__init__.py     |   7 +
 .../ome_zarr_autosegmentation/__main__.py     |  76 ++++++
 .../autosegmentation.py                       | 254 ++++++++++++++++++
 .../tests/__init__.py                         |   1 +
 .../tests/conftest.py                         | 147 ++++++++++
 .../tests/test_cli.py                         |  96 +++++++
 .../tests/test_ome_zarr_autosegmentation.py   |  22 ++
 20 files changed, 850 insertions(+)
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/.bumpversion.cfg
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/.dockerignore
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/.gitignore
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/.python-version
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/CHANGELOG.md
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/Dockerfile
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/README.md
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/VERSION
 create mode 100755 segmentation/ome-zarr-autosegmentation-plugin/build-docker.sh
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/ict.yaml
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/plugin.json
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/pyproject.toml
 create mode 100755 segmentation/ome-zarr-autosegmentation-plugin/run-plugin.sh
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/__init__.py
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/__main__.py
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/tests/__init__.py
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/tests/conftest.py
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/tests/test_cli.py
 create mode 100644 segmentation/ome-zarr-autosegmentation-plugin/tests/test_ome_zarr_autosegmentation.py

diff --git a/segmentation/ome-zarr-autosegmentation-plugin/.bumpversion.cfg b/segmentation/ome-zarr-autosegmentation-plugin/.bumpversion.cfg
new file mode 100644
index 000000000..b3f972743
--- /dev/null
+++ b/segmentation/ome-zarr-autosegmentation-plugin/.bumpversion.cfg
@@ -0,0 +1,29 @@
+[bumpversion]
+current_version = 0.1.0
+commit = False
+tag = False
+parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
+serialize = + {major}.{minor}.{patch}-{release}{dev} + {major}.{minor}.{patch} + +[bumpversion:part:release] +optional_value = _ +first_value = dev +values = + dev + _ + +[bumpversion:part:dev] + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" + +[bumpversion:file:VERSION] + +[bumpversion:file:README.md] + +[bumpversion:file:plugin.json] + +[bumpversion:file:src/polus/plugins/images/segmentation/ome_zarr_autosegmentation/__init__.py] diff --git a/segmentation/ome-zarr-autosegmentation-plugin/.dockerignore b/segmentation/ome-zarr-autosegmentation-plugin/.dockerignore new file mode 100644 index 000000000..7c603f814 --- /dev/null +++ b/segmentation/ome-zarr-autosegmentation-plugin/.dockerignore @@ -0,0 +1,4 @@ +.venv +out +tests +__pycache__ diff --git a/segmentation/ome-zarr-autosegmentation-plugin/.gitignore b/segmentation/ome-zarr-autosegmentation-plugin/.gitignore new file mode 100644 index 000000000..5c0dac495 --- /dev/null +++ b/segmentation/ome-zarr-autosegmentation-plugin/.gitignore @@ -0,0 +1,4 @@ +poetry.lock +uv.lock +test_datasets/** +models/** \ No newline at end of file diff --git a/segmentation/ome-zarr-autosegmentation-plugin/.python-version b/segmentation/ome-zarr-autosegmentation-plugin/.python-version new file mode 100644 index 000000000..e4fba2183 --- /dev/null +++ b/segmentation/ome-zarr-autosegmentation-plugin/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/segmentation/ome-zarr-autosegmentation-plugin/CHANGELOG.md b/segmentation/ome-zarr-autosegmentation-plugin/CHANGELOG.md new file mode 100644 index 000000000..b67793f7a --- /dev/null +++ b/segmentation/ome-zarr-autosegmentation-plugin/CHANGELOG.md @@ -0,0 +1,5 @@ +# CHANGELOG + +## 0.1.0 + +Initial release. diff --git a/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile b/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile new file mode 100644 index 000000000..a5ba2bd56 --- /dev/null +++ b/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile @@ -0,0 +1,35 @@ +# Build stage +FROM python:3.12-slim AS builder + +RUN apt-get update && apt-get install -y \ + gcc \ + g++ \ + python3-dev \ + && apt-get autoremove -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install uv +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ + +WORKDIR /app + +COPY pyproject.toml uv.lock ./ + +RUN uv pip install --system -e . + +COPY . . + +# Final stage +FROM python:3.12-slim + +WORKDIR /app + +COPY --from=builder /app /app + +COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages + +WORKDIR /app/src + +ENTRYPOINT ["python3", "-m", "polus.images.segmentation.ome_zarr_autosegmentation"] +CMD ["--help"] \ No newline at end of file diff --git a/segmentation/ome-zarr-autosegmentation-plugin/README.md b/segmentation/ome-zarr-autosegmentation-plugin/README.md new file mode 100644 index 000000000..ec8ef3fe3 --- /dev/null +++ b/segmentation/ome-zarr-autosegmentation-plugin/README.md @@ -0,0 +1,23 @@ +# ome_zarr_autosegmentation (0.1.0) + +description goes here + +## Building + +To build the Docker image for the conversion plugin, run `./build-docker.sh`. + +## Install WIPP Plugin + +If WIPP is running, navigate to the plugins page and add a new plugin. Paste the +contents of `plugin.json` into the pop-up window and submit. 
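+
+## Running
+
+A typical Docker invocation might look like the following; the image name and
+tag are assumptions based on this repository's conventions, since the full
+`build-docker.sh` and `plugin.json` are not shown here:
+
+```bash
+docker run -v /path/to/data:/data \
+  polusai/ome-zarr-autosegmentation-plugin:0.1.0 \
+  --inpDir /data/input \
+  --outDir /data/output
+```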
+
+## Options
+
+This plugin takes 3 input arguments and 1 output argument:
+
+| Name        | Description                                            | I/O    | Type       | Default |
+|-------------|--------------------------------------------------------|--------|------------|---------|
+| inpDir      | Input image collection to be processed by this plugin  | Input  | collection |         |
+| filePattern | Filename pattern used to separate data                 | Input  | string     | .*      |
+| preview     | Generate an output preview                             | Input  | boolean    | False   |
+| outDir      | Output collection                                      | Output | collection |         |
diff --git a/segmentation/ome-zarr-autosegmentation-plugin/VERSION b/segmentation/ome-zarr-autosegmentation-plugin/VERSION
new file mode 100644
index 000000000..6e8bf73aa
--- /dev/null
+++ b/segmentation/ome-zarr-autosegmentation-plugin/VERSION
@@ -0,0 +1 @@
+0.1.0
diff --git a/segmentation/ome-zarr-autosegmentation-plugin/build-docker.sh b/segmentation/ome-zarr-autosegmentation-plugin/build-docker.sh
new file mode 100755
index 000000000..6b77f22d4
--- /dev/null
+++ b/segmentation/ome-zarr-autosegmentation-plugin/build-docker.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+version=$(=2.1.0",
+    "torchvision>=0.16.0",
+    "greenlet>=3.1.1",
+    "ome-zarr>=0.9.0",
+    "bfio>=2.3.6",
+    "sam2>=1.1.0",
+    "typer>=0.15.1",
+]
+
+[[tool.uv.index]]
+name = "pytorch"
+url = "https://download.pytorch.org/whl/cu121"
+explicit = true
+
+[tool.uv.sources]
+torch = { index = "pytorch" }
+torchvision = { index = "pytorch" }
+
+[dependency-groups]
+dev = [
+    "ruff>=0.8.0",
+]
+
+[tool.ruff.lint]
+extend-select = ["I"]
diff --git a/segmentation/ome-zarr-autosegmentation-plugin/run-plugin.sh b/segmentation/ome-zarr-autosegmentation-plugin/run-plugin.sh
new file mode 100755
index 000000000..f09d908d3
--- /dev/null
+++ b/segmentation/ome-zarr-autosegmentation-plugin/run-plugin.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+version=$( None:
+    """Generate preview of the plugin outputs."""
+
+    preview = {}
+
+    with Path.open(out_dir / "preview.json", "w") as fw:
+        json.dump(preview, fw, indent=2)
+
+@app.command()
+def main(
+    inp_dir: Path = typer.Option(
+        ...,
+        "--inpDir",
+        "-i",
+        help="Input directory to be processed.",
+        exists=True,
+        readable=True,
+        file_okay=False,
+        resolve_path=True,
+    ),
+    out_dir: Path = typer.Option(
+        ...,
+        "--outDir",
+        "-o",
+        help="Output directory.",
+        exists=False,
+        writable=True,
+        file_okay=False,
+        resolve_path=True,
+    ),
+    preview: bool = typer.Option(
+        False,
+        "--preview",
+        "-v",
+        help="Preview of expected outputs (dry-run)",
+        show_default=False,
+    ),
+):
+    """ome_zarr_autosegmentation."""
+    logger.info(f"inpDir: {inp_dir}")
+    logger.info(f"outDir: {out_dir}")
+
+    if preview:
+        generate_preview(inp_dir, out_dir)
+        logger.info(f"generating preview data in: {out_dir}.")
+        return
+
+    autosegmentation(inp_dir, out_dir)
+
+
+if __name__ == "__main__":
+    app()
diff --git a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py
new file mode 100644
index 000000000..2a0c001ed
--- /dev/null
+++ b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py
@@ -0,0 +1,254 @@
+"""ome_zarr_autosegmentation."""
+
+from pathlib import Path
+
+import os
+
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+
+import pathlib
+from uuid import UUID
+
+import numpy as np
+import ome_zarr.scale
+import torch
+import zarr
+from ome_zarr.io import parse_url
+from ome_zarr.reader import Reader
+from
ome_zarr.writer import write_multiscale +from PIL import Image +from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator +from sam2.build_sam import build_sam2 + +def get_device(): + """Get the appropriate device for the current system.""" + if torch.backends.mps.is_available(): + return torch.device("mps") + elif torch.cuda.is_available(): + return torch.device("cuda") + return torch.device("cpu") + + +def init_sam2_predictor(checkpoint_path): + """Initialize SAM2 predictor with given checkpoint""" + device = get_device() + model = build_sam2("configs/sam2.1/sam2.1_hiera_s.yaml", checkpoint_path, device=str(device)) + return SAM2AutomaticMaskGenerator(model) + + +def generate_segmentation_mask(predictor, image): + """Generate segmentation mask for given PIL image.""" + # Convert PIL image to numpy array + img_array = np.array(image) + + # Convert grayscale to RGB if necessary + if len(img_array.shape) == 2 or ( + len(img_array.shape) == 3 and img_array.shape[2] == 1 + ): + # Stack the single channel three times to create RGB + img_array = np.stack([img_array] * 3, axis=-1) + + # Ensure array is in correct format (H, W, C) + if len(img_array.shape) != 3 or img_array.shape[2] != 3: + raise ValueError(f"Unexpected image shape: {img_array.shape}") + + # Generate masks + with torch.inference_mode(): + masks = predictor.generate(img_array) + + # Convert list of mask dictionaries to numpy array + mask_array = np.stack([mask["segmentation"] for mask in masks], axis=0) + return mask_array + + +def create_segmentation_overlay(image, masks, colors=None): + """Create a new image showing segmentation masks with different colors.""" + if len(masks) == 0: + return Image.new("RGB", image.size, (0, 0, 0)) + + # Generate random colors if none provided + if colors is None: + colors = [] + for i in range(len(masks)): + # Distribute hues evenly around color wheel + hue = i / len(masks) + h = hue * 6 + c = int(255) + x = int(255 * (1 - abs(h % 2 - 1))) + + if h < 1: + rgb = (c, x, 0) + elif h < 2: + rgb = (x, c, 0) + elif h < 3: + rgb = (0, c, x) + elif h < 4: + rgb = (0, x, c) + elif h < 5: + rgb = (x, 0, c) + else: + rgb = (c, 0, x) + + colors.append(rgb) + + # Create a black background image + result = Image.new("RGB", image.size, (0, 0, 0)) + + # Add each mask with its color + for i, mask in enumerate(masks): + mask_img = Image.fromarray((mask * 255).astype(np.uint8)).convert("L") + overlay = Image.new("RGB", image.size, colors[i]) + result = Image.composite(overlay, result, mask_img) + + return result + + +def segment_image(predictor, image): + # Generate masks + masks = generate_segmentation_mask(predictor, image) + + # Create visualization + return create_segmentation_overlay(image, masks) + + +def create_zarr_from_segmentations(segmentations, original_dataset_path, output_dataset_path): + """Create an OME-ZARR dataset from segmentation PNGs matching original structure.""" + # Get original structure + location = parse_url(original_dataset_path) + reader = Reader(location) + nodes = list(reader()) + image_node = nodes[0] + image_data = image_node.data[0] + ndim = len(image_data.shape) + + # Get original metadata + axes = image_node.metadata["axes"] + original_chunks = image_data.chunks[0] # First resolution level + + # Create output directory + output_path = pathlib.Path(output_dataset_path) + if output_path.exists(): + import shutil + + shutil.rmtree(output_path) + output_path.mkdir(parents=True) + + # Create store with nested directory settings + store = zarr.DirectoryStore( + 
str(output_path), dimension_separator="/" + ) # Use '/' for nested directories + root = zarr.group(store) + + # Get dimensions from first mask + first_mask = np.array(segmentations[0]) + if len(first_mask.shape) == 3: + first_mask = first_mask[..., 0] + + # Create array matching original dimensions + if ndim == 5: # (T, C, Z, Y, X) + masks = np.zeros( + (1, 1, len(segmentations), first_mask.shape[0], first_mask.shape[1]), + dtype=np.uint8, + ) + else: # (C, Z, Y, X) + masks = np.zeros( + (1, len(segmentations), first_mask.shape[0], first_mask.shape[1]), + dtype=np.uint8, + ) + + # Load all masks + print(f"Loading {len(segmentations)} segmentation masks...") + for i, segmentation in enumerate(segmentations): + mask = np.array(segmentation) + if len(mask.shape) == 3: + mask = mask[..., 0] + if ndim == 5: + masks[0, 0, i] = mask + else: + masks[0, i] = mask + + # Create pyramid using nearest neighbor for labels + scaler = ome_zarr.scale.Scaler() + pyramid = scaler.nearest(masks) + + # Write with nested directory structure + write_multiscale( + pyramid=pyramid, + group=root, + axes=axes, + storage_options={ + "chunks": original_chunks, + "dimension_separator": "/", # Ensure nested directory structure + }, + ) + + return output_path + + +def autosegment_dataset(input_dataset_path: Path | str, output_dataset_path: Path | str): + location = parse_url(input_dataset_path) + reader = Reader(location) + nodes = list(reader()) + + # First node has highest resolution + image_node = nodes[0] + image_data = image_node.data[0] + + print(f"Dataset shape: {image_data.shape}") + print(f"Data chunks: {image_data.chunks}") + + ndim = len(image_data.shape) + + if ndim == 5: # Typically (T, C, Z, Y, X) + print("5D dataset detected (T, C, Z, Y, X)") + volume = image_data[0, 0] + elif ndim == 4: # Typically (C, Z, Y, X) + print("4D dataset detected (C, Z, Y, X)") + volume = image_data[0] + else: + raise ValueError(f"Unexpected number of dimensions: {ndim}") + + num_slices = volume.shape[0] + print(f"Processing {num_slices} Z-slices from channel") + + segmentations = [] + sam2_predictor = init_sam2_predictor( + "../models/sam2.1_hiera_small.pt", + ) + for z in range(num_slices): + slice_data = volume[z].compute() + + # Normalize to 0-255 range + if slice_data.dtype != np.uint8: + slice_min = slice_data.min() + slice_max = slice_data.max() + if slice_max > slice_min: + slice_data = ( + (slice_data - slice_min) * 255 / (slice_max - slice_min) + ).astype(np.uint8) + else: + slice_data = np.zeros_like(slice_data, dtype=np.uint8) + + img = Image.fromarray(slice_data) + segmentations.append(segment_image(sam2_predictor, img)) + + if z % 10 == 0: + print(f"Processed slice {z}/{num_slices}") + + + create_zarr_from_segmentations(segmentations, + input_dataset_path, output_dataset_path + ) + + +def autosegmentation(inp_dir: Path, out_dir: Path): + """ome_zarr_autosegmentation. 
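+
+    Runs SAM2 automatic mask generation over every Z-slice of the input
+    OME-Zarr dataset (see autosegment_dataset above) and writes the
+    resulting segmentation overlays back out as a multiscale OME-Zarr.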
+
+    Args:
+        inp_dir: input directory to process
+        out_dir: output directory
+
+    Returns:
+        None
+    """
+    autosegment_dataset(inp_dir, out_dir)
\ No newline at end of file
diff --git a/segmentation/ome-zarr-autosegmentation-plugin/tests/__init__.py b/segmentation/ome-zarr-autosegmentation-plugin/tests/__init__.py
new file mode 100644
index 000000000..e665adfe7
--- /dev/null
+++ b/segmentation/ome-zarr-autosegmentation-plugin/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for ome_zarr_autosegmentation."""
diff --git a/segmentation/ome-zarr-autosegmentation-plugin/tests/conftest.py b/segmentation/ome-zarr-autosegmentation-plugin/tests/conftest.py
new file mode 100644
index 000000000..fd0c32168
--- /dev/null
+++ b/segmentation/ome-zarr-autosegmentation-plugin/tests/conftest.py
@@ -0,0 +1,147 @@
+"""Test fixtures.
+
+Set up all data used in tests.
+"""
+import tempfile
+import shutil
+from pathlib import Path
+import numpy as np
+import pytest
+import itertools
+
+from bfio import BioWriter, BioReader
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    """Add options to pytest."""
+    parser.addoption(
+        "--downloads",
+        action="store_true",
+        dest="downloads",
+        default=False,
+        help="run tests that download large data files",
+    )
+    parser.addoption(
+        "--slow",
+        action="store_true",
+        dest="slow",
+        default=False,
+        help="run slow tests",
+    )
+
+
+IMAGE_SIZES = [(1024 * (2**i), 1024 * (2**i)) for i in range(1, 2)]
+LARGE_IMAGE_SIZES = [(1024 * (2**i), 1024 * (2**i)) for i in range(4, 5)]
+PIXEL_TYPES = [np.uint8, float]
+PARAMS = [
+    (image_size, pixel_type)
+    for image_size, pixel_type in itertools.product(
+        IMAGE_SIZES, PIXEL_TYPES
+    )
+]
+LARGE_DATASET_PARAMS = [
+    (image_size, pixel_type)
+    for image_size, pixel_type in itertools.product(
+        LARGE_IMAGE_SIZES, PIXEL_TYPES
+    )
+]
+
+
+FixtureReturnType = tuple[
+    Path,  # input dir
+    Path,  # output dir
+    Path,  # ground truth dir
+    Path,  # input image path
+    Path,  # ground truth path
+]
+
+
+@pytest.fixture(params=PARAMS)
+def generate_test_data(request: pytest.FixtureRequest) -> FixtureReturnType:
+    """Generate staging temporary directories with test data and ground truth."""
+
+    # collect test params
+    image_size, pixel_type = request.param
+    test_data = _generate_test_data(image_size, pixel_type)
+    print(test_data)
+    yield from test_data
+
+
+@pytest.fixture(params=LARGE_DATASET_PARAMS)
+def generate_large_test_data(request: pytest.FixtureRequest) -> FixtureReturnType:
+    """Generate staging temporary directories with test data and ground truth."""
+
+    # collect test params
+    image_size, pixel_type = request.param
+    test_data = _generate_test_data(image_size, pixel_type)
+
+    print(test_data)
+
+    yield from test_data
+
+
+def _generate_test_data(image_size: tuple[int, int], pixel_type: type) -> FixtureReturnType:
+    """Generate staging temporary directories with test data and ground truth."""
+
+    image_x, image_y = image_size
+
+    # staging area
+    data_dir = Path(tempfile.mkdtemp(suffix="_data_dir"))
+    inp_dir = data_dir.joinpath("inp_dir")
+    inp_dir.mkdir(exist_ok=True)
+    out_dir = data_dir.joinpath("out_dir")
+    out_dir.mkdir(exist_ok=True)
+    ground_truth_dir = data_dir.joinpath("ground_truth_dir")
+    ground_truth_dir.mkdir(exist_ok=True)
+
+    # generate image and ground_truth
+    img_path = inp_dir.joinpath("img.ome.tif")
+    image = gen_2D_image(img_path, image_x, image_y, pixel_type)
+    ground_truth_path = ground_truth_dir.joinpath("ground_truth.ome.tif")
+    gen_ground_truth(img_path, ground_truth_path)
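+
+    # Note: everything after the yield below runs at fixture teardown,
+    # so the staging directory is removed once the test completes.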
+
+    yield inp_dir, out_dir, ground_truth_dir, img_path, ground_truth_path
+
+    shutil.rmtree(data_dir)
+
+def gen_2D_image(
+    img_path,
+    image_x,
+    image_y,
+    pixel_type
+):
+    """Generate a random 2D square image."""
+
+    if np.issubdtype(pixel_type, np.floating):
+        rng = np.random.default_rng()
+        image = rng.uniform(0.0, 1.0,
+            size=(image_y, image_x)
+        ).astype(pixel_type)
+    else:
+        image = np.random.randint(0, 255, size=(image_y, image_x)).astype(pixel_type)
+
+    with BioWriter(img_path) as writer:
+        (y, x) = image.shape
+        writer.Y = y
+        writer.X = x
+        writer.Z = 1
+        writer.C = 1
+        writer.T = 1
+        writer.dtype = image.dtype
+        writer[:] = image[:]
+
+    return image
+
+
+def gen_ground_truth(img_path: Path, ground_truth_path: Path):
+    """Generate some ground truth from the image data.
+
+    Here we generate a simple binary mask.
+    """
+
+    with BioReader(img_path) as reader:
+        with BioWriter(ground_truth_path, metadata=reader.metadata) as writer:
+            ground_truth = np.asarray(reader[:] != 0)
+            writer[:] = ground_truth
+
+    return ground_truth
\ No newline at end of file
diff --git a/segmentation/ome-zarr-autosegmentation-plugin/tests/test_cli.py b/segmentation/ome-zarr-autosegmentation-plugin/tests/test_cli.py
new file mode 100644
index 000000000..6a580a43a
--- /dev/null
+++ b/segmentation/ome-zarr-autosegmentation-plugin/tests/test_cli.py
@@ -0,0 +1,96 @@
+"""Testing the Command Line Tool."""
+
+import faulthandler
+import json
+from pathlib import Path
+from typer.testing import CliRunner
+
+from .conftest import FixtureReturnType
+
+from polus.images.segmentation.ome_zarr_autosegmentation.__main__ import app
+
+faulthandler.enable()
+
+
+def test_cli(generate_test_data: FixtureReturnType) -> None:  # noqa
+    """Test the command line."""
+    inp_dir, out_dir, ground_truth_dir, img_path, ground_truth_path = generate_test_data  # noqa
+
+    runner = CliRunner()
+
+    result = runner.invoke(
+        app,
+        [
+            "--inpDir",
+            inp_dir,
+            "--outDir",
+            out_dir,
+        ],
+    )
+
+    assert result.exit_code == 0
+
+def test_cli_short(generate_test_data: FixtureReturnType):  # noqa
+    """Test the command line."""
+    runner = CliRunner()
+
+    inp_dir, out_dir, _, _, _ = generate_test_data  # noqa
+
+    result = runner.invoke(
+        app,
+        [
+            "-i",
+            inp_dir,
+            "-o",
+            out_dir,
+        ],
+    )
+
+    assert result.exit_code == 0
+
+def test_cli_preview(generate_test_data: FixtureReturnType):  # noqa
+    """Test the preview option."""
+    runner = CliRunner()
+
+    inp_dir, out_dir, _, _, _ = generate_test_data  # noqa
+
+    result = runner.invoke(
+        app,
+        [
+            "--inpDir",
+            inp_dir,
+            "--outDir",
+            out_dir,
+            "--preview",
+        ],
+    )
+
+    assert result.exit_code == 0
+
+    with Path.open(out_dir / "preview.json") as file:
+        plugin_json = json.load(file)
+
+    # verify we generate the preview file
+    assert plugin_json == {}
+
+
+def test_cli_bad_input(generate_test_data: FixtureReturnType):  # noqa
+    """Test bad inputs."""
+    runner = CliRunner()
+
+    inp_dir, out_dir, _, _, _ = generate_test_data  # noqa
+    # replace with a bad path
+    inp_dir = "/does_not_exists"
+
+    result = runner.invoke(
+        app,
+        [
+            "--inpDir",
+            inp_dir,
+            "--outDir",
+            out_dir,
+        ],
+    )
+
+    assert result.exc_info[0] is SystemExit
diff --git a/segmentation/ome-zarr-autosegmentation-plugin/tests/test_ome_zarr_autosegmentation.py b/segmentation/ome-zarr-autosegmentation-plugin/tests/test_ome_zarr_autosegmentation.py
new file mode 100644
index 000000000..29e017936
--- /dev/null
+++ b/segmentation/ome-zarr-autosegmentation-plugin/tests/test_ome_zarr_autosegmentation.py
@@ -0,0 +1,22 @@
+"""Tests for 
ome_zarr_autosegmentation."""

+import pytest
+from polus.images.segmentation.ome_zarr_autosegmentation.autosegmentation import (
+    autosegmentation,
+)
+from .conftest import FixtureReturnType
+
+
+def test_ome_zarr_autosegmentation(generate_test_data: FixtureReturnType):
+    """Test ome_zarr_autosegmentation."""
+    inp_dir, out_dir, ground_truth_dir, img_path, ground_truth_path = generate_test_data
+    assert autosegmentation(inp_dir, out_dir) is None
+
+
+@pytest.mark.skipif("not config.getoption('slow')")
+def test_ome_zarr_autosegmentation_large(generate_large_test_data: FixtureReturnType):
+    """Test ome_zarr_autosegmentation on large images."""
+    inp_dir, out_dir, ground_truth_dir, img_path, ground_truth_path = generate_large_test_data
+    assert autosegmentation(inp_dir, out_dir) is None
\ No newline at end of file

From 37804baa159fd383a3696db77a67831f0c84603d Mon Sep 17 00:00:00 2001
From: David Liu 
Date: Fri, 3 Jan 2025 13:07:07 -0500
Subject: [PATCH 03/10] chore: update readme

---
 .../README.md | 6 +-
 .../.bumpversion.cfg | 27 -
 .../zarr-autosegmentation-tool/CHANGELOG.md | 4 -
 .../zarr-autosegmentation-tool/Dockerfile | 25 -
 .../zarr-autosegmentation-tool/README.md | 71 --
 .../zarr-autosegmentation-tool/VERSION | 1 -
 .../build-docker.sh | 22 -
 .../filerenaming.cwl | 36 -
 .../zarr-autosegmentation-tool/ict.yaml | 65 --
 .../zarr-autosegmentation-tool/plugin.json | 81 ---
 .../zarr-autosegmentation-tool/pyproject.toml | 28 -
 .../zarr-autosegmentation-tool/run-plugin.sh | 23 -
 .../zarr_autosegmentation/__init__.py | 4 -
 .../zarr_autosegmentation/__main__.py | 184 -----
 .../zarr_autosegmentation.py | 406 -----------
 .../tests/__init__.py | 1 -
 .../tests/file_rename_test.json | 103 ---
 .../tests/test_main.py | 654 ------------------
 18 files changed, 3 insertions(+), 1738 deletions(-)
 delete mode 100644 segmentation/zarr-autosegmentation-tool/.bumpversion.cfg
 delete mode 100644 segmentation/zarr-autosegmentation-tool/CHANGELOG.md
 delete mode 100644 segmentation/zarr-autosegmentation-tool/Dockerfile
 delete mode 100644 segmentation/zarr-autosegmentation-tool/README.md
 delete mode 100644 segmentation/zarr-autosegmentation-tool/VERSION
 delete mode 100644 segmentation/zarr-autosegmentation-tool/build-docker.sh
 delete mode 100644 segmentation/zarr-autosegmentation-tool/filerenaming.cwl
 delete mode 100644 segmentation/zarr-autosegmentation-tool/ict.yaml
 delete mode 100644 segmentation/zarr-autosegmentation-tool/plugin.json
 delete mode 100644 segmentation/zarr-autosegmentation-tool/pyproject.toml
 delete mode 100644 segmentation/zarr-autosegmentation-tool/run-plugin.sh
 delete mode 100644 segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/__init__.py
 delete mode 100644 segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/__main__.py
 delete mode 100644 segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/zarr_autosegmentation.py
 delete mode 100644 segmentation/zarr-autosegmentation-tool/tests/__init__.py
 delete mode 100644 segmentation/zarr-autosegmentation-tool/tests/file_rename_test.json
 delete mode 100644 segmentation/zarr-autosegmentation-tool/tests/test_main.py

diff --git a/segmentation/ome-zarr-autosegmentation-plugin/README.md b/segmentation/ome-zarr-autosegmentation-plugin/README.md
index ec8ef3fe3..d9704f836 100644
--- a/segmentation/ome-zarr-autosegmentation-plugin/README.md
+++ 
b/segmentation/ome-zarr-autosegmentation-plugin/README.md
@@ -5,6 +5,7 @@ description goes here
 ## Building
 
 To build the Docker image for the conversion plugin, run `./build-docker.sh`.
+Download the SAM2 model checkpoint you want to use from `https://github.com/facebookresearch/sam2?tab=readme-ov-file#download-checkpoints` and place it in the `models/` directory (the code loads `../models/sam2.1_hiera_small.pt` by default).
 
 ## Install WIPP Plugin
 
@@ -13,11 +14,10 @@ contents of `plugin.json` into the pop-up window and submit.
 
 ## Options
 
-This plugin takes three input arguments and one output argument:
+This plugin takes two input arguments and one output argument:
 
 | Name | Description | I/O | Type | Default |
 |---------------|-------------------------|--------|------------|---------|
-| inpDir | Input image collection to be processed by this plugin | Input | collection | |
-| filePattern | Filename pattern used to separate data | Input | string | .* |
+| inpDir | Input dataset to be processed by this plugin | Input | collection | |
 | preview | Generate an output preview | Input | boolean | False |
 | outDir | Output collection | Output | collection | |
diff --git a/segmentation/zarr-autosegmentation-tool/.bumpversion.cfg b/segmentation/zarr-autosegmentation-tool/.bumpversion.cfg
deleted file mode 100644
index 9f1772079..000000000
--- a/segmentation/zarr-autosegmentation-tool/.bumpversion.cfg
+++ /dev/null
@@ -1,27 +0,0 @@
-[bumpversion]
-current_version = 0.2.4
-commit = True
-tag = False
-parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
-serialize =
-	{major}.{minor}.{patch}-{release}{dev}
-	{major}.{minor}.{patch}
-
-[bumpversion:part:release]
-optional_value = _
-first_value = dev
-values =
-	dev
-	_
-
-[bumpversion:part:dev]
-
-[bumpversion:file:pyproject.toml]
-search = version = "{current_version}"
-replace = version = "{new_version}"
-
-[bumpversion:file:plugin.json]
-
-[bumpversion:file:VERSION]
-
-[bumpversion:file:src/polus/images/formats/file_renaming/__init__.py]
diff --git a/segmentation/zarr-autosegmentation-tool/CHANGELOG.md b/segmentation/zarr-autosegmentation-tool/CHANGELOG.md
deleted file mode 100644
index 02a40369f..000000000
--- a/segmentation/zarr-autosegmentation-tool/CHANGELOG.md
+++ /dev/null
@@ -1,4 +0,0 @@
-## [0.2.4-dev0] - 2024-01-17
-### Added
-- Pytests to test this plugin
-- Added a support for recursively searching for files within a directory and its subdirectories of specified pattern by passing value either raw or map for `mapDirectory` input argument.
diff --git a/segmentation/zarr-autosegmentation-tool/Dockerfile b/segmentation/zarr-autosegmentation-tool/Dockerfile
deleted file mode 100644
index 2ff053e1e..000000000
--- a/segmentation/zarr-autosegmentation-tool/Dockerfile
+++ /dev/null
@@ -1,25 +0,0 @@
-FROM python:3.13-slim AS builder
-
-# environment variables defined in polusai/bfio
-ENV EXEC_DIR="/opt/executables"
-ENV POLUS_IMG_EXT=".ome.tif"
-ENV POLUS_TAB_EXT=".csv"
-ENV POLUS_LOG="INFO"
-
-# Work directory defined in the base container
-WORKDIR ${EXEC_DIR}
-
-# TODO: Change the tool_dir to the tool directory
-ENV TOOL_DIR="formats/zarr-flatten-tool"
-
-# Copy the repository into the container
-RUN mkdir image-tools
-COPY . 
${EXEC_DIR}/image-tools - -# Install the tool -RUN pip3 install "${EXEC_DIR}/image-tools/${TOOL_DIR}" --no-cache-dir - -# Set the entrypoint -# TODO: Change the entrypoint to the tool entrypoint -ENTRYPOINT ["python3", "-m", "polus.images.formats.file_renaming"] -CMD ["--help"] diff --git a/segmentation/zarr-autosegmentation-tool/README.md b/segmentation/zarr-autosegmentation-tool/README.md deleted file mode 100644 index 37e01d17d..000000000 --- a/segmentation/zarr-autosegmentation-tool/README.md +++ /dev/null @@ -1,71 +0,0 @@ -# File Renaming(0.2.4-dev0) -This WIPP plugin uses supplied file naming patterns to dynamically -rename and save files in an image collection to a new image collection. - -## Example Usage -* The user can upload an image collection where all files contain similar -naming conventions. - - * **Input collection:** -`img_x01_y01_DAPI.tif` -`img_x01_y01_GFP.tif` -`img_x01_y01_TXRED.tif` - - * **Output collection:** -`newdata_x001_y001_c001.tif` -`newdata_x001_y001_c002.tif` -`newdata_x001_y001_c003.tif` - - * **User input pattern:** -`img_x{row:dd}_y{col:dd}_{channel:c+}.ome.tif` - - * **User output pattern:** -`newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.ome.tif` - -* The user can format the output digit using the number of digits -specified in the output format. - * `d` represents *digit* - * `c` represents *character*. - -* Note that c+ only matches letters in the alphabet, not symbols and numbers - -* If the output formats have plus signs (+), then the number of output -digits/characters is not fixed. - -* Finally, the input and output pattern data types *must* agree with one -exception: - * If the input is a chracter and the output is digit, -then the script sorts the strings that match the character pattern and -assigns numbers 0+ to them. - -* New optional feature `mapDirectory` implemented to include directory name in renamed files. This plugin also handles nested directories and one level up directory name is added to renamed files if `raw` value passed, `map` for mapped subdirectories `d0, d1, d2, ... dn` and if not passed then no directory name is added in renamed files. - - -Contact [Melanie Parham](mailto:melanie.parham@axleinfo.com), [Hamdah Shafqat abbasi](mailto:hamdahshafqat.abbasi@nih.gov) for more -information. - -For more information on WIPP, visit the -[official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). - -## Building - -To build the Docker image for the conversion plugin, run -`./build-docker.sh`. - -## Install WIPP Plugin - -If WIPP is running, navigate to the plugins page and add a new plugin. -Paste the contents of `plugin.json` into the pop-up window and submit. 
- -## Options - -This plugin takes three input argument and one output argument: - -| Name | Description | I/O | Type | -|--------------------|-----------------------------------|----------|------------| -| `--inpDir` | Input image collection | Input | collection | -| `--filePattern` | Input filename pattern | Input | string | -| `--outDir` | Output collection | Output | collection | -| `--outFilePattern` | Output filename pattern | Input | string | -| `--mapDirectory` | Directory name (`raw`, `map`) | Input | enum | -| `--preview` | Generate a JSON file with outputs | Output | JSON | diff --git a/segmentation/zarr-autosegmentation-tool/VERSION b/segmentation/zarr-autosegmentation-tool/VERSION deleted file mode 100644 index abd410582..000000000 --- a/segmentation/zarr-autosegmentation-tool/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.2.4 diff --git a/segmentation/zarr-autosegmentation-tool/build-docker.sh b/segmentation/zarr-autosegmentation-tool/build-docker.sh deleted file mode 100644 index ebe00add2..000000000 --- a/segmentation/zarr-autosegmentation-tool/build-docker.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Change the name of the tool here -tool_dir="formats" -tool_name="file-renaming-tool" - -# The version is read from the VERSION file -version=$(", -"Hamdah Shafqat abbasi " -] -readme = "README.md" -packages = [{include = "polus", from = "src"}] - -[tool.poetry.dependencies] -python = ">=3.9,<3.12" -typer = "^0.7.0" -tqdm = "^4.64.1" -numpy = "^1.26.3" - -[tool.poetry.group.dev.dependencies] -bump2version = "^1.0.1" -pre-commit = "^3.1.0" -black = "^23.1.0" -flake8 = "^6.0.0" -mypy = "^1.0.1" -pytest = "^7.2.1" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/segmentation/zarr-autosegmentation-tool/run-plugin.sh b/segmentation/zarr-autosegmentation-tool/run-plugin.sh deleted file mode 100644 index c9b7a5ef3..000000000 --- a/segmentation/zarr-autosegmentation-tool/run-plugin.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -version=$( None: - """Takes a pyramidal dataset of ome zarr data and flattens it into - high resolution slices, usually along the z axis. - - See README for pattern rules. - - asdf - - Args: - inpDir: Path or url to an ome zarr database - filePattern: Input file pattern - outDir: Path to image collection storing copies of renamed files - outFilePattern: Output file pattern - mapDirectory: Include foldername to the renamed files - - Returns: - output_dict: Dictionary of in to out file names, for testing - - """ # noqa: D205 - logger.info(f"inpDir = {inp_dir}") - logger.info(f"filePattern = {file_pattern}") - logger.info(f"outDir = {out_dir}") - logger.info(f"outFilePattern = {out_file_pattern}") - logger.info(f"mapDirectory = {map_directory}") - - inp_dir = inp_dir.resolve() - out_dir = out_dir.resolve() - - assert ( - inp_dir.exists() - ), f"{inp_dir} does not exists!! Please check input path again" - assert ( - out_dir.exists() - ), f"{out_dir} does not exists!! Please check output path again" - - subdirs, subfiles = fr.get_data(inp_dir) - if subfiles: - assert len(subfiles) != 0, "Files are missing in input directory!!!" 
- - if not map_directory: - fr.rename( - inp_dir, - out_dir, - file_pattern, - out_file_pattern, - ) - - elif map_directory: - file_ext = re.split("\\.", file_pattern)[-1] - - subdirs = np.unique( - [ - sub - for sub in subdirs - for f in pathlib.Path(sub).rglob("*") - if f.suffix == f".{file_ext}" - ], - ) - - if len(subdirs) == 1: - logger.info( - "Renaming files in a single directory.", - ) - dir_pattern = r"^[A-Za-z0-9_]+$" - # Iterate over the directories and check if they match the pattern - matching_directory: Optional[Match[Any]] = re.match( - dir_pattern, - pathlib.Path(subdirs[0]).stem, - ) - if matching_directory is not None: - matching_directory = matching_directory.group() - if f"{map_directory}" == "raw": - outfile_pattern = f"{matching_directory}_{out_file_pattern}" - if f"{map_directory}" == "map": - outfile_pattern = f"d1_{out_file_pattern}" - - fr.rename(subdirs[0], out_dir, file_pattern, outfile_pattern) - logger.info( - "Finished renaming files.", - ) - if len(subdirs) > 1: - subnames = [pathlib.Path(sb).name for sb in subdirs] - sub_check = all(name == subnames[0] for name in subnames) - - for i, sub in enumerate(subdirs): - assert ( - len([f for f in pathlib.Path(sub).iterdir() if f.is_file()]) != 0 - ), "Files are missing in input directory!!!" - dir_pattern = r"^[A-Za-z0-9_]+$" - # Iterate over the directories and check if they match the pattern - matching_directories: Optional[Match[Any]] = re.match( - dir_pattern, - pathlib.Path(sub).stem, - ) - if matching_directories is not None: - matching_directories = matching_directories.group() - - if not sub_check and f"{map_directory}" == "raw": - outfile_pattern = f"{matching_directories}_{out_file_pattern}" - elif subnames and f"{map_directory}" == "raw": - logger.error( - "Subdirectoy names are same, should be different.", - ) - break - else: - outfile_pattern = f"d{i}_{out_file_pattern}" - fr.rename(sub, out_dir, file_pattern, outfile_pattern) - logger.info( - "Finished renaming files.", - ) - - if preview: - with pathlib.Path.open(pathlib.Path(out_dir, "preview.json"), "w") as jfile: - out_json: dict[str, Any] = { - "filepattern": out_file_pattern, - "outDir": [], - } - for file in out_dir.iterdir(): - if file.is_file() and file.suffix != ".json": - out_name = file.name - out_json["outDir"].append(out_name) - json.dump(out_json, jfile, indent=2) - - -if __name__ == "__main__": - app() diff --git a/segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/zarr_autosegmentation.py b/segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/zarr_autosegmentation.py deleted file mode 100644 index 2b570d7b8..000000000 --- a/segmentation/zarr-autosegmentation-tool/src/polus/images/segmentation/zarr_autosegmentation/zarr_autosegmentation.py +++ /dev/null @@ -1,406 +0,0 @@ -"""File Renaming.""" -import enum -import logging -import os -import pathlib -import re -import shutil -from concurrent.futures import ProcessPoolExecutor -from concurrent.futures import as_completed -from multiprocessing import cpu_count -from sys import platform -from typing import Any -from typing import Union - -from tqdm import tqdm - -EXT = (".csv", ".txt", ".cppipe", ".yml", ".yaml", ".xml", ".json") - -logger = logging.getLogger(__name__) -logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO)) - -if platform == "linux" or platform == "linux2": - NUM_THREADS = len(os.sched_getaffinity(0)) # type: ignore -else: - NUM_THREADS = max(cpu_count() // 2, 2) - - -class 
MappingDirectory(str, enum.Enum): - """Map Directory information.""" - - RAW = "raw" - MAP = "map" - Default = "" - - -def image_directory(dirpath: pathlib.Path) -> Union[bool, None]: - """Fetching image directory only. - - Args: - dirpath: Path to directory. - - Returns: - bool. - """ - for file in dirpath.iterdir(): - return bool(file.is_file() and file.suffix not in EXT) - return None - - -def get_data(inp_dir: str) -> tuple[list[pathlib.Path], list[pathlib.Path]]: - """Get group names from pattern. Convert patterns (c+ or dd) to regex. - - Args: - inp_dir: Path to input directory. - - Returns: - A tuple of list of subdirectories and files path. - """ - filepath: list[pathlib.Path] = [] - dirpaths: list[pathlib.Path] = [] - for path in pathlib.Path(inp_dir).rglob("*"): - if path.is_dir(): - if path.parent in dirpaths: - dirpaths.remove(path.parent) - if image_directory(path): - dirpaths.append(path) - elif path.is_file() and not path.name.endswith(tuple(EXT)): - fpath = pathlib.Path(inp_dir).joinpath(path) - filepath.append(fpath) - - return dirpaths, filepath - - -def map_pattern_grps_to_regex(file_pattern: str) -> dict: - """Get group names from pattern. Convert patterns (c+ or dd) to regex. - - Args: - file_pattern: File pattern, with special characters escaped. - - Returns: - rgx_patterns: The key is a named regex group. The value is regex. - """ - logger.debug(f"pattern_to_regex() inputs: {file_pattern}") - #: Extract the group name and associated pattern (ex: {row:dd}) - group_and_pattern_tuples = re.findall(r"\{(\w+):([dc+]+)\}", file_pattern) - pattern_map = {"d": r"[0-9]", "c": r"[a-zA-Z]", "+": "+"} - rgx_patterns = {} - for group_name, groups_pattern in group_and_pattern_tuples: - rgx = "".join([pattern_map[pattern] for pattern in groups_pattern]) - #: ?P is included to specify that foo is a named group. - rgx_patterns[group_name] = rf"(?P<{group_name}>{rgx})" - logger.debug(f"pattern_to_regex() returns {rgx_patterns}") - - return rgx_patterns - - -def convert_to_regex(file_pattern: str, extracted_rgx_patterns: dict) -> str: - """Integrate regex into original file pattern. - - The extracted_rgx_patterns helps replace simple patterns (ie. dd, c+) - with regex in the correct location, based on named groups. - - Args: - file_pattern: file pattern provided by the user. - extracted_rgx_patterns: named group and regex value dictionary. - - Returns: - new_pattern: file pattern converted to regex. - """ - logger.debug(f"convert_to_regex() inputs: {file_pattern}, {extracted_rgx_patterns}") - rgx_pattern = file_pattern - for named_grp, regex_str in extracted_rgx_patterns.items(): - #: The prefix "fr" creates raw f-strings, which act like format() - rgx_pattern = re.sub(rf"\{{{named_grp}:.*?\}}", regex_str, rgx_pattern) - logger.debug(f"convert_to_regex() returns {rgx_pattern}") - return rgx_pattern - - -def specify_len(out_pattern: str) -> str: - """Update output file pattern to output correct number of digits. - - After extracting group names and associated patterns from the - outFilePattern, integrate format strings into the file pattern to - accomplish. - - Example: - "newdata_x{row:ddd}" becomes "new_data{row:03d}". - - Args: - out_pattern: output file pattern provided by the user. - - Returns: - new_out_pattern: file pattern converted to format string. 
- """ - logger.debug(f"specify_len() inputs: {out_pattern}") - #: Extract the group name and associated pattern (ex: {row:dd}) - group_and_pattern_tuples = re.findall(r"\{(\w+):([dc+]+)\}", out_pattern) - grp_rgx_dict = {} - #: Convert simple file patterns to format strings (ex: ddd becomes :03d). - for group_name, groups_pattern in group_and_pattern_tuples: - # Get the length of the string if not variable width - s_len = "" if "+" in groups_pattern else str(len(groups_pattern)) - # Set the formatting value - temp_pattern = "s" if groups_pattern[0] == "c" else "d" - # Prepend a 0 for padding digit format - if temp_pattern == "d": - s_len = "0" + s_len - grp_rgx_dict[group_name] = "{" + group_name + ":" + s_len + temp_pattern + "}" - new_out_pattern = out_pattern - for named_group, format_str in grp_rgx_dict.items(): - new_out_pattern = re.sub( - rf"\{{{named_group}:.*?\}}", - format_str, - new_out_pattern, - ) - logger.debug(f"specify_len() returns {new_out_pattern}") - - return new_out_pattern - - -def get_char_to_digit_grps(inp_pattern: str, out_pattern: str) -> list[str]: - """Return group names where input and output datatypes differ. - - If the input pattern is a character and the output pattern is a - digit, return the named group associated with those patterns. - - Args: - inp_pattern: Original input pattern. - out_pattern: Original output pattern. - - Returns: - special_categories: Named groups with c to d conversion or [None]. - """ - logger.debug(f"get_char_to_digit_grps() inputs: {inp_pattern}, {out_pattern}") - #: Extract the group name and associated pattern (ex: {row:dd}) - ingrp_and_pattern_tuples = re.findall(r"\{(\w+):([dc+]+)\}", inp_pattern) - outgrp_and_pattern_tuples = re.findall(r"\{(\w+):([dc+]+)\}", out_pattern) - - #: Get group names where input pattern is c and output pattern is d - special_categories = [] - for out_grp_name in dict(outgrp_and_pattern_tuples): - if dict(ingrp_and_pattern_tuples)[out_grp_name].startswith("c") and dict( - outgrp_and_pattern_tuples, - )[out_grp_name].startswith("d"): - special_categories.append(out_grp_name) - logger.debug(f"get_char_to_digit_grps() returns {special_categories}") - return special_categories - - -def extract_named_grp_matches( - rgx_pattern: str, - inp_files: list, -) -> list[dict[str, Union[str, Any]]]: - """Store matches from the substrings from each filename that vary. - - Loop through each file. Apply the regex pattern to each - filename. When a match occurs for a named group, add that match to - a dictionary, where the key is the named (regex capture) group and - the value is the corresponding match from the filename. - - Args: - rgx_pattern: input pattern in regex format. - inp_files: list of files in input directory. - - Returns: - grp_match_dict_list: list of dictionaries containing str matches. - """ - logger.debug(f"extract_named_grp_matches() inputs: {rgx_pattern}, {inp_files}") - grp_match_dict_list = [] - #: Build list of dicts, where key is capture group and value is match - for filename in inp_files: - try: - d = re.match(rgx_pattern, filename) - if d is None: - break - grp_match_dict = d.groupdict() - #: Add filename information to dictionary - grp_match_dict["fname"] = filename - grp_match_dict_list.append(grp_match_dict) - except AttributeError as e: - logger.error(e) - logger.error( - "File pattern does not match one or more files. " - "See README for pattern rules.", - ) - msg = "File pattern does not match with files." 
- raise AttributeError(msg) from e - except AssertionError as e: - if str(e).startswith("redefinition of group name"): - logger.error( - "Ensure that named groups in file patterns are unique. " - "({})".format(e), - ) - msg = f"Ensure that named groups in file patterns are unique. ({e})" - raise ValueError( - msg, - ) from e - - logger.debug(f"extract_named_grp_matches() returns {grp_match_dict_list}") - - return grp_match_dict_list - - -def str_to_int(dictionary: dict) -> dict: - """If a number in the dictionary is in str format, convert to int. - - Args: - dictionary: contains group, match, and filename info. - - Returns: - fixed_dictionary: input dict, with numeric str values to int. - """ - fixed_dictionary = {} - for key, value in dictionary.items(): - try: - fixed_dictionary[key] = int(value) - except Exception: # noqa: BLE001 - fixed_dictionary[key] = value - logger.debug(f"str_to_int() returns {fixed_dictionary}") - return fixed_dictionary - - -def letters_to_int(named_grp: str, all_matches: list) -> dict: - """Alphabetically number matches for the given named group for all files. - - Make a dictionary where each key is a match for each filename and - the corresponding value is a number indicating its alphabetical rank. - - Args: - named_grp: Group with c in input pattern and d in out pattern. - all_matches: list of dicts, k=grps, v=match, last item=file name. - - Returns: - cat_index_dict: dict key=category name, value=index after sorting. - """ - logger.debug(f"letters_to_int() inputs: {named_grp}, {all_matches}") - #: Generate list of strings belonging to the given category (element). - alphabetized_matches = sorted( - {namedgrp_match_dict[named_grp] for namedgrp_match_dict in all_matches}, - ) - str_alphabetindex_dict = {} - for i in range(0, len(alphabetized_matches)): - str_alphabetindex_dict[alphabetized_matches[i]] = i - logger.debug(f"letters_to_int() returns {str_alphabetindex_dict}") - return str_alphabetindex_dict - - -def rename( # noqa: C901, PLR0915, PLR0912 - inp_dir: str, - out_dir: pathlib.Path, - file_pattern: str, - out_file_pattern: str, -) -> None: - """Scalable Extraction of Nyxus Features. - - Args: - inp_dir : Path to image collection. - out_dir : Path to image collection storing copies of renamed files. - file_pattern : Input file pattern. - out_file_pattern : Output file pattern. - """ - logger.info("Start renaming files") - file_ext = re.split("\\.", file_pattern)[-1] - empty_ext = "" - ext_length = 5 - if file_ext == "*": - msg = "Please define filePattern including file extension!" - raise ValueError(msg) - if file_ext == empty_ext: - msg = "Please define filePattern including file extension!" - raise ValueError(msg) - if len(file_ext) > ext_length: - msg = "Please define filePattern including file extension!" - raise ValueError(msg) - - _, inpfiles = get_data(inp_dir) - - inp_files: list[str] = [ - f"{f.name}" for f in inpfiles if pathlib.Path(f).suffix == f".{file_ext}" - ] - - if len(inp_files) == 0: - msg = "Please check input directory again!! 
As it does not contain files" - raise ValueError(msg) - - chars_to_escape = ["(", ")", "[", "]", "$", "."] - for char in chars_to_escape: - file_pattern = file_pattern.replace(char, ("\\" + char)) - - if "\\.*" in file_pattern: - file_pattern = file_pattern.replace("\\.*", (".*")) - if "\\.+" in file_pattern: - file_pattern = file_pattern.replace("\\.+", (".+")) - groupname_regex_dict = map_pattern_grps_to_regex(file_pattern) - - # #: Integrate regex from dictionary into original file pattern - inp_pattern_rgx = convert_to_regex(file_pattern, groupname_regex_dict) - - # #: Integrate format strings into outFilePattern to specify digit/char len - out_pattern_fstring = specify_len(out_file_pattern) - - #: List named groups where input pattern=char & output pattern=digit - char_to_digit_categories = get_char_to_digit_grps(file_pattern, out_file_pattern) - - #: List a dictionary (k=named grp, v=match) for each filename - - all_grp_matches = extract_named_grp_matches(inp_pattern_rgx, inp_files) - - #: Convert numbers from strings to integers, if applicable - for i in range(0, len(all_grp_matches)): - tmp_match = all_grp_matches[i] - all_grp_matches[i] = str_to_int(tmp_match) - - if len(all_grp_matches) == 0: - msg = f"Please define filePattern: {file_pattern} again!!" - raise ValueError( - msg, - ) - - #: Populate dict if any matches need to be converted from char to digit - #: Key=named group, Value=Int representing matched chars - numbered_categories = {} - for named_grp in char_to_digit_categories: - numbered_categories[named_grp] = letters_to_int(named_grp, all_grp_matches) - # Check named groups that need c->d conversion - for named_grp in char_to_digit_categories: - for i in range(0, len(all_grp_matches)): - if all_grp_matches[i].get(named_grp): - #: Replace original matched letter with new digit - all_grp_matches[i][named_grp] = numbered_categories[named_grp][ - all_grp_matches[i][named_grp] - ] - - with ProcessPoolExecutor(max_workers=NUM_THREADS) as executor: - threads = [] - for match in all_grp_matches: - # : If running on WIPP - if out_dir != inp_dir: - #: Apply str formatting to change digit or char length - out_name = out_dir.resolve() / out_pattern_fstring.format( - **match, - ) - old_file_name = pathlib.Path(inp_dir, match["fname"]) - threads.append(executor.submit(shutil.copy2, old_file_name, out_name)) - else: - out_name = out_pattern_fstring.format(**match) # type: ignore - old_file_name = match["fname"] # type: ignore - logger.info(f"Old name {old_file_name} & new name {out_name}") - threads.append( - executor.submit( - os.rename, - pathlib.Path(inp_dir, old_file_name), - pathlib.Path(out_dir, out_name), - ), - ) - - for f in tqdm( - as_completed(threads), - total=len(threads), - mininterval=5, - desc="converting images", - initial=0, - unit_scale=True, - colour="cyan", - ): - f.result() diff --git a/segmentation/zarr-autosegmentation-tool/tests/__init__.py b/segmentation/zarr-autosegmentation-tool/tests/__init__.py deleted file mode 100644 index abf20f391..000000000 --- a/segmentation/zarr-autosegmentation-tool/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""File Renaming.""" diff --git a/segmentation/zarr-autosegmentation-tool/tests/file_rename_test.json b/segmentation/zarr-autosegmentation-tool/tests/file_rename_test.json deleted file mode 100644 index 5c8a5109f..000000000 --- a/segmentation/zarr-autosegmentation-tool/tests/file_rename_test.json +++ /dev/null @@ -1,103 +0,0 @@ -{ - "bleed_through_estimation_fixed_chars": [ - "r001_c001_z000.ome.tif", - 
"r002_c001_z001.ome.tif", - "r001_c001_z000.ome.tif" - ], - "duplicate_channels_to_digit": [ - "r001_c001_GFP.ome.tif", - "r002_c001_GFP.ome.tif", - "r001_c001_YFP.ome.tif" - ], - "percentage_file": [ - "%033_c333_z5050.ome.tif", - "%444_c123_z50493.ome.tif" - ], - "robot": [ - "003011-3-001001002.tif", - "005008-4-001001001.tif", - "006003-2-001001002.tif", - "006005-2-001001001.tif", - "007005-1-001001001.tif", - "007011-3-001001002.tif", - "006008-4-001001002.tif", - "006004-1-001001001.tif", - "007010-2-001001001.tif" - ], - "brain": [ - "S1_R3_C1-C11_A1_y001_x010_c005.ome.tif", - "S1_R3_C1-C11_A1_y011_x021_c005.ome.tif", - "S1_R1_C1-C11_A1_y013_x007_c003.ome.tif", - "S1_R2_C1-C11_A1_y007_x006_c004.ome.tif", - "S1_R2_C1-C11_A1_y005_x012_c003.ome.tif", - "S1_R5_C1-C11_A1_y009_x016_c005.ome.tif", - "S1_R2_C1-C11_A1_y002_x018_c000.ome.tif" - ], - "variable": [ - "S1_R4_C1-C11_A1_y2_x011_c6.ome.tif", - "S1_R4_C1-C11_A1_y1_x4_c4.ome.tif", - "S1_R1_C1-C11_A1_y7_x0_c0.ome.tif", - "S1_R2_C1-C11_A1_y011_x7_c8.ome.tif", - "S1_R5_C1-C11_A1_y012_x3_c4.ome.tif", - "S1_R2_C1-C11_A1_y3_x0_c6.ome.tif", - "S1_R2_C1-C11_A1_y7_x5_c3.ome.tif", - "S1_R2_C1-C11_A1_y011_x4_c6.ome.tif", - "S1_R2_C1-C11_A1_y2_x018_c0.ome.tif" - ], - "parenthesis": [ - "img_x01_y01_(TXRED).tif", - "img_x01_y01_(GFP).tif", - "img_x01_y01_(DAPI).tif" - ], - "two_chan": [ - "img_x01_y01_TXRED_TXYELLOW.tif", - "img_x01_y02_GFP_YFP.tif", - "img_x03_y03_DAPI_DAPIYELLOW.tif" - ], - "three_chan": [ - "img_x01_y01_TXRED_TXYELLOW_ABC.tif", - "img_x01_y02_GFP_YFP_DEF.tif", - "img_x03_y03_DAPI_DAPIYELLOW_GHI.tif" - ], - "non_alphanum_int": [ - "img x01 y01 TXRED.tif", - "img x01 y02 GFP.tif", - "img x03 y03 DAPI.tif" - ], - "non_alphanum_float": [ - "img x01.22 y01 TXRED.tif", - "img x01.33 y02 GFP.tif", - "img x03.44 y03 DAPI.tif" - ], - "kph-kirill": [ - "0(01-16)0(01-24)-(1-4)-002.ome.tif", - "0(01-16)0(01-24)-(1-4)-001.ome.tif", - "0(01-16)0(01-24)-(1-4)-003.ome.tif" - ], - "three_char_chan": [ - "img x01 y01 GFP.tif", - "img x02 y02 YFP.tif", - "img x03 y02 ABC.tif", - "img x04 y00 DEF.tif" - ], - "tissuenet-val-labels-45-C": [ - "p0_y4_r730_c1.ome.tif", - "p2_y0_r36_c1.ome.tif", - "p4_y1_r232_c1.ome.tif", - "p0_y4_r731_c1.ome.tif", - "p2_y0_r37_c1.ome.tif", - "p4_y1_r233_c1.ome.tif", - "p0_y4_r732_c1.ome.tif", - "p2_y0_r38_c1.ome.tif", - "p4_y1_r234_c1.ome.tif", - "p1_y3_r365_c1.ome.tif", - "p3_y5_r110_c1.ome.tif", - "p5_y4_r18_c1.ome.tif", - "p1_y3_r366_c1.ome.tif", - "p3_y5_r111_c1.ome.tif", - "p5_y4_r19_c1.ome.tif", - "p1_y3_r367_c1.ome.tif", - "p3_y5_r112_c1.ome.tif", - "p5_y4_r20_c1.ome.tif" - ] -} diff --git a/segmentation/zarr-autosegmentation-tool/tests/test_main.py b/segmentation/zarr-autosegmentation-tool/tests/test_main.py deleted file mode 100644 index e9d981bda..000000000 --- a/segmentation/zarr-autosegmentation-tool/tests/test_main.py +++ /dev/null @@ -1,654 +0,0 @@ -"""Testing of File Renaming.""" - -import json -import pathlib -import shutil -import tempfile -from typing import Any -from typing import DefaultDict -from typing import Tuple -import click -import pytest -import numpy as np -from typer.testing import CliRunner - -from polus.images.formats.file_renaming import file_renaming as fr -from polus.images.formats.file_renaming.__main__ import app as app - -runner = CliRunner() - - -class CreateData: - """Generate tabular data with several different file format.""" - - def __init__(self): - """Define instance attributes.""" - self.dirpath = pathlib.Path(__file__).parent - self.jsonpath = 
self.dirpath.joinpath("file_rename_test.json") - - def input_directory(self) -> pathlib.Path: - """Create temporary input directory.""" - return tempfile.mkdtemp(dir=self.dirpath) - - def output_directory(self) -> pathlib.Path: - """Create temporary output directory.""" - return tempfile.mkdtemp(dir=self.dirpath) - - def runcommands( - self, inputs: pathlib.Path, inp_pattern: str, out_pattern: str - ) -> click.testing.Result: - """Run command line arguments.""" - inp_dir = self.input_directory() - out_dir = self.output_directory() - for inp in inputs: - pathlib.Path.open(pathlib.Path(inp_dir, inp), "w").close() - - outputs = runner.invoke( - app, - [ - "--inpDir", - str(inp_dir), - "--filePattern", - inp_pattern, - "--outDir", - str(out_dir), - "--outFilePattern", - out_pattern, - ], - ) - return outputs - - def load_json(self, x: str) -> DefaultDict[Any, Any]: - """Json file containing image filenames.""" - with pathlib.Path.open(self.jsonpath) as file: - data = json.load(file) - return data[x] - - def clean_directories(self) -> None: - """Remove files.""" - for d in self.dirpath.iterdir(): - if d.is_dir() and d.name.startswith("tmp"): - shutil.rmtree(d) - - -fixture_params = [ - [ - ( - "r{row:ddd}_c{col:ddd}_{chan:ccc}.ome.tif", - "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", - ), - ( - "r{row:d+}_c{col:d+}_{chan:c+}.ome.tif", - "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif", - ), - ("r.ome.tif", "output_r{row:dddd}_c{col:dddd}_{chan:d+}.ome.tif"), - ( - "%{row:ddd}_c{col:ddd}_z{z:d+}.ome.tif", - "%{row:dddd}_col{col:dddd}_z{z:d+}.ome.tif", - ), - ( - "00{one:d}0{two:dd}-{three:d}-00100100{four:d}.tif", - "output{one:dd}0{two:ddd}-{three:dd}-00100100{four:dd}.tif", - ), - ( - "S1_R{one:d}_C1-C11_A1_y0{two:dd}_x0{three:dd}_c0{four:dd}.ome.tif", - "output{one:dd}_C1-C11_A1_y0{two:ddd}_x0{three:ddd}_c0{four:ddd}.ome.tif", - ), - ( - "S1_R{one:d}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", - "output{one:dd}_C1-C11_A1_y{two:d+}_x{three:d+}_c{four:d+}.ome.tif", - ), - ( - "img_x{row:dd}_y{col:dd}_({chan:c+}).tif", - "output{row:dd}_{col:ddd}_{chan:dd}.tif", - ), - ( - "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}.tif", - "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}.tif", - ), - ( - "img_x{row:dd}_y{col:dd}_{chan:c+}_{ychan:c+}_{alphachan:ccc}.tif", - "output{row:ddd}_{col:ddd}_{chan:dd}_{ychan:ddd}_{alphachan:dddd}.tif", - ), - ( - "img x{row:dd} y{col:dd} {chan:ccc}.tif", - "output{row:ddd}_{col:ddd}_{chan:ccc}.tif", - ), - ( - "p{p:d}_y{y:d}_r{r:d+}_c{c:d+}.ome.tif", - "p{p:dd}_y{y:dd}_r{r:dddd}_c{c:ddd}.ome.tif", - ), - ( - "img x{row:dd} y{col:dd} {chan:c+}.tif", - "output{row:ddd}_{col:ddd}_{chan:dd}.tif", - ), - ( - "img x{row:dd}.{other:d+} y{col:dd} {chan:c+}.tif", - "output{row:ddd}_{col:ddd}_ {other:d+} {chan:dd}.tif", - ), - ( - "0({mo:dd}-{day:dd})0({mo2:dd}-{day2:dd})-({a:d}-{b:d})-{col:ddd}.ome.tif", - "0({mo:ddd}-{day:ddd})0{mo2:dd}-{day2:dd})-({a:dd}-{b:dd})-{col:ddd}.ome.tif", - ), - ] -] - - -@pytest.fixture(params=fixture_params) -def poly(request: Tuple[str, str]) -> pytest.FixtureRequest: - """To get the parameter of the fixture.""" - return request.param - - -def test_duplicate_channels_to_digit(poly: pytest.FixtureRequest) -> None: - """Testing of duplicate channels to digits.""" - d = CreateData() - inputs = d.load_json("duplicate_channels_to_digit") - (inp_pattern, out_pattern) = poly[0] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_duplicate_channels_to_digit_non_spec_digit_len( - 
poly: pytest.FixtureRequest, -) -> None: - """Testing of duplicate channels to digits with non specified length of digits.""" - d = CreateData() - inputs = d.load_json("duplicate_channels_to_digit") - (inp_pattern, out_pattern) = poly[1] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_invalid_input_raises_error(poly: pytest.FixtureRequest) -> None: - """Testing of invalid input filepattern.""" - d = CreateData() - inputs = d.load_json("duplicate_channels_to_digit") - (inp_pattern, out_pattern) = poly[0] - d.runcommands(inputs, inp_pattern, out_pattern) - - -def test_non_alphanum_inputs_percentage_sign(poly: pytest.FixtureRequest) -> None: - """Testing of filename with non alphanumeric inputs such as percentage sign.""" - d = CreateData() - inputs = d.load_json("percentage_file") - (inp_pattern, out_pattern) = poly[3] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_numeric_fixed_width(poly: pytest.FixtureRequest) -> None: - """Testing of filename with numeric fixed length.""" - d = CreateData() - inputs = d.load_json("robot") - (inp_pattern, out_pattern) = poly[4] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_alphanumeric_fixed_width(poly: pytest.FixtureRequest) -> None: - """Testing of filename with alphanumeric fixed length.""" - d = CreateData() - inputs = d.load_json("brain") - (inp_pattern, out_pattern) = poly[5] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_alphanumeric_variable_width(poly: pytest.FixtureRequest) -> None: - """Testing of filename with alphanumeric variable width.""" - d = CreateData() - inputs = d.load_json("variable") - (inp_pattern, out_pattern) = poly[6] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_parenthesis(poly: pytest.FixtureRequest) -> None: - """Testing of filename with parenthesis.""" - d = CreateData() - inputs = d.load_json("parenthesis") - (inp_pattern, out_pattern) = poly[7] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_two_chan_to_digit(poly: pytest.FixtureRequest) -> None: - """Testing conversion of two channels to digits.""" - d = CreateData() - inputs = d.load_json("two_chan") - (inp_pattern, out_pattern) = poly[8] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_three_chan_to_digit(poly: pytest.FixtureRequest) -> None: - """Test conversion of three channels to digits.""" - d = CreateData() - inputs = d.load_json("three_chan") - (inp_pattern, out_pattern) = poly[9] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_three_char_chan(poly: pytest.FixtureRequest) -> None: - """Test conversion of three character channels to digits.""" - d = CreateData() - inputs = d.load_json("three_char_chan") - (inp_pattern, out_pattern) = poly[10] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_varied_digits(poly: pytest.FixtureRequest) -> None: - """Test varied digits.""" - d = CreateData() - inputs = d.load_json("tissuenet-val-labels-45-C") - (inp_pattern, out_pattern) = poly[11] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_spaces(poly: 
pytest.FixtureRequest) -> None: - """Test non-alphanumeric chars such as spaces.""" - d = CreateData() - inputs = d.load_json("non_alphanum_int") - (inp_pattern, out_pattern) = poly[12] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - - -def test_non_alphanum_float(poly: pytest.FixtureRequest) -> None: - """Test non-alphanumeric chars such as spaces, periods, commas, brackets.""" - d = CreateData() - inputs = d.load_json("non_alphanum_float") - (inp_pattern, out_pattern) = poly[13] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_dashes_parentheses(poly: pytest.FixtureRequest) -> None: - """Test non-alphanumeric chars are handled properly such as dashes, parenthesis.""" - d = CreateData() - inputs = d.load_json("kph-kirill") - (inp_pattern, out_pattern) = poly[14] - outputs = d.runcommands(inputs, inp_pattern, out_pattern) - assert outputs.exit_code == 0 - d.clean_directories() - - -def test_map_pattern_grps_to_regex_valid_input() -> None: - """Test of mapping input pattern.""" - test_cases = [ - ( - ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"), - ( - { - "row": "(?P[0-9][0-9])", - "col": "(?P[0-9][0-9])", - "channel": "(?P[a-zA-Z]+)", - } - ), - ), - (("img_x{row:c+}.tif"), ({"row": "(?P[a-zA-Z]+)"})), - ((""), ({})), - ] - for test_case in test_cases: - (from_val, to_val) = test_case - result = fr.map_pattern_grps_to_regex(from_val) - assert result == to_val - - -def test_convert_to_regex_valid_input() -> None: - """Test of converting to regular expression pattern.""" - test_cases = [ - ( - ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"), - ( - { - "row": "(?P[0-9][0-9])", - "col": "(?P[0-9][0-9])", - "channel": "(?P[a-zA-Z]+)", - } - ), - ( - "img_x(?P[0-9][0-9])_y(?P[0-9][0-9])_(?P[a-zA-Z]+).tif" - ), - ), - ( - ("img_x{row:c+}.tif"), - ({"row": "(?P[a-zA-Z]+)"}), - ("img_x(?P[a-zA-Z]+).tif"), - ), - (("img_x01.tif"), ({}), ("img_x01.tif")), - ] - for test_case in test_cases: - (from_val1, from_val2, to_val) = test_case - result = fr.convert_to_regex(from_val1, from_val2) - assert result == to_val - - -def test_specify_len_valid_input() -> None: - """Test of sepcifying length.""" - test_cases = [ - ( - ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"), - ("newdata_x{row:03d}_y{col:03d}_c{channel:03d}.tif"), - ), - (("newdata_x{row:c+}.tif"), ("newdata_x{row:s}.tif")), - (("newdata_x01.tif"), ("newdata_x01.tif")), - ] - for test_case in test_cases: - (from_val, to_val) = test_case - result = fr.specify_len(from_val) - assert result == to_val - - -def test_get_char_to_digit_grps_returns_unique_keys_valid_input() -> None: - """Test of getting characters to digit groups.""" - test_cases = [ - ( - ("img_x{row:dd}_y{col:dd}_{channel:c+}.tif"), - ("newdata_x{row:ddd}_y{col:ddd}_c{channel:ddd}.tif"), - (["channel"]), - ), - (("img_x{row:c+}.tif"), ("newdata_x{row:c+}.tif"), ([])), - (("img_x01.tif"), ("newdata_x01.tif"), ([])), - ] - for test_case in test_cases: - (from_val1, from_val2, to_val) = test_case - result = fr.get_char_to_digit_grps(from_val1, from_val2) - assert result == to_val - - -def test_extract_named_grp_matches_valid_input() -> None: - """Test of extracting group names.""" - test_cases = [ - ( - ( - "img_x(?P[0-9][0-9])_y(?P[0-9][0-9])_(?P[a-zA-Z]+).tif" - ), - (["img_x01_y01_DAPI.tif", "img_x01_y01_GFP.tif", "img_x01_y01_TXRED.tif"]), - ( - [ - { - "row": "01", - "col": "01", - "channel": "DAPI", - "fname": "img_x01_y01_DAPI.tif", - }, - { - "row": 
"01", - "col": "01", - "channel": "GFP", - "fname": "img_x01_y01_GFP.tif", - }, - { - "row": "01", - "col": "01", - "channel": "TXRED", - "fname": "img_x01_y01_TXRED.tif", - }, - ] - ), - ), - (("img_x01.tif"), (["img_x01.tif"]), ([{"fname": "img_x01.tif"}])), - ] - for test_case in test_cases: - (from_val1, from_val2, to_val) = test_case - result = fr.extract_named_grp_matches(from_val1, from_val2) - assert result == to_val - - -def test_extract_named_grp_matches_bad_pattern_invalid_input_fails() -> None: - """Test of invalid input pattern.""" - test_cases = [ - ( - ("img_x(?P[a-zA-Z]+).tif"), - (["img_x01_y01_DAPI.tif", "img_x01_y01_GFP.tif", "img_x01_y01_TXRED.tif"]), - ) - ] - for test_case in test_cases: - (from_val1, from_val2) = test_case - - result = fr.extract_named_grp_matches(from_val1, from_val2) - assert len(result) == 0 - - -def test_str_to_int_valid_input() -> None: - """Test of string to integer.""" - test_cases = [ - ( - ( - { - "row": "01", - "col": "01", - "channel": "DAPI", - "fname": "img_x01_y01_DAPI.tif", - } - ), - ({"row": 1, "col": 1, "channel": "DAPI", "fname": "img_x01_y01_DAPI.tif"}), - ), - ( - ( - { - "row": "2", - "col": "01", - "channel": "TXRED", - "fname": "img_x01_y01_TXRED.tif", - } - ), - ( - { - "row": 2, - "col": 1, - "channel": "TXRED", - "fname": "img_x01_y01_TXRED.tif", - } - ), - ), - ( - ( - { - "row": "0001", - "col": "0001", - "channel": "GFP", - "fname": "img_x01_y01_GFP.tif", - } - ), - ({"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}), - ), - ] - for test_case in test_cases: - (from_val, to_val) = test_case - result = fr.str_to_int(from_val) - assert result == to_val - - -def test_letters_to_int_returns_cat_index_dict_valid_input() -> None: - """Test of letter to integers.""" - test_cases = [ - ( - ("channel"), - [ - { - "row": 1, - "col": 1, - "channel": "DAPI", - "fname": "img_x01_y01_DAPI.tif", - }, - {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, - { - "row": 1, - "col": 1, - "channel": "TXRED", - "fname": "img_x01_y01_TXRED.tif", - }, - ], - ({"DAPI": 0, "GFP": 1, "TXRED": 2}), - ) - ] - for test_case in test_cases: - (from_val1, from_val2, to_val) = test_case - result = fr.letters_to_int(from_val1, from_val2) - assert result == to_val - - -@pytest.mark.xfail -def test_extract_named_grp_matches_duplicate_namedgrp_invalid_input() -> None: - """Test of invalid input pattern.""" - test_cases = [ - ( - ( - "x(?P[0-9][0-9])_y(?P[0-9][0-9])_c(?P[a-zA-Z]+).ome.tif" - ), - (["img_x01_y01_DAPI.tif", "img_x01_y01_GFP.tif", "img_x01_y01_TXRED.tif"]), - ) - ] - for test_case in test_cases: - (from_val1, from_val2) = test_case - fr.extract_named_grp_matches(from_val1, from_val2) - - -@pytest.mark.xfail -def test_letters_to_int_returns_error_invalid_input() -> None: - """Test of invalid inputs.""" - test_cases = [ - ( - (2), - [ - { - "row": 1, - "col": 1, - "channel": "DAPI", - "fname": "img_x01_y01_DAPI.tif", - }, - {"row": 1, "col": 1, "channel": "GFP", "fname": "img_x01_y01_GFP.tif"}, - { - "row": 1, - "col": 1, - "channel": "TXRED", - "fname": "img_x01_y01_TXRED.tif", - }, - ], - ), - ] - for test_case in test_cases: - (from_val1, from_val2) = test_case - fr.letters_to_int(from_val1, from_val2) - - -@pytest.fixture -def create_subfolders() -> Tuple[pathlib.Path, str, str, str]: - """Creating directory and subdirectories.""" - data = { - "complex": [ - ["A9 p5d.tif", "A9 p5f.tif", "A9 p7f.tif"], - "96 ( -)* test_", - "{row:c}{col:d}.*p{f:d+}{character:c}.tif", - 
"x{row:dd}_y{col:dd}_p{f:dd}{character:c}_c01.tif", - ], - "simple": [ - [ - "taoe005-u2os-72h-cp-a-au00044859_a01_s3_w23db644df-02ee-429d-9559-09cf4625c62b.tif", - "taoe005-u2os-72h-cp-a-au00044859_b01_s3_w3add254c8-0c7b-4cf0-a5dc-bf0cf8de8cec.tif", - "taoe005-u2os-72h-cp-a-au00044859_b07_s5_w2da098211-f7c1-453d-954f-b7d4751f6daa.tif", - "taoe005-u2os-72h-cp-a-au00044859_c15_s2_w3aea523fa-3b89-46a7-95e3-604017151895.tif", - ], - "folder_", - ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", - "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", - ], - } - for name in ["complex", "simple"]: - d = CreateData() - dir_path = d.input_directory() - for i in range(5): - dirname = pathlib.Path(dir_path, f"{data[name][1]}{i}") - pathlib.Path(dirname).mkdir(exist_ok=False, parents=False) - for fl in data[name][0]: - temp_file = pathlib.Path.open(pathlib.Path(dirname, fl), "w") - temp_file.close() - - return pathlib.Path(dir_path), data[name][1], data[name][2], data[name][3] - - -def test_recursive_searching_files() -> None: - """Test recursive searching of files nested directories.""" - - dir_path = tempfile.mkdtemp(dir=pathlib.Path.cwd()) - out_dir = tempfile.mkdtemp(dir=pathlib.Path.cwd()) - for i in range(2): - dirname1 = "image_folder_" - dirname2 = "groundtruth_folder_" - dirname1 = pathlib.Path(dir_path, f"BBBC/BBBC001/Images/{dirname1}{i}") - dirname2 = pathlib.Path(dir_path, f"BBBC/BBBC001/Groundtruth/{dirname2}{i}") - pathlib.Path(dirname1).mkdir(exist_ok=False, parents=True) - pathlib.Path(dirname2).mkdir(exist_ok=False, parents=True) - - flist = [ - "AS_09125_050118150001_A03f00d0.tif", - "AS_09125_050118150001_A03f01d0.tif", - "AS_09125_050118150001_A03f02d0.tif", - "AS_09125_050118150001_A03f03d0.tif", - "AS_09125_050118150001_A03f04d0.tif", - "AS_09125_050118150001_A03f05d0.tif", - ] - - for fl in flist: - temp_file = pathlib.Path.open(pathlib.Path(dirname1, fl), "w") - temp_file = pathlib.Path.open(pathlib.Path(dirname2, fl), "w") - temp_file.close() - file_pattern = ".*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif" - out_file_pattern = "x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif" - map_directory = "raw" - - runner.invoke( - app, - [ - "--inpDir", - dir_path, - "--filePattern", - file_pattern, - "--outDir", - out_dir, - "--outFilePattern", - out_file_pattern, - "--mapDirectory", - map_directory, - ], - ) - assert list( - np.unique([p.name.split("_")[0] for p in pathlib.Path(out_dir).iterdir()]) - ) == ["groundtruth", "image"] - shutil.rmtree(dir_path) - shutil.rmtree(out_dir) - - -def test_cli(create_subfolders: pytest.FixtureRequest) -> None: - """Test Cli.""" - dir_path, _, file_pattern, out_file_pattern = create_subfolders - for i in ["raw", "map"]: - d = CreateData() - out_dir = d.output_directory() - result = runner.invoke( - app, - [ - "--inpDir", - dir_path, - "--filePattern", - file_pattern, - "--outDir", - out_dir, - "--outFilePattern", - out_file_pattern, - "--mapDirectory", - i, - ], - ) - assert result.exit_code == 0 - - d.clean_directories() From 299106e638529de5af084726db23e5050cc879f3 Mon Sep 17 00:00:00 2001 From: David Liu Date: Wed, 15 Jan 2025 17:29:40 -0500 Subject: [PATCH 04/10] committing working code with rgb bug --- .../.gitignore | 6 +++- .../ome-zarr-autosegmentation-plugin/ict.yaml | 29 ------------------- .../ome-zarr-autosegmentation-plugin/job.wic | 7 +++++ .../autosegmentation.py | 4 +-- 4 files changed, 14 insertions(+), 32 deletions(-) delete mode 100644 segmentation/ome-zarr-autosegmentation-plugin/ict.yaml create mode 100644 
segmentation/ome-zarr-autosegmentation-plugin/job.wic diff --git a/segmentation/ome-zarr-autosegmentation-plugin/.gitignore b/segmentation/ome-zarr-autosegmentation-plugin/.gitignore index 5c0dac495..ef3fb9aeb 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/.gitignore +++ b/segmentation/ome-zarr-autosegmentation-plugin/.gitignore @@ -1,4 +1,8 @@ poetry.lock uv.lock test_datasets/** -models/** \ No newline at end of file +models/** +autogenerated/** +cachedir/** +provenance/** +output_job.json \ No newline at end of file diff --git a/segmentation/ome-zarr-autosegmentation-plugin/ict.yaml b/segmentation/ome-zarr-autosegmentation-plugin/ict.yaml deleted file mode 100644 index 67b64ba86..000000000 --- a/segmentation/ome-zarr-autosegmentation-plugin/ict.yaml +++ /dev/null @@ -1,29 +0,0 @@ -author: -- David Liu -contact: zhiyuan.liu@axleinfo.com -container: polusai/ome-zarr-autosegmentation-plugin:0.1.0 -description: Takes in an OME-Zarr dataset and produces a second one with the same structure but of segmentations -inputs: -- description: Dataset input for the plugin. - format: - - genericData - name: input - required: true - type: path -name: polusai/OMEZarrAutosegmentation -outputs: -- description: Output data for the plugin - format: - - collection - name: output - required: true - type: path -repository: https://github.com/PolusAI/image-tools -specVersion: 1.0.0 -title: OME-Zarr Autosegmentation -ui: -- description: Pick a collection... - key: inputs.input - title: 'OME-Zarr input dataset: ' - type: path -version: 0.1.0 diff --git a/segmentation/ome-zarr-autosegmentation-plugin/job.wic b/segmentation/ome-zarr-autosegmentation-plugin/job.wic new file mode 100644 index 000000000..056976235 --- /dev/null +++ b/segmentation/ome-zarr-autosegmentation-plugin/job.wic @@ -0,0 +1,7 @@ +steps: + ome-zarr-autosegmentation: + in: + inpDir: !ii input_directory + outDir: !ii output_directory + out: + - outDir: !& output_directory \ No newline at end of file diff --git a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py index 2a0c001ed..8d6a193e0 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py +++ b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py @@ -32,7 +32,7 @@ def get_device(): def init_sam2_predictor(checkpoint_path): """Initialize SAM2 predictor with given checkpoint""" device = get_device() - model = build_sam2("configs/sam2.1/sam2.1_hiera_s.yaml", checkpoint_path, device=str(device)) + model = build_sam2("configs/sam2.1/sam2.1_hiera_l.yaml", checkpoint_path, device=str(device)) return SAM2AutomaticMaskGenerator(model) @@ -213,7 +213,7 @@ def autosegment_dataset(input_dataset_path: Path | str, output_dataset_path: Pat segmentations = [] sam2_predictor = init_sam2_predictor( - "../models/sam2.1_hiera_small.pt", + "../models/sam2.1_hiera_large.pt", ) for z in range(num_slices): slice_data = volume[z].compute() From a7a23bfa994cf91265f04c207f3dacaa878ae698 Mon Sep 17 00:00:00 2001 From: David Liu Date: Tue, 4 Mar 2025 11:11:06 -0500 Subject: [PATCH 05/10] old commit --- .../autosegmentation.py | 89 ++++--------------- 1 file changed, 16 insertions(+), 73 deletions(-) diff --git 
a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py index 8d6a193e0..0bc0d6403 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py +++ b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py @@ -38,81 +38,33 @@ def init_sam2_predictor(checkpoint_path): def generate_segmentation_mask(predictor, image): """Generate segmentation mask for given PIL image.""" - # Convert PIL image to numpy array img_array = np.array(image) - # Convert grayscale to RGB if necessary if len(img_array.shape) == 2 or ( len(img_array.shape) == 3 and img_array.shape[2] == 1 ): # Stack the single channel three times to create RGB img_array = np.stack([img_array] * 3, axis=-1) - # Ensure array is in correct format (H, W, C) if len(img_array.shape) != 3 or img_array.shape[2] != 3: raise ValueError(f"Unexpected image shape: {img_array.shape}") - # Generate masks with torch.inference_mode(): masks = predictor.generate(img_array) - # Convert list of mask dictionaries to numpy array - mask_array = np.stack([mask["segmentation"] for mask in masks], axis=0) - return mask_array - - -def create_segmentation_overlay(image, masks, colors=None): - """Create a new image showing segmentation masks with different colors.""" - if len(masks) == 0: - return Image.new("RGB", image.size, (0, 0, 0)) - - # Generate random colors if none provided - if colors is None: - colors = [] - for i in range(len(masks)): - # Distribute hues evenly around color wheel - hue = i / len(masks) - h = hue * 6 - c = int(255) - x = int(255 * (1 - abs(h % 2 - 1))) - - if h < 1: - rgb = (c, x, 0) - elif h < 2: - rgb = (x, c, 0) - elif h < 3: - rgb = (0, c, x) - elif h < 4: - rgb = (0, x, c) - elif h < 5: - rgb = (x, 0, c) - else: - rgb = (c, 0, x) - - colors.append(rgb) - - # Create a black background image - result = Image.new("RGB", image.size, (0, 0, 0)) - - # Add each mask with its color - for i, mask in enumerate(masks): - mask_img = Image.fromarray((mask * 255).astype(np.uint8)).convert("L") - overlay = Image.new("RGB", image.size, colors[i]) - result = Image.composite(overlay, result, mask_img) - - return result - + height, width = img_array.shape[:2] + label_image = np.zeros((height, width), dtype=np.uint16) + + for idx, mask in enumerate(masks, start=1): # Start from 1, leaving 0 as background + label_image[mask["segmentation"]] = idx + + return label_image def segment_image(predictor, image): - # Generate masks - masks = generate_segmentation_mask(predictor, image) - - # Create visualization - return create_segmentation_overlay(image, masks) - + return generate_segmentation_mask(predictor, image) def create_zarr_from_segmentations(segmentations, original_dataset_path, output_dataset_path): - """Create an OME-ZARR dataset from segmentation PNGs matching original structure.""" + """Create an OME-ZARR dataset from segmentation masks with unique instance labels.""" # Get original structure location = parse_url(original_dataset_path) reader = Reader(location) @@ -129,56 +81,45 @@ def create_zarr_from_segmentations(segmentations, original_dataset_path, output_ output_path = pathlib.Path(output_dataset_path) if output_path.exists(): import shutil - shutil.rmtree(output_path) output_path.mkdir(parents=True) - # Create 
store with nested directory settings - store = zarr.DirectoryStore( - str(output_path), dimension_separator="/" - ) # Use '/' for nested directories + store = zarr.DirectoryStore(str(output_path), dimension_separator="/") root = zarr.group(store) # Get dimensions from first mask first_mask = np.array(segmentations[0]) - if len(first_mask.shape) == 3: - first_mask = first_mask[..., 0] # Create array matching original dimensions if ndim == 5: # (T, C, Z, Y, X) masks = np.zeros( (1, 1, len(segmentations), first_mask.shape[0], first_mask.shape[1]), - dtype=np.uint8, + dtype=np.uint16, ) else: # (C, Z, Y, X) masks = np.zeros( (1, len(segmentations), first_mask.shape[0], first_mask.shape[1]), - dtype=np.uint8, + dtype=np.uint16, ) - # Load all masks print(f"Loading {len(segmentations)} segmentation masks...") for i, segmentation in enumerate(segmentations): mask = np.array(segmentation) - if len(mask.shape) == 3: - mask = mask[..., 0] if ndim == 5: masks[0, 0, i] = mask else: masks[0, i] = mask - # Create pyramid using nearest neighbor for labels scaler = ome_zarr.scale.Scaler() pyramid = scaler.nearest(masks) - # Write with nested directory structure write_multiscale( pyramid=pyramid, group=root, axes=axes, storage_options={ "chunks": original_chunks, - "dimension_separator": "/", # Ensure nested directory structure + "dimension_separator": "/", }, ) @@ -251,4 +192,6 @@ def autosegmentation(inp_dir: Path, out_dir: Path): Returns: None """ - autosegment_dataset(inp_dir, out_dir) \ No newline at end of file + autosegment_dataset(inp_dir, out_dir) + + \ No newline at end of file From f0e01ad84ee085b31849cd5f32871530f81e7d93 Mon Sep 17 00:00:00 2001 From: David Liu Date: Wed, 26 Mar 2025 13:06:24 -0400 Subject: [PATCH 06/10] updated segmentation --- .../.gitignore | 2 +- .../.python-version | 2 +- .../Dockerfile | 4 +- .../pyproject.toml | 13 +- .../autosegmentation.py | 460 +++++++++++++----- 5 files changed, 355 insertions(+), 126 deletions(-) diff --git a/segmentation/ome-zarr-autosegmentation-plugin/.gitignore b/segmentation/ome-zarr-autosegmentation-plugin/.gitignore index ef3fb9aeb..d3b40938c 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/.gitignore +++ b/segmentation/ome-zarr-autosegmentation-plugin/.gitignore @@ -1,7 +1,7 @@ poetry.lock uv.lock test_datasets/** -models/** +**/models/** autogenerated/** cachedir/** provenance/** diff --git a/segmentation/ome-zarr-autosegmentation-plugin/.python-version b/segmentation/ome-zarr-autosegmentation-plugin/.python-version index e4fba2183..2c0733315 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/.python-version +++ b/segmentation/ome-zarr-autosegmentation-plugin/.python-version @@ -1 +1 @@ -3.12 +3.11 diff --git a/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile b/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile index a5ba2bd56..f380b9bed 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile +++ b/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile @@ -1,5 +1,5 @@ # Build stage -FROM python:3.12-slim AS builder +FROM python:3.11-slim AS builder RUN apt-get update && apt-get install -y \ gcc \ @@ -21,7 +21,7 @@ RUN uv pip install --system -e . COPY . . 
# Final stage -FROM python:3.12-slim +FROM python:3.11-slim WORKDIR /app diff --git a/segmentation/ome-zarr-autosegmentation-plugin/pyproject.toml b/segmentation/ome-zarr-autosegmentation-plugin/pyproject.toml index dbef7fa50..b3d32587d 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/pyproject.toml +++ b/segmentation/ome-zarr-autosegmentation-plugin/pyproject.toml @@ -3,7 +3,7 @@ name = "ome_zarr_autosegmentation_plugin" version = "0.1.0" description = "Add your description here" readme = "README.md" -requires-python = ">=3.12" +requires-python = ">=3.11" dependencies = [ "torch>=2.1.0", "torchvision>=0.16.0", @@ -11,18 +11,11 @@ dependencies = [ "ome-zarr>=0.9.0", "bfio>=2.3.6", "sam2>=1.1.0", + "ngff-zarr[dask-image,tensorstore]>=0.12.2", + "argolid>=0.0.6", "typer>=0.15.1", ] -[[tool.uv.index]] -name = "pytorch" -url = "https://download.pytorch.org/whl/cu121" -explicit = true - -[tool.uv.sources] -torch = { index = "pytorch" } -torchvision = { index = "pytorch" } - [dependency-groups] dev = [ "ruff>=0.8.0", diff --git a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py index 0bc0d6403..c040f1869 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py +++ b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py @@ -1,25 +1,73 @@ -"""ome_zarr_autosegmentation.""" - -from pathlib import Path - +import json import os +from typing import Dict, List, Tuple + +import dask.array as da os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" +os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" import pathlib -from uuid import UUID - +import shutil import numpy as np -import ome_zarr.scale import torch import zarr -from ome_zarr.io import parse_url -from ome_zarr.reader import Reader -from ome_zarr.writer import write_multiscale +from argolid import PyramidGenerator3D +from ngff_zarr import ( + from_ngff_zarr, +) from PIL import Image from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator from sam2.build_sam import build_sam2 +CHUNK_SIZE = 1024 # Size of each chunk (1024×1024 pixels) +CHUNK_OVERLAP = 100 # Overlap between chunks (100 pixels on each side) +NUM_WORKERS = 1 # Sam2 Automatic mask segmentation cannot do batch + + +def create_segment_properties_info(): + """ + Create an empty segment properties info file compatible with neuroglancer format. 
+ """ + segment_properties = { + "@type": "neuroglancer_segment_properties", + "inline": { + "ids": [], + "properties": [ + {"id": "class", "type": "label", "values": []}, + {"id": "description", "type": "description", "values": []}, + { + "id": "tags", + "type": "tags", + "tags": [ + "verified", + "reviewed", + "uncertain", + "automated", + "manual", + "incomplete", + "merged", + "split", + ], + "tag_descriptions": [ + "Verified by expert annotation", + "Reviewed but may need additional verification", + "Confidence in classification is low", + "Generated by automated pipeline", + "Manually annotated", + "Segmentation may be incomplete", + "Result of merge operation", + "Result of split operation", + ], + "values": [], + }, + ], + }, + } + + return segment_properties + + def get_device(): """Get the appropriate device for the current system.""" if torch.backends.mps.is_available(): @@ -29,15 +77,45 @@ def get_device(): return torch.device("cpu") -def init_sam2_predictor(checkpoint_path): - """Initialize SAM2 predictor with given checkpoint""" +def init_sam2_predictor(config_path, checkpoint_path, image_shape=None): device = get_device() - model = build_sam2("configs/sam2.1/sam2.1_hiera_l.yaml", checkpoint_path, device=str(device)) + model = build_sam2(config_path, checkpoint_path, device=str(device)) + return SAM2AutomaticMaskGenerator(model) +def normalize_to_uint8(array): + if array.dtype != np.uint8: + array_min = array.min() + array_max = array.max() + if array_max > array_min: + array = ((array - array_min) * 255 / (array_max - array_min)).astype( + np.uint8 + ) + else: + array = np.zeros_like(array, dtype=np.uint8) + return array + + +def process_chunk(chunk, predictor): + """ + Process a single chunk of an image and generate segmentation mask. 
+ + Args: + chunk: Chunk of image data + predictor: SAM2 predictor + + Returns: + Segmentation mask for the chunk + """ + chunk = normalize_to_uint8(chunk) + + img = Image.fromarray(chunk) + + return generate_segmentation_mask(predictor, img) + + def generate_segmentation_mask(predictor, image): - """Generate segmentation mask for given PIL image.""" img_array = np.array(image) if len(img_array.shape) == 2 or ( @@ -54,144 +132,302 @@ def generate_segmentation_mask(predictor, image): height, width = img_array.shape[:2] label_image = np.zeros((height, width), dtype=np.uint16) - - for idx, mask in enumerate(masks, start=1): # Start from 1, leaving 0 as background + + for idx, mask in enumerate(masks, start=1): label_image[mask["segmentation"]] = idx - + return label_image -def segment_image(predictor, image): - return generate_segmentation_mask(predictor, image) - -def create_zarr_from_segmentations(segmentations, original_dataset_path, output_dataset_path): - """Create an OME-ZARR dataset from segmentation masks with unique instance labels.""" - # Get original structure - location = parse_url(original_dataset_path) - reader = Reader(location) - nodes = list(reader()) - image_node = nodes[0] - image_data = image_node.data[0] - ndim = len(image_data.shape) - - # Get original metadata - axes = image_node.metadata["axes"] - original_chunks = image_data.chunks[0] # First resolution level - - # Create output directory - output_path = pathlib.Path(output_dataset_path) - if output_path.exists(): - import shutil - shutil.rmtree(output_path) - output_path.mkdir(parents=True) +def initialize_job( + output_path: pathlib.Path, +) -> Tuple[Dict, bool, List[int]]: + """ + Creates directories and sets up progress file + """ + resuming = output_path.exists() + progress_file = output_path / "progress.json" + + if resuming: + print("Found existing output directory. 
Attempting to resume segmentation.") + with open(progress_file, "r") as f: + progress = json.load(f) + processed_slices = progress.get("processed_slices", []) + else: + print("Starting new segmentation job") + output_path.mkdir(parents=True) + processed_slices = [] + progress = {"processed_slices": processed_slices, "total_slices": 0} + + return progress, resuming, processed_slices + + +def initialize_zarr_store( + output_path: pathlib.Path, original_path: pathlib.Path, dtype=np.uint16 +) -> zarr.Group: + """ + Copy the original dataset to the output location, which we will modify later + """ + print(f"Copying original dataset from {original_path} to {output_path}") + + # Make sure output directory exists + os.makedirs(output_path, exist_ok=True) + + # Copy the original dataset to the output location + if os.path.exists(original_path) and original_path != output_path: + # Copy zarr directory contents + for item in os.listdir(original_path): + s = os.path.join(original_path, item) + d = os.path.join(output_path, item) + if os.path.isdir(s): + shutil.copytree(s, d, dirs_exist_ok=True) + else: + shutil.copy2(s, d) + + # Initialize segment properties + segment_properties = create_segment_properties_info() + info_path = output_path / "info" + with open(info_path, "w") as f: + json.dump(segment_properties, f, indent=2) + + # Open the zarr store at the output location store = zarr.DirectoryStore(str(output_path), dimension_separator="/") root = zarr.group(store) - # Get dimensions from first mask - first_mask = np.array(segmentations[0]) + return root - # Create array matching original dimensions - if ndim == 5: # (T, C, Z, Y, X) - masks = np.zeros( - (1, 1, len(segmentations), first_mask.shape[0], first_mask.shape[1]), - dtype=np.uint16, + +def update_segment_properties( + output_path: pathlib.Path, segmentation: np.ndarray +) -> None: + """ + Update segment properties file with new segments from a segmentation mask. + + Args: + output_path: Path to the output directory + segmentation: Segmentation mask with new segments + """ + info_path = output_path / "info" + with open(info_path, "r") as f: + segment_properties = json.load(f) + + # Add any new segments from this slice + unique_ids = np.unique(segmentation) + unique_ids = unique_ids[unique_ids > 0] + + str_ids = [str(int(id)) for id in unique_ids] + current_str_ids = segment_properties["inline"]["ids"] + + # Add any new IDs not already in the properties + new_ids = [id for id in str_ids if id not in current_str_ids] + if new_ids: + for id in new_ids: + segment_properties["inline"]["ids"].append(id) + segment_properties["inline"]["properties"][0]["values"].append( + f"segment {id}" + ) + segment_properties["inline"]["properties"][1]["values"].append( + f"Automatically generated segment {id}" + ) + segment_properties["inline"]["properties"][2]["values"].append( + [3] + ) # Automated tag + + # Write updated properties + with open(info_path, "w") as f: + json.dump(segment_properties, f, indent=2) + + +def update_progress( + output_path: pathlib.Path, + progress: Dict, + processed_slices: List[int], + slice_index: int, +) -> None: + """ + Update the progress file with newly processed slice. 
+ + Args: + output_path: Path to the output directory + progress: Progress dictionary + processed_slices: List of processed slice indices + slice_index: Index of the newly processed slice + """ + if slice_index not in processed_slices: + processed_slices.append(slice_index) + progress["processed_slices"] = processed_slices + progress_file = output_path / "progress.json" + with open(progress_file, "w") as f: + json.dump(progress, f) + + +def process_slice( + volume: np.ndarray, + slice_index: int, + predictor: SAM2AutomaticMaskGenerator, + zarr_root: zarr.Group, + ndim: int, + output_path: pathlib.Path, + progress: Dict, + processed_slices: List[int], + num_slices: int, +) -> None: + """ + Process a single slice and update zarr store. + + Args: + volume: Volume data + slice_index: Index of the slice to process + predictor: SAM2 predictor + zarr_root: Zarr root group + ndim: Number of dimensions + output_path: Path to the output directory + progress: Progress dictionary + processed_slices: List of processed slice indices + num_slices: Total number of slices + """ + if slice_index in processed_slices: + print(f"Skipping already processed slice {slice_index}/{num_slices}") + return + + # Get the slice and compute + slice_data = volume[slice_index] + # Check if the slice is small enough to process directly + if slice_data.shape[0] <= CHUNK_SIZE and slice_data.shape[1] <= CHUNK_SIZE: + slice_data = normalize_to_uint8(slice_data) + img = Image.fromarray(slice_data) + segmentation = generate_segmentation_mask(predictor, img) + else: + # For large images, use dask map_overlap to process in chunks + print( + f"Processing slice {slice_index} in {CHUNK_SIZE}×{CHUNK_SIZE} chunks with {CHUNK_OVERLAP} pixel overlap" ) - else: # (C, Z, Y, X) - masks = np.zeros( - (1, len(segmentations), first_mask.shape[0], first_mask.shape[1]), + + # Use map_overlap to process the image in chunks with overlap + chunked_result = da.map_overlap( + process_chunk, + slice_data, + depth={0: CHUNK_OVERLAP, 1: CHUNK_OVERLAP}, + boundary="reflect", dtype=np.uint16, + chunks=(CHUNK_SIZE, CHUNK_SIZE), + predictor=predictor, ) - print(f"Loading {len(segmentations)} segmentation masks...") - for i, segmentation in enumerate(segmentations): - mask = np.array(segmentation) - if ndim == 5: - masks[0, 0, i] = mask - else: - masks[0, i] = mask - - scaler = ome_zarr.scale.Scaler() - pyramid = scaler.nearest(masks) - - write_multiscale( - pyramid=pyramid, - group=root, - axes=axes, - storage_options={ - "chunks": original_chunks, - "dimension_separator": "/", - }, - ) + # Compute the final result + segmentation = chunked_result.compute(scheduler="synchronous") - return output_path + zarr_array = zarr_root["0"] + z = zarr.open(zarr_array.store, path=zarr_array.path, mode="r+") + # Update the specific slice in the zarr array to save progress + if ndim == 5: # (T, C, Z, Y, X) + z[0, 0, slice_index] = segmentation + else: # (C, Z, Y, X) + z[0, slice_index] = segmentation + + # Update segment properties + update_segment_properties(output_path, segmentation) + + # Update progress + update_progress(output_path, progress, processed_slices, slice_index) + + # Print progress + if slice_index % 10 == 0 or slice_index == num_slices - 1: + print( + f"Processed slice {slice_index}/{num_slices} ({len(processed_slices)}/{num_slices} complete)" + ) -def autosegment_dataset(input_dataset_path: Path | str, output_dataset_path: Path | str): - location = parse_url(input_dataset_path) - reader = Reader(location) - nodes = list(reader()) - # First node has 
highest resolution
-    image_node = nodes[0]
-    image_data = image_node.data[0]
+def autosegment_dataset(input_dir: pathlib.Path, output_path: pathlib.Path):
+    progress, resuming, processed_slices = (
+        initialize_job(output_path)
+    )
 
-    print(f"Dataset shape: {image_data.shape}")
-    print(f"Data chunks: {image_data.chunks}")
+    multiscales = from_ngff_zarr(input_dir)
+    image_data = multiscales.images[0]  # Get the highest resolution image
 
-    ndim = len(image_data.shape)
+    ndim = len(image_data.data.shape)
+    print(f"Dataset shape: {image_data.data.shape}")
+    print(f"Data chunks: {image_data.data.chunks}")
 
+    # Extract the correct volume based on dimensionality
     if ndim == 5:  # Typically (T, C, Z, Y, X)
         print("5D dataset detected (T, C, Z, Y, X)")
-        volume = image_data[0, 0]
+        volume = image_data.data[0, 0]
+        slice_shape = (volume[0].shape[0], volume[0].shape[1])
     elif ndim == 4:  # Typically (C, Z, Y, X)
         print("4D dataset detected (C, Z, Y, X)")
-        volume = image_data[0]
+        volume = image_data.data[0]
+        slice_shape = (volume[0].shape[0], volume[0].shape[1])
     else:
         raise ValueError(f"Unexpected number of dimensions: {ndim}")
 
     num_slices = volume.shape[0]
     print(f"Processing {num_slices} Z-slices from channel")
 
-    segmentations = []
+    if not resuming or progress.get("total_slices", 0) != num_slices:
+        progress["total_slices"] = num_slices
+        with open(output_path / "progress.json", "w") as f:
+            json.dump(progress, f)
+
     sam2_predictor = init_sam2_predictor(
-        "../models/sam2.1_hiera_large.pt",
+        "configs/sam2.1/sam2.1_hiera_s.yaml",
+        "models/sam2.1_hiera_small.pt",
+        image_shape=slice_shape,
     )
+
+    if not resuming:
+        root = initialize_zarr_store(
+            output_path,
+            input_dir,
+        )
+    else:
+        store = zarr.DirectoryStore(str(output_path), dimension_separator="/")
+        root = zarr.group(store)
+
     for z in range(num_slices):
-        slice_data = volume[z].compute()
-
-        # Normalize to 0-255 range
-        if slice_data.dtype != np.uint8:
-            slice_min = slice_data.min()
-            slice_max = slice_data.max()
-            if slice_max > slice_min:
-                slice_data = (
-                    (slice_data - slice_min) * 255 / (slice_max - slice_min)
-                ).astype(np.uint8)
-            else:
-                slice_data = np.zeros_like(slice_data, dtype=np.uint8)
+        process_slice(
+            volume,
+            z,
+            sam2_predictor,
+            root,
+            ndim,
+            output_path,
+            progress,
+            processed_slices,
+            num_slices,
+        )
 
-        img = Image.fromarray(slice_data)
-        segmentations.append(segment_image(sam2_predictor, img))
-
-        if z % 10 == 0:
-            print(f"Processed slice {z}/{num_slices}")
-
-
-    create_zarr_from_segmentations(segmentations,
-        input_dataset_path, output_dataset_path
-    )
+    zarr_loc_dir = str(output_path)
+    base_scale_key = 0
+    num_levels = len(multiscales.images)
+
+    pyramid_gen = PyramidGenerator3D(zarr_loc_dir, base_scale_key)
+    pyramid_gen.generate_pyramid(num_levels)
+
+    # Set permissions recursively on the output directory
+    print(f"Setting permissions on {output_path}")
+    for root_dir, dirs, files in os.walk(str(output_path)):
+        for dir_name in dirs:
+            dir_path = os.path.join(root_dir, dir_name)
+            os.chmod(dir_path, 0o755)
+        # Set 0755 (rwxr-xr-x) permissions on files
+        for file_name in files:
+            file_path = os.path.join(root_dir, file_name)
+            os.chmod(file_path, 0o755)
 
-def autosegmentation(inp_dir: Path, out_dir: Path):
+def autosegmentation(input_dir: pathlib.Path, output_dir: pathlib.Path):
     """ome_zarr_autosegmentation.
Args: - inp_dir: input directory to process + input_dir: input directory to process filepattern: filepattern to filter inputs - out_dir: output directory + output_dir: output ome-zarr directory Returns: None """ - autosegment_dataset(inp_dir, out_dir) - + autosegment_dataset(input_dir, output_dir) \ No newline at end of file From 90b5120669ae7c6493d52d9b82ce8b4558d47398 Mon Sep 17 00:00:00 2001 From: David Liu Date: Thu, 3 Apr 2025 15:29:39 -0400 Subject: [PATCH 07/10] fix python version --- segmentation/ome-zarr-autosegmentation-plugin/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile b/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile index f380b9bed..8547e591a 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile +++ b/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile @@ -27,9 +27,9 @@ WORKDIR /app COPY --from=builder /app /app -COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages +COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages WORKDIR /app/src ENTRYPOINT ["python3", "-m", "polus.images.segmentation.ome_zarr_autosegmentation"] -CMD ["--help"] \ No newline at end of file +CMD ["--help"] From 331ce21249a41908ad66700218f5a85172bbeb8c Mon Sep 17 00:00:00 2001 From: David Liu Date: Thu, 17 Apr 2025 12:31:03 -0400 Subject: [PATCH 08/10] remove unnecessary file --- segmentation/ome-zarr-autosegmentation-plugin/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile b/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile index 8547e591a..2bb047c5e 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile +++ b/segmentation/ome-zarr-autosegmentation-plugin/Dockerfile @@ -14,7 +14,7 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ WORKDIR /app -COPY pyproject.toml uv.lock ./ +COPY pyproject.toml ./ RUN uv pip install --system -e . 
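(The patch below touches the chunked-processing path added in patch 06, where large slices are segmented tile by tile via dask's `map_overlap` with a halo so that objects crossing chunk borders are seen whole by each tile. A minimal sketch of that pattern with a toy segmenter standing in for the SAM2 mask generator; the `segment_tile` function here is an assumption for illustration only, not the plugin's code.)

import dask.array as da
import numpy as np

CHUNK_SIZE = 1024    # tile size used by the plugin
CHUNK_OVERLAP = 100  # halo shared between neighbouring tiles

def segment_tile(tile: np.ndarray) -> np.ndarray:
    # Toy stand-in for SAM2: threshold each tile into a crude binary label map.
    return (tile > tile.mean()).astype(np.uint16)

# A synthetic 2D slice larger than a single tile.
slice_data = da.random.random((3000, 2500), chunks=(CHUNK_SIZE, CHUNK_SIZE))

# Each tile is padded with a 100-pixel reflected halo, segmented
# independently, and trimmed back before the tiles are reassembled.
labels = da.map_overlap(
    segment_tile,
    slice_data,
    depth={0: CHUNK_OVERLAP, 1: CHUNK_OVERLAP},
    boundary="reflect",
    dtype=np.uint16,
).compute(scheduler="synchronous")

print(labels.shape, labels.dtype)  # (3000, 2500) uint16

Note that labels produced this way are only unique within a tile; stitching IDs across tile borders would need an extra relabeling pass.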
From ca476b22485e68778479bbf83a9a32d10e4a7983 Mon Sep 17 00:00:00 2001 From: David Liu Date: Mon, 8 Dec 2025 15:09:15 -0500 Subject: [PATCH 09/10] updates --- .../autosegmentation.py | 38 ++++++++----------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py index c040f1869..cc0443dc3 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py +++ b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py @@ -1,14 +1,10 @@ import json import os +import pathlib +import shutil from typing import Dict, List, Tuple import dask.array as da - -os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" -os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" - -import pathlib -import shutil import numpy as np import torch import zarr @@ -20,6 +16,9 @@ from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator from sam2.build_sam import build_sam2 +os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" +os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" + CHUNK_SIZE = 1024 # Size of each chunk (1024×1024 pixels) CHUNK_OVERLAP = 100 # Overlap between chunks (100 pixels on each side) NUM_WORKERS = 1 # Sam2 Automatic mask segmentation cannot do batch @@ -158,27 +157,25 @@ def initialize_job( output_path.mkdir(parents=True) processed_slices = [] progress = {"processed_slices": processed_slices, "total_slices": 0} + with open(progress_file, "w") as f: + json.dump(progress, f) return progress, resuming, processed_slices def initialize_zarr_store( - output_path: pathlib.Path, original_path: pathlib.Path, dtype=np.uint16 + output_path: pathlib.Path, original_dataset_path: pathlib.Path, dtype=np.uint16 ) -> zarr.Group: """ Copy the original dataset to the output location, which we will modify later """ - print(f"Copying original dataset from {original_path} to {output_path}") - - # Make sure output directory exists - os.makedirs(output_path, exist_ok=True) + print(f"Copying original dataset from {original_dataset_path} to {output_path}") # Copy the original dataset to the output location - if os.path.exists(original_path) and original_path != output_path: - # Copy zarr directory contents - for item in os.listdir(original_path): - s = os.path.join(original_path, item) - d = os.path.join(output_path, item) + if original_dataset_path != output_path: + for item in original_dataset_path.iterdir(): + s = original_dataset_path / item.name + d = output_path / item.name if os.path.isdir(s): shutil.copytree(s, d, dirs_exist_ok=True) else: @@ -294,6 +291,7 @@ def process_slice( slice_data = volume[slice_index] # Check if the slice is small enough to process directly if slice_data.shape[0] <= CHUNK_SIZE and slice_data.shape[1] <= CHUNK_SIZE: + slice_data = np.array(slice_data) slice_data = normalize_to_uint8(slice_data) img = Image.fromarray(slice_data) segmentation = generate_segmentation_mask(predictor, img) @@ -340,9 +338,7 @@ def process_slice( def autosegment_dataset(input_dir: pathlib.Path, output_path: pathlib.Path): - progress, resuming, processed_slices = ( - initialize_job(output_path) - ) + progress, resuming, processed_slices = initialize_job(output_path) multiscales = from_ngff_zarr(input_dir) 
image_data = multiscales.images[0] # Get the highest resolution image @@ -424,10 +420,8 @@ def autosegmentation(input_dir: pathlib.Path, output_dir: pathlib.Path): Args: input_dir: input directory to process - filepattern: filepattern to filter inputs output_dir: output ome-zarr directory Returns: None """ - autosegment_dataset(input_dir, output_dir) - \ No newline at end of file + autosegment_dataset(input_dir, output_dir) \ No newline at end of file From 3e676728e3d3068fc2ad2ca275c9b6acf044776d Mon Sep 17 00:00:00 2001 From: David Liu Date: Mon, 8 Dec 2025 15:10:12 -0500 Subject: [PATCH 10/10] appease linter --- .../segmentation/ome_zarr_autosegmentation/autosegmentation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py index cc0443dc3..60c6dfbe9 100644 --- a/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py +++ b/segmentation/ome-zarr-autosegmentation-plugin/src/polus/images/segmentation/ome_zarr_autosegmentation/autosegmentation.py @@ -424,4 +424,4 @@ def autosegmentation(input_dir: pathlib.Path, output_dir: pathlib.Path): Returns: None """ - autosegment_dataset(input_dir, output_dir) \ No newline at end of file + autosegment_dataset(input_dir, output_dir)
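(A closing note on the resume logic that patch 09 above consolidates: `initialize_job` now writes `progress.json` on the first run as well, so every run can rely on the file existing. A minimal sketch of the bookkeeping, assuming only the `processed_slices`/`total_slices` keys used in the plugin; the real file layout may differ in detail.)

import json
import pathlib

def load_or_init_progress(out_dir: pathlib.Path, total_slices: int) -> dict:
    # Return the progress record, creating the file on the first run.
    progress_file = out_dir / "progress.json"
    if progress_file.exists():
        return json.loads(progress_file.read_text())
    out_dir.mkdir(parents=True, exist_ok=True)
    progress = {"processed_slices": [], "total_slices": total_slices}
    progress_file.write_text(json.dumps(progress))
    return progress

def mark_done(out_dir: pathlib.Path, progress: dict, z: int) -> None:
    # Persist immediately so a crash loses at most the slice in flight.
    if z not in progress["processed_slices"]:
        progress["processed_slices"].append(z)
        (out_dir / "progress.json").write_text(json.dumps(progress))

out = pathlib.Path("output.zarr")
progress = load_or_init_progress(out, total_slices=5)
for z in range(progress["total_slices"]):
    if z in progress["processed_slices"]:
        continue  # resume: skip slices finished by an earlier run
    # ... segment slice z and write it into the store here ...
    mark_done(out, progress, z)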