From 9b67a6cbbc953d0767a49532da53787ad182eaa3 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 28 Feb 2024 14:59:15 -0600 Subject: [PATCH 01/16] cwlfeature_extraction --- README.md | 84 ++++++ bbbc_json/bbbc_config.json | 17 ++ cwl_adapters/basic-flatfield-estimation.cwl | 115 -------- cwl_adapters/bbbcdownload.cwl | 61 ---- cwl_adapters/file-renaming.cwl | 85 ------ cwl_adapters/image_assembler.cwl | 101 ------- cwl_adapters/montage.cwl | 123 -------- cwl_adapters/ome-converter.cwl | 85 ------ cwl_adapters/precompute_slide.cwl | 81 ------ cwl_workflows/__init__.py | 2 + cwl_workflows/__main__.py | 54 ++++ .../cwl_features_extraction.cpython-310.pyc | Bin 0 -> 8762 bytes .../__pycache__/utils.cpython-310.pyc | Bin 0 -> 1923 bytes {workflows => cwl_workflows}/bbbc.py | 0 {workflows => cwl_workflows}/bbbc.yml | 0 cwl_workflows/cwl_features_extraction.py | 265 ++++++++++++++++++ cwl_workflows/utils.py | 45 +++ 17 files changed, 467 insertions(+), 651 deletions(-) create mode 100644 README.md create mode 100644 bbbc_json/bbbc_config.json delete mode 100644 cwl_adapters/basic-flatfield-estimation.cwl delete mode 100644 cwl_adapters/bbbcdownload.cwl delete mode 100644 cwl_adapters/file-renaming.cwl delete mode 100644 cwl_adapters/image_assembler.cwl delete mode 100644 cwl_adapters/montage.cwl delete mode 100644 cwl_adapters/ome-converter.cwl delete mode 100644 cwl_adapters/precompute_slide.cwl create mode 100644 cwl_workflows/__init__.py create mode 100644 cwl_workflows/__main__.py create mode 100644 cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc create mode 100644 cwl_workflows/__pycache__/utils.cpython-310.pyc rename {workflows => cwl_workflows}/bbbc.py (100%) rename {workflows => cwl_workflows}/bbbc.yml (100%) create mode 100644 cwl_workflows/cwl_features_extraction.py create mode 100644 cwl_workflows/utils.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..c735f02 --- /dev/null +++ b/README.md @@ -0,0 +1,84 @@ +# Common 
Workflow Language (CWL) Feature Extraction workflow + +CWL feature extraction workflow for imaging dataset + +## Workflow Steps: + +create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#activating-an-environment) environment using python = ">=3.9,<3.12" + +#### 1. Install polus-plugins. + +- clone the image-tools repository +`git clone https://github.com/camilovelezr/image-tools.git` +- cd `image-tools` +- `pip install .` + +#### 2. Install workflow-inference-compiler. +- clone the workflow-inference-compiler repository +`git clone https://github.com/camilovelezr/workflow-inference-compiler.git` +- cd `workflow-inference-compiler` +- `pip install -e ".[all]"` + +## Details +This workflow integrates eight distinct plugins, starting from data retrieval from [Broad Bioimage Benchmark Collection](https://bbbc.broadinstitute.org/), renaming files, correcting uneven illumination, segmenting nuclear objects, and culminating in the extraction of features from identified objects. + +Below are the specifics of the plugins employed in the workflow +1. [bbbc-download-plugin](https://github.com/saketprem/polus-plugins/tree/bbbc_download/utils/bbbc-download-plugin) +2. [file-renaming-tool](https://github.com/PolusAI/image-tools/tree/master/formats/file-renaming-tool) +3. [ome-converter-tool](https://github.com/PolusAI/image-tools/tree/master/formats/ome-converter-tool) +4. [basic-flatfield-estimation-tool](https://github.com/PolusAI/image-tools/tree/master/regression/basic-flatfield-estimation-tool) +5. [apply-flatfield-tool](https://github.com/PolusAI/image-tools/tree/master/transforms/images/apply-flatfield-tool) +6. [kaggle-nuclei-segmentation](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/segmentation/kaggle-nuclei-segmentation) +7. [polus-ftl-label-plugin](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/transforms/images/polus-ftl-label-plugin) +8. 
[nyxus-plugin](https://github.com/PolusAI/image-tools/tree/kaggle-nuclei_seg/features/nyxus-plugin) + +## Execute CWL feature extraction workflow + +The parameters for each imaging dataset are pre-defined and stored in JSON format. A Pydantic model in a utils Python file can be utilized to store parameters for any new dataset. + +`python cwl_workflows/__main__.py --name="BBBC039" --workflow=CWLFeatureWorkflow` + +A directory named `workflows` is generated, containing the CLTs for each plugin and the YAML files; all outputs are stored within the `outdir` directory. +``` +workflows +├── experiment +│ └── cwl_adapters +| experiment.cwl +| experiment.yml +| +└── outdir + └── experiment + ├── step 1 BbbcDownload + │ └── outDir + │ └── bbbc.outDir + │ └── BBBC + │ └── BBBC039 + │ └── raw + │ ├── Ground_Truth + │ │ ├── masks + │ │ └── metadata + │ └── Images + │ └── images + ├── step 2 FileRenaming + │ └── outDir + │ └── rename.outDir + ├── step 3 OmeConverter + │ └── outDir + │ └── ome_converter.outDir + ├── step 4 BasicFlatfieldEstimation + │ └── outDir + │ └── estimate_flatfield.outDir + ├── step 5 ApplyFlatfield + │ └── outDir + │ └── apply_flatfield.outDir + ├── step 6 KaggleNucleiSegmentation + │ └── outDir + │ └── kaggle_nuclei_segmentation.outDir + ├── step 7 FtlLabel + │ └── outDir + │ └── ftl_plugin.outDir + └── step 8 NyxusPlugin + └── outDir + └── nyxus_plugin.outDir + +``` diff --git a/bbbc_json/bbbc_config.json b/bbbc_json/bbbc_config.json new file mode 100644 index 0000000..c5d55c6 --- /dev/null +++ b/bbbc_json/bbbc_config.json @@ -0,0 +1,17 @@ +{ + "data": { + "BBBC039": { + "name": "BBBC039", + "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", + "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", + "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", + "seg_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", + "map_directory": "raw", + "ff_pattern": 
"images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", + "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", + "group_by": "c", + "features": "ALL_INTENSITY,ALL_MORPHOLOGY", + "file_extension": "pandas" + } + } +} \ No newline at end of file diff --git a/cwl_adapters/basic-flatfield-estimation.cwl b/cwl_adapters/basic-flatfield-estimation.cwl deleted file mode 100644 index 3893ae8..0000000 --- a/cwl_adapters/basic-flatfield-estimation.cwl +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: BaSiC Flatfield Estimation - -doc: |- - This WIPP plugin will take a collection of images and use the BaSiC flatfield correction algorithm to generate a flatfield image, a darkfield image, and a photobleach offset. - https://github.com/PolusAI/polus-plugins/tree/master/regression/basic-flatfield-estimation-plugin - -requirements: - DockerRequirement: - dockerPull: polusai/basic-flatfield-estimation-plugin:2.1.1 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.regression.basic_flatfield_estimation"] - -# "jax._src.xla_bridge - WARNING - An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu." 
-hints: - cwltool:CUDARequirement: - cudaVersionMin: "11.4" - cudaComputeCapabilityMin: "3.0" - cudaDeviceCountMin: 1 - cudaDeviceCountMax: 1 - -inputs: - inpDir: - label: Path to input images - doc: |- - Path to input images - type: Directory - inputBinding: - prefix: --inpDir - - getDarkfield: - label: If 'true', will calculate darkfield image - doc: |- - If 'true', will calculate darkfield image - type: boolean? - inputBinding: - prefix: --getDarkfield - - # photobleach: - # label: If 'true', will calculate photobleach scalar - # doc: |- - # If 'true', will calculate photobleach scalar - # type: boolean? - # inputBinding: - # prefix: --photobleach - - filePattern: - label: File pattern to subset data - doc: |- - File pattern to subset data - type: string? - inputBinding: - prefix: --filePattern - - groupBy: - label: Variables to group together - doc: |- - Variables to group together - type: string? - inputBinding: - prefix: --groupBy - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - type: boolean? - inputBinding: - prefix: --preview - - outDir: - label: Output image collection - doc: |- - Output image collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output image collection - doc: |- - Output image collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - preview_json: - label: JSON file describing what the outputs should be - doc: |- - JSON file describing what the outputs should be - type: File? 
# if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - cwltool: http://commonwl.org/cwltool# - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/regression/basic-flatfield-estimation-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/bbbcdownload.cwl b/cwl_adapters/bbbcdownload.cwl deleted file mode 100644 index 252514a..0000000 --- a/cwl_adapters/bbbcdownload.cwl +++ /dev/null @@ -1,61 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.1 - -label: BBBC Download - -doc: |- - Downloads the datasets on the Broad Bioimage Benchmark Collection website - https://github.com/saketprem/polus-plugins/tree/bbbc_download/utils/bbbc-download-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.utils.bbbc_download"] - -requirements: - DockerRequirement: - dockerPull: polusai/bbbc-download-plugin:0.1.0-dev1 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - # NOTE: By default, "tools must not assume network access, except for localhost" - # See https://www.commonwl.org/v1.1/CommandLineTool.html#NetworkAccess - NetworkAccess: - networkAccess: true - -inputs: - name: - label: The name of the dataset(s) to be downloaded (separate the datasets with a comma. eg BBBC001,BBBC002,BBBC003) - doc: |- - The name of the dataset(s) to be downloaded (separate the datasets with a comma. 
eg BBBC001,BBBC002,BBBC003) - inputBinding: - prefix: --name - type: string - # default: BBBC001 - - outDir: - label: Output collection - doc: |- - Output collection - inputBinding: - prefix: --outDir - type: Directory - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: "https://raw.githubusercontent.com/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json" \ No newline at end of file diff --git a/cwl_adapters/file-renaming.cwl b/cwl_adapters/file-renaming.cwl deleted file mode 100644 index a2df113..0000000 --- a/cwl_adapters/file-renaming.cwl +++ /dev/null @@ -1,85 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.0 - -label: File Renaming - -doc: |- - Rename and store image collection files in a new image collection - https://github.com/PolusAI/polus-plugins/tree/master/formats/file-renaming-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.formats.file_renaming"] - -requirements: - DockerRequirement: - dockerPull: polusai/file-renaming-plugin:0.2.1-dev0 # NOTE: 0.2.3 not pushed yet - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - inpDir: - inputBinding: - prefix: --inpDir - type: Directory - - filePattern: - inputBinding: - prefix: --filePattern - type: string - - mapDirectory: - inputBinding: - prefix: --mapDirectory - type: string? 
# enum: raw, map, default - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - inputBinding: - prefix: --preview - type: boolean? - - outFilePattern: - inputBinding: - prefix: --outFilePattern - type: string - - outDir: - label: Output collection - doc: |- - Output collection - inputBinding: - prefix: --outDir - type: Directory - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - preview_json: - label: JSON file describing what the outputs should be - doc: |- - JSON file describing what the outputs should be - type: File? # if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/formats/file-renaming-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/image_assembler.cwl b/cwl_adapters/image_assembler.cwl deleted file mode 100644 index 5b9eca3..0000000 --- a/cwl_adapters/image_assembler.cwl +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: Image Assembler - -doc: |- - This plugin assembles images into a stitched image using an image stitching vector. 
- https://github.com/PolusAI/polus-plugins/tree/master/transforms/images/image-assembler-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.transforms.images.image_assembler"] - -requirements: - DockerRequirement: - dockerPull: polusai/image-assembler-plugin:1.4.0-dev0 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - $(inputs.stitchPath) # Must stage inputs for tools which do not accept full paths. - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - stitchPath: - label: Path to directory containing "stitching vector" file img-global-positions-0.txt - doc: |- - Path to directory containing "stitching vector" file img-global-positions-0.txt - type: Directory - inputBinding: - prefix: --stitchPath - - imgPath: - label: Path to input image collection - doc: |- - Path to input image collection - type: Directory - inputBinding: - prefix: --imgPath - - timesliceNaming: - label: Label images by timeslice rather than analyzing input image names - doc: |- - Label images by timeslice rather than analyzing input image names - inputBinding: - prefix: --timesliceNaming - type: boolean? - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - type: boolean? - inputBinding: - prefix: --preview - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - assembled_image: - label: The assembled montage image - doc: |- - JSON file with outputs - type: File? 
# if not --preview - # See https://bioportal.bioontology.org/ontologies/EDAM?p=classes&conceptid=format_3727 - format: edam:format_3727 - outputBinding: - glob: "*.ome.tif" - - preview_json: - label: JSON file with outputs - doc: |- - JSON file with outputs - type: File? # if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/transforms/images/image-assembler-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/montage.cwl b/cwl_adapters/montage.cwl deleted file mode 100644 index ac4007f..0000000 --- a/cwl_adapters/montage.cwl +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: Montage - -doc: |- - This plugin generates a stitching vector that will montage images together. 
- https://github.com/PolusAI/polus-plugins/tree/master/transforms/images/montage-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.transforms.images.montage"] - -requirements: - DockerRequirement: - dockerPull: polusai/montage-plugin:0.5.0 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - inpDir: - label: Input image collection to be processed by this plugin - doc: |- - Input image collection to be processed by this plugin - type: Directory - inputBinding: - prefix: --inpDir - - filePattern: - label: Filename pattern used to parse data - doc: |- - Filename pattern used to parse data - type: string - inputBinding: - prefix: --filePattern - - layout: - label: Specify montage organization - doc: |- - Specify montage organization - type: string? - # optional array of strings? - inputBinding: - prefix: --layout - - gridSpacing: - label: Specify spacing between images in the lowest grid - doc: |- - Specify spacing between images in the lowest grid - inputBinding: - prefix: --gridSpacing - type: int? - - imageSpacing: - label: Specify spacing multiplier between grids - doc: |- - Specify spacing multiplier between grids - inputBinding: - prefix: --imageSpacing - type: int? - - flipAxis: - label: Axes to flip when laying out images - doc: |- - Axes to flip when laying out images - inputBinding: - prefix: --flipAxis - type: string? - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - type: boolean? 
- inputBinding: - prefix: --preview - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - global_positions: - label: The "stitching vector", i.e. the positions of the individual images in the montage - doc: |- - The "stitching vector", i.e. the positions of the individual images in the montage - type: File? # if not --preview - outputBinding: - glob: $(inputs.outDir.basename)/img-global-positions-0.txt - - preview_json: - label: JSON file describing what the outputs should be - doc: |- - JSON file describing what the outputs should be - type: File? # if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/transforms/images/montage-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/ome-converter.cwl b/cwl_adapters/ome-converter.cwl deleted file mode 100644 index af846a5..0000000 --- a/cwl_adapters/ome-converter.cwl +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: OME Zarr Converter - -doc: |- - This WIPP plugin converts BioFormats supported data types to the OME Zarr file format. 
- https://github.com/PolusAI/polus-plugins/tree/master/formats/ome-converter-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.formats.ome_converter"] - -requirements: - DockerRequirement: - dockerPull: jakefennick/ome-converter-plugin:0.3.2 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} -# NOTE: polusai/ome-converter-plugin:0.3.1 uses the base image -# polusai/bfio:2.3.2 which now un-bundles the java maven package -# ome:formats-gpl:7.1.0 due to licensing reasons. -# To avoid requiring network access at runtime, in the bfio Dockerfile -# it is pre-installed and saved in ~/.m2/ However, by default -# CWL hides all environment variables (including HOME), so we need to -# set HOME here so that at runtime we get a cache hit on the maven install. 
- EnvVarRequirement: -# See https://www.commonwl.org/user_guide/topics/environment-variables.html - envDef: - HOME: /home/polusai - -inputs: - inpDir: - label: Input generic data collection to be processed by this plugin - doc: |- - Input generic data collection to be processed by this plugin - type: Directory - inputBinding: - prefix: --inpDir - - filePattern: - label: A filepattern, used to select data for conversion - doc: |- - A filepattern, used to select data for conversion - type: string - inputBinding: - prefix: --filePattern - - fileExtension: - label: The file extension - doc: |- - The file extension - type: string - inputBinding: - prefix: --fileExtension - default: "default" # enum: .ome.tiff, .ome.zarr, default - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/formats/ome-converter-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/precompute_slide.cwl b/cwl_adapters/precompute_slide.cwl deleted file mode 100644 index 44753d2..0000000 --- a/cwl_adapters/precompute_slide.cwl +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: Precompute Slide - -doc: |- - This plugin generates image pyramids in multiple viewing formats. 
- https://github.com/PolusAI/polus-plugins/tree/master/visualization/polus-precompute-slide-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.visualization.precompute_slide"] - -requirements: - DockerRequirement: - dockerPull: polusai/precompute-slide-plugin:1.7.0-dev0 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - inpDir: - label: Input generic data collection to be processed by this plugin - doc: |- - Input generic data collection to be processed by this plugin - type: Directory - inputBinding: - prefix: --inpDir - - pyramidType: - label: Build a DeepZoom, Neuroglancer, Zarr pyramid - doc: |- - Build a DeepZoom, Neuroglancer, Zarr pyramid - type: string # enum: DeepZoom, Neuroglancer, Zarr - inputBinding: - prefix: --pyramidType - - imageType: - label: Image is either Segmentation or Image - doc: |- - Image is either Segmentation or Image - inputBinding: - prefix: --imageType - type: string - - filePattern: - label: Filename pattern used to parse data - doc: |- - Filename pattern used to parse data - type: string? 
- inputBinding: - prefix: --filePattern - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# \ No newline at end of file diff --git a/cwl_workflows/__init__.py b/cwl_workflows/__init__.py new file mode 100644 index 0000000..b2e9ca5 --- /dev/null +++ b/cwl_workflows/__init__.py @@ -0,0 +1,2 @@ +import cwl_features_extraction as cwl_features_extraction +import utils as utils \ No newline at end of file diff --git a/cwl_workflows/__main__.py b/cwl_workflows/__main__.py new file mode 100644 index 0000000..971173e --- /dev/null +++ b/cwl_workflows/__main__.py @@ -0,0 +1,54 @@ +"""Ome Converter.""" +import logging +from typing import Any +from typing import Optional +import typer +from utils import JSON_FILENAME +from utils import get_params +from cwl_features_extraction import CWLFeatureWorkflow + + +app = typer.Typer() + +# Initialize the logger +logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) +logger = logging.getLogger("WIC Python API") +logger.setLevel(logging.INFO) + + +@app.command() +def main( + name: str = typer.Option( + ..., + "--name", + "-n", + help="Name of imaging dataset of Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets)" + ), + workflow: str = typer.Option( + ..., + "--workflow", + "-w", + help="Name of cwl workflow" + ) +) -> None: + + """Execute CWL Workflow.""" + + logger.info(f"name = {name}") + logger.info(f"workflow = {workflow}") + + params = get_params(JSON_FILENAME, name) + + if workflow == "CWLFeatureWorkflow": + logger.info(f"Executing {workflow}!!!") + model = 
CWLFeatureWorkflow(**params) + model.workflow() + + logger.info("Completed CWL workflow!!!") + + +if __name__ == "__main__": + app() \ No newline at end of file diff --git a/cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc b/cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5f83f499631cc14de5d0c8d4ff9374f7cac7cc4 GIT binary patch literal 8762 zcma)B%ahwidIvy|1TiG%q0#$c$WKwbL)xM(eu^vlTQp2zs;~4^O;MGeW~*vClI!VqUfyed zp;xqvlX1f~Bt*$B$=dh9-&Hql|;itHw ze?r3*TV;x^G1bnMv{H`Mn9hM8R{>WMmw~H<%fz*WtBk9{^IModsjzBx*FMIU*~%Ar zyT~rGBW(Q(9dCP)9c9OGSHj(Kb^>>%#J|e*w$4tnQy5udr`ee=6ukRcb`E`oJ%=4u zd39?UYbY7(`k!8jfLBay)dx)UZ4A3!0TqZE{=uQ9K<}jBA*F1aC_Xk zVr^0vwk@yccD$ftF*kN29@EhKB6OMcz8BIQx8CPLtJ`zMBkNk|`#jy~LN|{4(UnG{ z*=)AzO@i=(DE4A(Ru4s|Az>Y0jB54i-fhq4PT!4VE&`kZjFxxpP*{CeM05_}ZdH}!&;2ay-=NOJHQ7RznpgqEP=(CN0j zJg^4xpgp(mFi(IUq1c)37V*v@FF1&}6^HU2xrHS?9>lI>SW4pQ3s3PZU!yOO`XNWkl&}#c`-1fN7tXA0ThXD>0S#1&a zte7yguX|!fd^X$Q|BIhNUVZ@o^!yzW4*E`W=ZbZc?ugD$T;aJ*pW{purcPvK3N#V^ z=*mP%Ca2%rc{+%!sdRR{xI1Xp@s5pqp+AVO-f9H2qEW+pLSzCCs?79fhMfcz<0KVf zaK*Yii2DOj4Z{;@lS`A_)!Vld!xbXj_WG?vC%z=*c|lE94vKD4SrEx&X-;O6$^zpP zbH>}moUtJ>GMXhuMu?;;x1AokRfcKIesr(nx`rznp&6+o#Zg8YT6L5|tBne1bE6_! 
zePp1`k4k6@adC^DO0hXown|$|&(v`_Hsi`Ob*swMlgb`pS{^Cml|AJl;akPEHqy}^ z8RgNgj{xZ$9W9|f#uQjLZS(6t?C4fCdO9i1vH!(PHHKfB#*f`fZ&*{>K~fYv3jHU% zmJ{b_jwmF~QbP<%8;kBvqoLP}NI1({6;vy$hB|!tFp7Owx)x}b2MW5R2L2;%;t9nh zjkIx&X+%Ai+tp&|&Q^Y0*dyu{seh(jSAKr#p|+db)knE4V_X_xwRj~q$4ev77C7|j z=%v}{;}av5-~n9$q+G+WURnVIQjS^mY2Nxvui$g1G3VH z72tjc0ds?Rfpr}oZ~+HLYXVCQ0=XPi8y=qm&$0Ua`SXd%pZ2-%ND*qfU2z9-IKjf1 zZM580mjlAqKmK&{6Wi>I@Ckf!(Bk3+&~md@NeX`0>2Q(gUI3pV*(sTx8+v_l6pNCg z2gw1FR3gaqKym^WZNo_m4Qvv-eEO9a}jEiLSw+zWlljXonefaKu;RX@ns7%dfK>#f7l&L!u zI#6Vg=@-Qo-Jx{{M4Cck$-*?)Y8RlrAk}8ABx*RRxJV6gLm)9xQd7D?cXeuBrG{8n z(^A#EL<5UjnN%ENjWaO|HdaJrs7>f+S*;Q!)gi^llfdA?#IDyDz*__(nTv9h@ z1Z5f|KR4F)6w*$X+tWxt$Iu6ctYMsoeyS|b3cu7&Djg6R1TDgng~1apVyo{D5Ll<- zHYn3VWZcjel`o-EXSHD;nZvm%x6Adgckr_(IUD4_$ zc^S@3>~W$#g{5}mR`($1*PV0^NABUIdvt&(B2tt!U0u_BpM;doQ@lah;ql3-Q0*2tTcT6Dq~jD zZ9R&7H|mlaWo8XgHrW!H0l5K^#OdR(bm8HJ+UUZ=S`7qz`0%xdmxkIyW@`vxNK%GH zr}fvZD{NO1Q<8N;QSlC@lYG?oy?A20#8nL0rO8gtYvj_SfgjI?E*+%vdAM2TOb_ie zIV>tAZWL*CdBmeWa7OEB^jsBWFKe2r?H|*$KWh7Wet7vX2K*4pBsUO?@<=LK#3fns zF>WL~D9F%a3M6vR)ZHA$;4a5`JQom(LP=rgVCCQ29DGY12D?hqEvfk~lS&>u1$ z6v#_vZQ>?YQnJfl`wF^*=iH>%Cw+d>7bbmi(i>lCtn?2CnVk`G3^2+ZzVPYCwEUzU%=h=o)Uhq)eyV`yFike4s1*-1RE6l;3Y7%K_^-u(1Y(i zHe6lo$V@kCAUi>j0a8Ti?#O=neSc?ekMAJ-Ek~i=5F;O!kxNoePn$jo((f!` zk{(5exsDE+7L+eiJLu5t-vLH?2}3uKH_iTqZw>R%7;OQ3A?PLlOlg?3FaM02n6e`k z-WX>6nTp)y7l_VaEw*w`6ww`i6eILSdk@`N%sY+nHrbO4bk$chqGhE-9{bX_W zv7sw*X{VDykRxJM%H2Lw9vj=ZSOs=&d3f_q$h`KBEb$^-@Pbw-`XSOOZt>uWC&EDT z%bzQfQTTvpsOL!qw%k?=syxhJBwu9b&?Ks`D*=xwKEEnw@7ra;9}iH#l(h%DvN=C% z7qHcJ-cIxn@80=fSZQ=2XsJPUJ$v+ZG0Qh5fl#92b!elc0F7yToy0&6A!|hk#n|jg z7LXtQdu$zDK(h`9sUg2GRb4CZ8+v)aoU5v5_O;K?e5-$6Hj#PkYv1VA;n_o6=M3M1 zP*dq>8g#yiw?xcBmn@}FIRGLKk#@8g3CJ^)R#2MDRv-$eNC+zs`#31xZ57d@RJVoV zfMY@tbo5Jr)B&l@LFOeSihfA$0Yy=hQ(y(O6+}@8aV1m{P`NoMBaNR>en26?K{0Qu zik|jeM!&*JXjieD2{?TYZb`!BC0rhbL4wPpG^hhl9>qar>nQqT=#Qg60Sske$UFF8 zgZAX~eJc`AVU!14VX}V#<;fhYVtrw5$7Q*)I4WRe(OF_EXiu?Kw5PFp4XcZDtMNe! 
zbTlMh19*#oGMqE=oQ882{W&~8hu%VOp+Aql!q(9~&yJ#f0a%X#tKsPJi{qEraaoW7 z&B<(_1X@5*)#w9mYC-jJipxK6#?=M3$`&cbM%8(yB4uCM;^w;^lt6BMTnm*giLsKRxWM()x`Mfw+f z$4STT(U?((1*pefd$_*fbFxK$fkpR+C%@}}vIP{t504#iM%m;iGda+jt3Jid*W@rJtN#7H;gNY~Hd!~^xP>yQm)5DEwAQ3zkbP`Hz++|A z1Jp7ESpj^XETQ@!wgrkwH)@()LUjvijrrllYxthxbFz0ofU>?>Ylu655kI4b z?0_t3%bE%Cg2HKgi7eK=G)DMT`fKoVNAZKaIkl};u zTZWM3YO1W;MVWQKzhjq?id@e=*Z{cK0kCbNpX_1h|yp9=h8BJ{|t^Mhw9+WgwwNH>P2$FR{@+?Shg%nDXIV9dlsZft5H%nh^5BL?a zg+;%^B_pGiya8vjlFPy(y04-vKR;SnSV1!dRR@H=Z=g+~sjxqfhw!CiriLG!*6go; zyJD6#1OKX)VnD8U*av{@4Nz;MG+a;~j{E1)ZPwQ569bXMaT3!(U5yR!(ZWf}j`Min z`jeTWgRr&bIN}z7l0v*gU-lB?Zl5vf_a5Mw|IlADmmYe`y}jhn^vzlZA1YOy(agMg^X7fO7Zr;Zf^pXQo&A`pIk1`32ZxAOoE${qZxjf;jJBjMRJyOW`W>@4#|YuV86Bb;N->H+@(1 zK0Od3=Bu?@;&iG_UmUa>ZO#%m3Iz*A)s0%N`Zzwo!9!<$4gf>SfwuM>fk{TX3{RUy zYOxH(Yv5cEuglz~Bb+A@Wq~v(fb~jNnuLT-ixDC%LRt}R2MR6_@~Z6w!}NEKe$ie*kgmK5;nJ$h5+6zY<6E4R;M4H20FboS0-p=WjQBxW@l<}8LA^Vm z2uYRT)wLjUoq#_8U__cIohpVjqL_u!P&P;-0E=YC7p%oAnM74-Dop0oGf^fc%Cj;% zJIvD6s%G@ zAiU34;pp_;14u0f$U$|XDtxcEc%0HIqV6SmtjGc5j?*jZr8KKJIqn~)1l@i&J>z~n z`k+`xEtWE`{1^Gkh*auOm%Xn?^!s<0mzP$)+OLoHY5r-JRX7mnuRbb$BH| z3}8XU4zfW&K8A8CYKt_QmzZK|(d9MIS@ZHzY&kKZe!^T4C0%KG-sO`;uVAy8MD3U~ zy3+KRBiae$vXqKt$I!btG>HmM)90N^Jv-NEG+d$*Yp0VDx4rYQzW(H4GOYxtYKLyyZpBx%Jyyu(+OX$7Q2ybou$joj i1(=3b*2{)wY53o!@t5(znl$l@R>Br8X=VJ`hkpRqt`#@{ literal 0 HcmV?d00001 diff --git a/workflows/bbbc.py b/cwl_workflows/bbbc.py similarity index 100% rename from workflows/bbbc.py rename to cwl_workflows/bbbc.py diff --git a/workflows/bbbc.yml b/cwl_workflows/bbbc.yml similarity index 100% rename from workflows/bbbc.yml rename to cwl_workflows/bbbc.yml diff --git a/cwl_workflows/cwl_features_extraction.py b/cwl_workflows/cwl_features_extraction.py new file mode 100644 index 0000000..6232604 --- /dev/null +++ b/cwl_workflows/cwl_features_extraction.py @@ -0,0 +1,265 @@ +import wic.api.pythonapi as api +import polus.plugins as pp +from pathlib import Path +import yaml +import logging +import re +import shutil 
+import typing +from utils import GITHUB_TAG + +# Initialize the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class CWLFeatureWorkflow: + """ + A CWL feature extraction pipeline. + + Attributes: + name : Name of imaging dataset of Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets). + file_pattern : Pattern for parsing raw filenames. + out_file_pattern : Preferred format for filenames + image_pattern : Pattern for parsing intensity image filenames after renaming when using map_directory + seg_pattern : Pattern use to parse segmentation image filenames + map_directory : Extract folder name + ff_pattern: The filename pattern employed to select flatfield components from the ffDir. + df_pattern:The filename pattern employed to select darkfield components from the ffDir + group_by: Grouping variables for filePattern + features:Features from Nyxus (https://github.com/PolusAI/nyxus/) that need extraction + file_extension: Output file format + """ + def __init__( + self, + name: str, + file_pattern: str, + out_file_pattern: str, + image_pattern: str, + seg_pattern: str, + map_directory: str, + ff_pattern: str, + df_pattern: str, + group_by: str, + features: typing.Optional[str]="ALL", + file_extension: typing.Optional[str]="arrowipc" + ): + self.name = name + self.file_pattern = file_pattern + self.out_file_pattern = out_file_pattern + self.map_directory = map_directory + self.ff_pattern = ff_pattern + self.df_pattern = df_pattern + self.group_by = group_by + self.wic_path = api._WIC_PATH + self.PATH = Path(self.wic_path.parent).joinpath(Path(__file__).parts[-3]) + self.cwl_path, self.workflow_path = self._create_directories() + self.image_pattern = image_pattern + self.seg_pattern = seg_pattern + self.features = features + self.file_extension = file_extension + + def _create_directories(self) -> None: + """Create directories for CWL outputs""" + cwl_path = self.PATH.joinpath("cwl_adapters") + 
cwl_path.mkdir(parents=True, exist_ok=True) + workflow_path = self.PATH.joinpath("workflows").resolve() + workflow_path.mkdir(exist_ok=True) + return cwl_path, workflow_path + + def _clean(self) -> None: + """Cleaning of redundant directories generating on running CWL""" + logger.info("Cleaning directories!!!") + destination_path = self.workflow_path.joinpath("experiment") + dir_names = ("autogenerated", "cachedir", "RUNS", "provenance") + for i, d in zip(self.wic_path.iterdir(), self.PATH.iterdir()): + if i.name.endswith(dir_names): + shutil.rmtree(d) + if d.name.endswith(dir_names): + shutil.rmtree(d) + + for d in destination_path.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.rmtree(d) + for d in self.PATH.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.move(d, destination_path) + + return + + def _move_outputs(self) -> None: + """Transfer outputs from the WIC directory to the workflow path""" + logger.info("Move outputs to workflow path!!!") + for d in self.wic_path.iterdir(): + if d.name.endswith("outdir"): + shutil.move(d, self.workflow_path) + return + + def _camel(self, name: str) -> str: + """Convert plugin name to camel case.""" + name = re.sub(r"(_|-)+", " ", name).title().replace(" ", "") + return "".join([name[0].upper(), name[1:]]) + + def _string_after_period(self, x): + """Get a string after period.""" + match = re.search(r"\.(.*)", x) + if match: + # Get the part after the period + return f".*.{match.group(1)}" + else: + return "" + + def _add_backslash_before_parentheses(self, x): + """Add backslash to generate ff_pattern and df_pattern""" + # Define the regular expression pattern to match parenthesis + pattern_1 = r"(\()|(\))" + # Use re.sub() to add a backslash before starting and finishing parenthesis + result = re.sub(pattern_1, r"\\\1\2", x) + pattern_2 = r"\d" + result = ( + result.split("_c")[0] + + "_c{c:d}" + + re.sub(pattern_2, "", result.split("_c")[1]) + ) + return result + + def create_step(self, url: str) -> 
api.Step: + """Generate the plugin class name from the plugin name specified in the manifest""" + manifest = pp.submit_plugin(url) + plugin_version = str(manifest.version) + cwl_tool = pp.get_plugin(self._camel(manifest.name), plugin_version).save_cwl( + self.cwl_path.joinpath(f"{self._camel(manifest.name)}.cwl") + ) + step = api.Step(cwl_tool) + return step + + def manifest_urls(self, x: str) -> str: + """URLs on GitHub for plugin manifests""" + + urls = { + "bbbc_download": f"{GITHUB_TAG}/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json", + "file_renaming": f"{GITHUB_TAG}/PolusAI/polus-plugins/f20a2f75264d59af78cfb40b4c3cec118309f7ec/formats/file-renaming-plugin/plugin.json", + "ome_converter": f"{GITHUB_TAG}/hamshkhawar/image-tools/basecontainer_omecontainer/formats/ome-converter-plugin/plugin.json", + "estimate_flatfield": f"{GITHUB_TAG}/nishaq503/image-tools/fix/basic/regression/basic-flatfield-estimation-tool/plugin.json", + "apply_flatfield": f"{GITHUB_TAG}/hamshkhawar/image-tools/cast_images/transforms/images/apply-flatfield-tool/plugin.json", + "kaggle_nuclei_segmentation": f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nuclei_seg/segmentation/kaggle-nuclei-segmentation/plugin.json", + "ftl_plugin": f"{GITHUB_TAG}/nishaq503/image-tools/fix/ftl-label/transforms/images/polus-ftl-label-plugin/plugin.json", + "nyxus_plugin": f"{GITHUB_TAG}/hamshkhawar/image-tools/nyxus_manifest/features/nyxus-plugin/plugin.json", + } + return urls[x] + + def modify_cwl(self) -> None: + """Modify CWL to incorporate environmental variables and permission access""" + for f in list(self.cwl_path.rglob("*.cwl")): + if "cwl" in f.name: + try: + with Path.open(f, "r") as file: + config = yaml.safe_load(file) + config["requirements"]["NetworkAccess"] = { + "networkAccess": True + } + config["requirements"]["EnvVarRequirement"] = { + "envDef": {"HOME": "/home/polusai"} + } + with open(f, "w") as out_file: + yaml.dump(config, out_file) + except 
FileNotFoundError: + logger.info("Error: There was an unexpected error while processing the file.") + return + + def workflow(self) -> None: + """ + A CWL feature extraction pipeline. + """ + # BBBCDownload + bbbc = self.create_step(self.manifest_urls("bbbc_download")) + bbbc.name = self.name + bbbc.outDir = Path("bbbc.outDir") + + # Renaming plugin + rename = self.create_step(self.manifest_urls("file_renaming")) + rename.filePattern = self.file_pattern + rename.outFilePattern = self.out_file_pattern + rename.mapDirectory = self.map_directory + rename.inpDir = bbbc.outDir + rename.outDir = Path("rename.outDir") + + # OMEConverter + ome_converter = self.create_step(self.manifest_urls("ome_converter")) + ome_converter.filePattern = self._string_after_period(self.out_file_pattern) + ome_converter.fileExtension = ".ome.tif" + ome_converter.inpDir = rename.outDir + ome_converter.outDir = Path("ome_converter.outDir") + + # Estimate Flatfield + estimate_flatfield = self.create_step(self.manifest_urls("estimate_flatfield")) + estimate_flatfield.inpDir = ome_converter.outDir + estimate_flatfield.filePattern = self.image_pattern + estimate_flatfield.groupBy = self.group_by + estimate_flatfield.getDarkfield = True + estimate_flatfield.outDir = Path("estimate_flatfield.outDir") + + # # Apply Flatfield + apply_flatfield = self.create_step(self.manifest_urls("apply_flatfield")) + apply_flatfield.imgDir = ome_converter.outDir + apply_flatfield.imgPattern = self.image_pattern + apply_flatfield.ffDir = estimate_flatfield.outDir + apply_flatfield.ffPattern = self.ff_pattern + apply_flatfield.dfPattern = self.df_pattern + apply_flatfield.outDir = Path("apply_flatfield.outDir") + apply_flatfield.dataType = True + + ## Kaggle Nuclei Segmentation + kaggle_nuclei_segmentation = self.create_step( + self.manifest_urls("kaggle_nuclei_segmentation") + ) + kaggle_nuclei_segmentation.inpDir = apply_flatfield.outDir + kaggle_nuclei_segmentation.filePattern = self.image_pattern + 
kaggle_nuclei_segmentation.outDir = Path("kaggle_nuclei_segmentation.outDir") + + ## FTL Label Plugin + ftl_plugin = self.create_step(self.manifest_urls("ftl_plugin")) + ftl_plugin.inpDir = kaggle_nuclei_segmentation.outDir + ftl_plugin.connectivity = 1 + ftl_plugin.binarizationThreshold = 0.5 + ftl_plugin.outDir = Path("ftl_plugin.outDir") + + # ## Nyxus Plugin + nyxus_plugin = self.create_step(self.manifest_urls("nyxus_plugin")) + nyxus_plugin.inpDir = apply_flatfield.outDir + nyxus_plugin.segDir = ftl_plugin.outDir + nyxus_plugin.intPattern = self.image_pattern + nyxus_plugin.segPattern = self.seg_pattern + nyxus_plugin.features = self.features + nyxus_plugin.fileExtension = self.file_extension + nyxus_plugin.neighborDist = 5 + nyxus_plugin.pixelPerMicron = 1.0 + nyxus_plugin.outDir = Path("nyxus_plugin.outDir") + + logger.info("Initiating CWL Feature Extraction Workflow!!!") + steps = [ + bbbc, + rename, + ome_converter, + estimate_flatfield, + apply_flatfield, + kaggle_nuclei_segmentation, + ftl_plugin, + nyxus_plugin + ] + workflow = api.Workflow(steps, "experiment", self.workflow_path) + # # Saving CLT for plugins + workflow._save_all_cwl(overwrite=True) + # # Adding environmental variables for bbbc_download and ome_converter plugin + self.modify_cwl() + # # # Save yaml to run CWL tool + workflow._save_yaml() + # Compile and run using WIC python API + workflow.compile(run_local=True, overwrite=False) + # # print(workflow.yml_path) + # # clean autognerated directories + self._clean() + self._move_outputs() + logger.info("Completed CWL Feature Extraction Workflow.") + return + \ No newline at end of file diff --git a/cwl_workflows/utils.py b/cwl_workflows/utils.py new file mode 100644 index 0000000..2da1a30 --- /dev/null +++ b/cwl_workflows/utils.py @@ -0,0 +1,45 @@ +import json +import pydantic +from pathlib import Path +from typing import Dict + +GITHUB_TAG = "https://raw.githubusercontent.com" + + +class DataModel(pydantic.BaseModel): + data: Dict[str, 
Dict[str, str]] + + +def get_params(path: Path, name: str): + """Loading json file for getting parameters""" + with open(path) as json_file: + # Read the JSON data + data = json.load(json_file) + params = [v[name] for k, v in data.items()][0] + return params + + +params = { + "BBBC039": { + "name": "BBBC039", + "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", + "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", + "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", + "seg_pattern":"images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", + "map_directory": "raw", + "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", + "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", + "group_by": "c", + "features": "ALL_INTENSITY,ALL_MORPHOLOGY", + "file_extension": "pandas" + } +} +model = DataModel(data=params) +model_dict = model.dict() + +json_dir = Path(Path(__file__).parents[1]).joinpath("bbbc_json") +json_dir.mkdir(parents=True, exist_ok=True) +JSON_FILENAME = json_dir.joinpath("bbbc_config.json") + +with Path.open(JSON_FILENAME, "w") as json_file: + json.dump(model_dict, json_file, indent=2) From 1d589e6b2e117eb6888e1a3e617be312adbe37b1 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 28 Feb 2024 15:02:40 -0600 Subject: [PATCH 02/16] remove hidden files --- .../cwl_features_extraction.cpython-310.pyc | Bin 8762 -> 0 bytes cwl_workflows/__pycache__/utils.cpython-310.pyc | Bin 1923 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc delete mode 100644 cwl_workflows/__pycache__/utils.cpython-310.pyc diff --git a/cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc b/cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc deleted file mode 100644 index a5f83f499631cc14de5d0c8d4ff9374f7cac7cc4..0000000000000000000000000000000000000000 GIT binary patch 
literal 0 HcmV?d00001 literal 8762 zcma)B%ahwidIvy|1TiG%q0#$c$WKwbL)xM(eu^vlTQp2zs;~4^O;MGeW~*vClI!VqUfyed zp;xqvlX1f~Bt*$B$=dh9-&Hql|;itHw ze?r3*TV;x^G1bnMv{H`Mn9hM8R{>WMmw~H<%fz*WtBk9{^IModsjzBx*FMIU*~%Ar zyT~rGBW(Q(9dCP)9c9OGSHj(Kb^>>%#J|e*w$4tnQy5udr`ee=6ukRcb`E`oJ%=4u zd39?UYbY7(`k!8jfLBay)dx)UZ4A3!0TqZE{=uQ9K<}jBA*F1aC_Xk zVr^0vwk@yccD$ftF*kN29@EhKB6OMcz8BIQx8CPLtJ`zMBkNk|`#jy~LN|{4(UnG{ z*=)AzO@i=(DE4A(Ru4s|Az>Y0jB54i-fhq4PT!4VE&`kZjFxxpP*{CeM05_}ZdH}!&;2ay-=NOJHQ7RznpgqEP=(CN0j zJg^4xpgp(mFi(IUq1c)37V*v@FF1&}6^HU2xrHS?9>lI>SW4pQ3s3PZU!yOO`XNWkl&}#c`-1fN7tXA0ThXD>0S#1&a zte7yguX|!fd^X$Q|BIhNUVZ@o^!yzW4*E`W=ZbZc?ugD$T;aJ*pW{purcPvK3N#V^ z=*mP%Ca2%rc{+%!sdRR{xI1Xp@s5pqp+AVO-f9H2qEW+pLSzCCs?79fhMfcz<0KVf zaK*Yii2DOj4Z{;@lS`A_)!Vld!xbXj_WG?vC%z=*c|lE94vKD4SrEx&X-;O6$^zpP zbH>}moUtJ>GMXhuMu?;;x1AokRfcKIesr(nx`rznp&6+o#Zg8YT6L5|tBne1bE6_! zePp1`k4k6@adC^DO0hXown|$|&(v`_Hsi`Ob*swMlgb`pS{^Cml|AJl;akPEHqy}^ z8RgNgj{xZ$9W9|f#uQjLZS(6t?C4fCdO9i1vH!(PHHKfB#*f`fZ&*{>K~fYv3jHU% zmJ{b_jwmF~QbP<%8;kBvqoLP}NI1({6;vy$hB|!tFp7Owx)x}b2MW5R2L2;%;t9nh zjkIx&X+%Ai+tp&|&Q^Y0*dyu{seh(jSAKr#p|+db)knE4V_X_xwRj~q$4ev77C7|j z=%v}{;}av5-~n9$q+G+WURnVIQjS^mY2Nxvui$g1G3VH z72tjc0ds?Rfpr}oZ~+HLYXVCQ0=XPi8y=qm&$0Ua`SXd%pZ2-%ND*qfU2z9-IKjf1 zZM580mjlAqKmK&{6Wi>I@Ckf!(Bk3+&~md@NeX`0>2Q(gUI3pV*(sTx8+v_l6pNCg z2gw1FR3gaqKym^WZNo_m4Qvv-eEO9a}jEiLSw+zWlljXonefaKu;RX@ns7%dfK>#f7l&L!u zI#6Vg=@-Qo-Jx{{M4Cck$-*?)Y8RlrAk}8ABx*RRxJV6gLm)9xQd7D?cXeuBrG{8n z(^A#EL<5UjnN%ENjWaO|HdaJrs7>f+S*;Q!)gi^llfdA?#IDyDz*__(nTv9h@ z1Z5f|KR4F)6w*$X+tWxt$Iu6ctYMsoeyS|b3cu7&Djg6R1TDgng~1apVyo{D5Ll<- zHYn3VWZcjel`o-EXSHD;nZvm%x6Adgckr_(IUD4_$ zc^S@3>~W$#g{5}mR`($1*PV0^NABUIdvt&(B2tt!U0u_BpM;doQ@lah;ql3-Q0*2tTcT6Dq~jD zZ9R&7H|mlaWo8XgHrW!H0l5K^#OdR(bm8HJ+UUZ=S`7qz`0%xdmxkIyW@`vxNK%GH zr}fvZD{NO1Q<8N;QSlC@lYG?oy?A20#8nL0rO8gtYvj_SfgjI?E*+%vdAM2TOb_ie zIV>tAZWL*CdBmeWa7OEB^jsBWFKe2r?H|*$KWh7Wet7vX2K*4pBsUO?@<=LK#3fns zF>WL~D9F%a3M6vR)ZHA$;4a5`JQom(LP=rgVCCQ29DGY12D?hqEvfk~lS&>u1$ 
z6v#_vZQ>?YQnJfl`wF^*=iH>%Cw+d>7bbmi(i>lCtn?2CnVk`G3^2+ZzVPYCwEUzU%=h=o)Uhq)eyV`yFike4s1*-1RE6l;3Y7%K_^-u(1Y(i zHe6lo$V@kCAUi>j0a8Ti?#O=neSc?ekMAJ-Ek~i=5F;O!kxNoePn$jo((f!` zk{(5exsDE+7L+eiJLu5t-vLH?2}3uKH_iTqZw>R%7;OQ3A?PLlOlg?3FaM02n6e`k z-WX>6nTp)y7l_VaEw*w`6ww`i6eILSdk@`N%sY+nHrbO4bk$chqGhE-9{bX_W zv7sw*X{VDykRxJM%H2Lw9vj=ZSOs=&d3f_q$h`KBEb$^-@Pbw-`XSOOZt>uWC&EDT z%bzQfQTTvpsOL!qw%k?=syxhJBwu9b&?Ks`D*=xwKEEnw@7ra;9}iH#l(h%DvN=C% z7qHcJ-cIxn@80=fSZQ=2XsJPUJ$v+ZG0Qh5fl#92b!elc0F7yToy0&6A!|hk#n|jg z7LXtQdu$zDK(h`9sUg2GRb4CZ8+v)aoU5v5_O;K?e5-$6Hj#PkYv1VA;n_o6=M3M1 zP*dq>8g#yiw?xcBmn@}FIRGLKk#@8g3CJ^)R#2MDRv-$eNC+zs`#31xZ57d@RJVoV zfMY@tbo5Jr)B&l@LFOeSihfA$0Yy=hQ(y(O6+}@8aV1m{P`NoMBaNR>en26?K{0Qu zik|jeM!&*JXjieD2{?TYZb`!BC0rhbL4wPpG^hhl9>qar>nQqT=#Qg60Sske$UFF8 zgZAX~eJc`AVU!14VX}V#<;fhYVtrw5$7Q*)I4WRe(OF_EXiu?Kw5PFp4XcZDtMNe! zbTlMh19*#oGMqE=oQ882{W&~8hu%VOp+Aql!q(9~&yJ#f0a%X#tKsPJi{qEraaoW7 z&B<(_1X@5*)#w9mYC-jJipxK6#?=M3$`&cbM%8(yB4uCM;^w;^lt6BMTnm*giLsKRxWM()x`Mfw+f z$4STT(U?((1*pefd$_*fbFxK$fkpR+C%@}}vIP{t504#iM%m;iGda+jt3Jid*W@rJtN#7H;gNY~Hd!~^xP>yQm)5DEwAQ3zkbP`Hz++|A z1Jp7ESpj^XETQ@!wgrkwH)@()LUjvijrrllYxthxbFz0ofU>?>Ylu655kI4b z?0_t3%bE%Cg2HKgi7eK=G)DMT`fKoVNAZKaIkl};u zTZWM3YO1W;MVWQKzhjq?id@e=*Z{cK0kCbNpX_1h|yp9=h8BJ{|t^Mhw9+WgwwNH>P2$FR{@+?Shg%nDXIV9dlsZft5H%nh^5BL?a zg+;%^B_pGiya8vjlFPy(y04-vKR;SnSV1!dRR@H=Z=g+~sjxqfhw!CiriLG!*6go; zyJD6#1OKX)VnD8U*av{@4Nz;MG+a;~j{E1)ZPwQ569bXMaT3!(U5yR!(ZWf}j`Min z`jeTWgRr&bIN}z7l0v*gU-lB?Zl5vf_a5Mw|IlADmmYe`y}jhn^vzlZA1YOy(agMg^X7fO7Zr;Zf^pXQo&A`pIk1`32ZxAOoE${qZxjf;jJBjMRJyOW`W>@4#|YuV86Bb;N->H+@(1 zK0Od3=Bu?@;&iG_UmUa>ZO#%m3Iz*A)s0%N`Zzwo!9!<$4gf>SfwuM>fk{TX3{RUy zYOxH(Yv5cEuglz~Bb+A@Wq~v(fb~jNnuLT-ixDC%LRt}R2MR6_@~Z6w!}NEKe$ie*kgmK5;nJ$h5+6zY<6E4R;M4H20FboS0-p=WjQBxW@l<}8LA^Vm z2uYRT)wLjUoq#_8U__cIohpVjqL_u!P&P;-0E=YC7p%oAnM74-Dop0oGf^fc%Cj;% zJIvD6s%G@ zAiU34;pp_;14u0f$U$|XDtxcEc%0HIqV6SmtjGc5j?*jZr8KKJIqn~)1l@i&J>z~n z`k+`xEtWE`{1^Gkh*auOm%Xn?^!s<0mzP$)+OLoHY5r-JRX7mnuRbb$BH| 
z3}8XU4zfW&K8A8CYKt_QmzZK|(d9MIS@ZHzY&kKZe!^T4C0%KG-sO`;uVAy8MD3U~ zy3+KRBiae$vXqKt$I!btG>HmM)90N^Jv-NEG+d$*Yp0VDx4rYQzW(H4GOYxtYKLyyZpBx%Jyyu(+OX$7Q2ybou$joj i1(=3b*2{)wY53o!@t5(znl$l@R>Br8X=VJ`hkpRqt`#@{ From 81af0325eedf8a834c259f665ad63f0d541ecd5d Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 28 Feb 2024 15:03:50 -0600 Subject: [PATCH 03/16] fix typo error --- cwl_workflows/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cwl_workflows/__main__.py b/cwl_workflows/__main__.py index 971173e..115895f 100644 --- a/cwl_workflows/__main__.py +++ b/cwl_workflows/__main__.py @@ -1,4 +1,4 @@ -"""Ome Converter.""" +"""CWL Workflow.""" import logging from typing import Any from typing import Optional From 2dcc5e67d085e5a9c60ea50b236006276e27d506 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 28 Feb 2024 15:59:09 -0600 Subject: [PATCH 04/16] fix config file name workflow name --- README.md | 2 +- bbbc_json/{bbbc_config.json => bbbc_feature.json} | 0 cwl_workflows/__main__.py | 13 +++++++------ cwl_workflows/utils.py | 8 ++++---- 4 files changed, 12 insertions(+), 11 deletions(-) rename bbbc_json/{bbbc_config.json => bbbc_feature.json} (100%) diff --git a/README.md b/README.md index c735f02..c2ff8f8 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Below are the specifics of the plugins employed in the workflow The parameters for each imaging dataset are pre-defined and stored in JSON format. A Pydantic model in a utils Python file can be utilized to store parameters for any new dataset -`python cwl_workflows/__main__.py --name="BBBC039" --workflow=CWLFeatureWorkflow` +`python cwl_workflows/__main__.py --name="BBBC039" --workflow=feature` A directory named `workflow` is generated, encompassing CLTs for each plugin, YAML files, and all outputs are stored within the `outdir` directory. 
``` diff --git a/bbbc_json/bbbc_config.json b/bbbc_json/bbbc_feature.json similarity index 100% rename from bbbc_json/bbbc_config.json rename to bbbc_json/bbbc_feature.json diff --git a/cwl_workflows/__main__.py b/cwl_workflows/__main__.py index 115895f..0e3c645 100644 --- a/cwl_workflows/__main__.py +++ b/cwl_workflows/__main__.py @@ -3,7 +3,7 @@ from typing import Any from typing import Optional import typer -from utils import JSON_FILENAME +from utils import FEAT_JSON_FILENAME from utils import get_params from cwl_features_extraction import CWLFeatureWorkflow @@ -40,12 +40,13 @@ def main( logger.info(f"name = {name}") logger.info(f"workflow = {workflow}") - params = get_params(JSON_FILENAME, name) + - if workflow == "CWLFeatureWorkflow": - logger.info(f"Executing {workflow}!!!") - model = CWLFeatureWorkflow(**params) - model.workflow() + if workflow == "feature": + params = get_params(FEAT_JSON_FILENAME, name) + logger.info(f"Executing {workflow}!!!") + model = CWLFeatureWorkflow(**params) + model.workflow() logger.info("Completed CWL workflow!!!") diff --git a/cwl_workflows/utils.py b/cwl_workflows/utils.py index 2da1a30..397057f 100644 --- a/cwl_workflows/utils.py +++ b/cwl_workflows/utils.py @@ -19,7 +19,7 @@ def get_params(path: Path, name: str): return params -params = { +feat_params = { "BBBC039": { "name": "BBBC039", "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", @@ -34,12 +34,12 @@ def get_params(path: Path, name: str): "file_extension": "pandas" } } -model = DataModel(data=params) +model = DataModel(data=feat_params) model_dict = model.dict() json_dir = Path(Path(__file__).parents[1]).joinpath("bbbc_json") json_dir.mkdir(parents=True, exist_ok=True) -JSON_FILENAME = json_dir.joinpath("bbbc_config.json") +FEAT_JSON_FILENAME = json_dir.joinpath("bbbc_config.json") -with Path.open(JSON_FILENAME, "w") as json_file: +with Path.open(FEAT_JSON_FILENAME, "w") as json_file: json.dump(model_dict, json_file, indent=2) From 
03c6d18e79700a8b3a65c15218b5a03f46e1fa38 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Mon, 4 Mar 2024 12:20:57 -0600 Subject: [PATCH 05/16] implemented pydatic model for loading json data and updated plugin_url --- bbbc_json/bbbc_feature.json | 4 +-- cwl_workflows/__main__.py | 8 ++--- cwl_workflows/cwl_features_extraction.py | 8 ++--- cwl_workflows/utils.py | 44 +++++++++++++++++------- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/bbbc_json/bbbc_feature.json b/bbbc_json/bbbc_feature.json index c5d55c6..27aa56b 100644 --- a/bbbc_json/bbbc_feature.json +++ b/bbbc_json/bbbc_feature.json @@ -2,14 +2,14 @@ "data": { "BBBC039": { "name": "BBBC039", - "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", + "file_pattern": "/.*/.*/.*/Images/(?P.*)/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", "seg_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", - "map_directory": "raw", "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", "group_by": "c", + "map_directory": false, "features": "ALL_INTENSITY,ALL_MORPHOLOGY", "file_extension": "pandas" } diff --git a/cwl_workflows/__main__.py b/cwl_workflows/__main__.py index 0e3c645..0adfbb3 100644 --- a/cwl_workflows/__main__.py +++ b/cwl_workflows/__main__.py @@ -1,10 +1,8 @@ """CWL Workflow.""" import logging -from typing import Any -from typing import Optional import typer from utils import FEAT_JSON_FILENAME -from utils import get_params +from utils import LoadData from cwl_features_extraction import CWLFeatureWorkflow @@ -41,9 +39,9 @@ def main( logger.info(f"workflow = {workflow}") - if workflow == "feature": - params = get_params(FEAT_JSON_FILENAME, name) + model = LoadData(path=FEAT_JSON_FILENAME, name= name) + params = model.parse_json() 
logger.info(f"Executing {workflow}!!!") model = CWLFeatureWorkflow(**params) model.workflow() diff --git a/cwl_workflows/cwl_features_extraction.py b/cwl_workflows/cwl_features_extraction.py index 6232604..03addef 100644 --- a/cwl_workflows/cwl_features_extraction.py +++ b/cwl_workflows/cwl_features_extraction.py @@ -37,17 +37,16 @@ def __init__( out_file_pattern: str, image_pattern: str, seg_pattern: str, - map_directory: str, ff_pattern: str, df_pattern: str, group_by: str, + map_directory: typing.Optional[bool] = False, features: typing.Optional[str]="ALL", file_extension: typing.Optional[str]="arrowipc" ): self.name = name self.file_pattern = file_pattern self.out_file_pattern = out_file_pattern - self.map_directory = map_directory self.ff_pattern = ff_pattern self.df_pattern = df_pattern self.group_by = group_by @@ -58,6 +57,7 @@ def __init__( self.seg_pattern = seg_pattern self.features = features self.file_extension = file_extension + self.map_directory = map_directory def _create_directories(self) -> None: """Create directories for CWL outputs""" @@ -138,11 +138,11 @@ def manifest_urls(self, x: str) -> str: urls = { "bbbc_download": f"{GITHUB_TAG}/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json", - "file_renaming": f"{GITHUB_TAG}/PolusAI/polus-plugins/f20a2f75264d59af78cfb40b4c3cec118309f7ec/formats/file-renaming-plugin/plugin.json", + "file_renaming": f"{GITHUB_TAG}/hamshkhawar/image-tools/filepattern_filerenaming/formats/file-renaming-tool/plugin.json", "ome_converter": f"{GITHUB_TAG}/hamshkhawar/image-tools/basecontainer_omecontainer/formats/ome-converter-plugin/plugin.json", "estimate_flatfield": f"{GITHUB_TAG}/nishaq503/image-tools/fix/basic/regression/basic-flatfield-estimation-tool/plugin.json", "apply_flatfield": f"{GITHUB_TAG}/hamshkhawar/image-tools/cast_images/transforms/images/apply-flatfield-tool/plugin.json", - "kaggle_nuclei_segmentation": 
f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nuclei_seg/segmentation/kaggle-nuclei-segmentation/plugin.json", + "kaggle_nuclei_segmentation": f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nucleiseg/segmentation/kaggle-nuclei-segmentation-tool/plugin.json", "ftl_plugin": f"{GITHUB_TAG}/nishaq503/image-tools/fix/ftl-label/transforms/images/polus-ftl-label-plugin/plugin.json", "nyxus_plugin": f"{GITHUB_TAG}/hamshkhawar/image-tools/nyxus_manifest/features/nyxus-plugin/plugin.json", } diff --git a/cwl_workflows/utils.py b/cwl_workflows/utils.py index 397057f..e18c73d 100644 --- a/cwl_workflows/utils.py +++ b/cwl_workflows/utils.py @@ -2,34 +2,52 @@ import pydantic from pathlib import Path from typing import Dict +from typing import Union GITHUB_TAG = "https://raw.githubusercontent.com" class DataModel(pydantic.BaseModel): - data: Dict[str, Dict[str, str]] - - -def get_params(path: Path, name: str): - """Loading json file for getting parameters""" - with open(path) as json_file: - # Read the JSON data - data = json.load(json_file) - params = [v[name] for k, v in data.items()][0] - return params + data: Dict[str, Dict[str, Union[str, bool]]] + + +class LoadData(pydantic.BaseModel): + path: Union[str, Path] + name:str + + @pydantic.validator("path", pre=True) + @classmethod + def validate_path(cls, value: Union[str, Path]) -> Union[str, Path]: + """Validation of Paths.""" + if not Path(value).exists(): + msg = f"{value} do not exist! Please do check it again" + raise ValueError(msg) + if isinstance(value, str): + return Path(value) + return value + + def parse_json(self) -> Dict[str, Union[str, bool]]: + with open(self.path) as json_file: + # Read the JSON data + data = json.load(json_file) + params = [v[self.name] for k, v in data.items()][0] + if len(params) == 0: + msg = f"{self.name} dataset donot exist! 
Please do check it again" + raise ValueError(msg) + return params feat_params = { "BBBC039": { "name": "BBBC039", - "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", + "file_pattern": "/.*/.*/.*/Images/(?P.*)/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", "seg_pattern":"images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", - "map_directory": "raw", "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", "group_by": "c", + "map_directory": False, "features": "ALL_INTENSITY,ALL_MORPHOLOGY", "file_extension": "pandas" } @@ -39,7 +57,7 @@ def get_params(path: Path, name: str): json_dir = Path(Path(__file__).parents[1]).joinpath("bbbc_json") json_dir.mkdir(parents=True, exist_ok=True) -FEAT_JSON_FILENAME = json_dir.joinpath("bbbc_config.json") +FEAT_JSON_FILENAME = json_dir.joinpath("bbbc_feature.json") with Path.open(FEAT_JSON_FILENAME, "w") as json_file: json.dump(model_dict, json_file, indent=2) From 88b6c821bde498e43799a2e236d0d1b94bad6ca5 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Mon, 4 Mar 2024 12:22:14 -0600 Subject: [PATCH 06/16] fix class attributes --- cwl_workflows/cwl_features_extraction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cwl_workflows/cwl_features_extraction.py b/cwl_workflows/cwl_features_extraction.py index 03addef..63e3c0f 100644 --- a/cwl_workflows/cwl_features_extraction.py +++ b/cwl_workflows/cwl_features_extraction.py @@ -23,7 +23,7 @@ class CWLFeatureWorkflow: out_file_pattern : Preferred format for filenames image_pattern : Pattern for parsing intensity image filenames after renaming when using map_directory seg_pattern : Pattern use to parse segmentation image filenames - map_directory : Extract folder name + map_directory : Mapping of folder name 
ff_pattern: The filename pattern employed to select flatfield components from the ffDir. df_pattern:The filename pattern employed to select darkfield components from the ffDir group_by: Grouping variables for filePattern From a9ed5f66fd1a8e3b5ed2954b85954970f3769cf0 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Mon, 4 Mar 2024 12:39:08 -0600 Subject: [PATCH 07/16] fix typo error --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c2ff8f8..d74d022 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,13 @@ create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/mana #### 1. Install polus-plugins. -- clone a image-tools reporsitory +- clone a image-tools repository `git clone https://github.com/camilovelezr/image-tools.git` - cd `image-tools` - `pip install .` #### 2. Install workflow-inference-compiler. -- clone a workflow-inference-compiler reporsitory +- clone a workflow-inference-compiler repository `git clone https://github.com/camilovelezr/workflow-inference-compiler.git` - cd `workflow-inference-compiler` - `pip install -e ".[all]"` From 8d93e8a746a518c55cd3c72444535f9155eaea3b Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Mon, 11 Mar 2024 08:50:39 -0500 Subject: [PATCH 08/16] added more documentation for docker --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d74d022..3ad7af1 100644 --- a/README.md +++ b/README.md @@ -9,16 +9,21 @@ create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/mana #### 1. Install polus-plugins. - clone a image-tools repository -`git clone https://github.com/camilovelezr/image-tools.git` +`git clone https://github.com/camilovelezr/image-tools.git ../` - cd `image-tools` - `pip install .` #### 2. Install workflow-inference-compiler. 
- clone a workflow-inference-compiler repository -`git clone https://github.com/camilovelezr/workflow-inference-compiler.git` +`git clone https://github.com/camilovelezr/workflow-inference-compiler.git ../` - cd `workflow-inference-compiler` - `pip install -e ".[all]"` +#### Note: +Ensure that the [docker-desktop](https://www.docker.com/products/docker-desktop/) is running in the background. To verify that it's operational, you can use the following command: +`docker run -d -p 80:80 docker/getting-started` +This command will launch the `docker/getting-started container` in detached mode (-d flag), exposing port 80 on your local machine (-p 80:80). It's a simple way to test if Docker Desktop is functioning correctly. + ## Details This workflow integrates eight distinct plugins, starting from data retrieval from [Broad Bioimage Benchmark Collection](https://bbbc.broadinstitute.org/), renaming files, correcting uneven illumination, segmenting nuclear objects, and culminating in the extraction of features from identified objects From b7689d0b5d003a352ff0f575906127d118437d8e Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Fri, 15 Mar 2024 17:16:15 -0500 Subject: [PATCH 09/16] fix documentation --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 3ad7af1..9a86d7c 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,16 @@ create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/mana - clone a image-tools repository `git clone https://github.com/camilovelezr/image-tools.git ../` - cd `image-tools` +- create a new branch +`git checkout -b hd2 remotes/origin/hd2` - `pip install .` #### 2. Install workflow-inference-compiler. 
- clone a workflow-inference-compiler repository `git clone https://github.com/camilovelezr/workflow-inference-compiler.git ../` - cd `workflow-inference-compiler` +- create a new branch +`git checkout -b hd2 remotes/origin/hd2` - `pip install -e ".[all]"` #### Note: From 0d0302cc59f5df34c513254abae33423fef90121 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Fri, 22 Mar 2024 12:42:37 -0500 Subject: [PATCH 10/16] configured cwl workflows --- README.md | 15 +- bbbc_json/bbbc_feature.json | 17 -- configuration/analysis/BBBC001.yml | 13 + configuration/analysis/BBBC039.yml | 12 + configuration/analysis/sample.yml | 12 + configuration/segmentation/BBBC001.yml | 10 + configuration/segmentation/BBBC039.yml | 10 + configuration/segmentation/sample.yml | 12 + cwl_workflows/__init__.py | 3 +- cwl_workflows/__main__.py | 27 +- ...features_extraction.py => cwl_analysis.py} | 8 +- cwl_workflows/cwl_nuclear_segmentation.py | 243 ++++++++++++++++++ cwl_workflows/utils.py | 87 ++++--- 13 files changed, 395 insertions(+), 74 deletions(-) delete mode 100644 bbbc_json/bbbc_feature.json create mode 100644 configuration/analysis/BBBC001.yml create mode 100644 configuration/analysis/BBBC039.yml create mode 100644 configuration/analysis/sample.yml create mode 100644 configuration/segmentation/BBBC001.yml create mode 100644 configuration/segmentation/BBBC039.yml create mode 100644 configuration/segmentation/sample.yml rename cwl_workflows/{cwl_features_extraction.py => cwl_analysis.py} (98%) create mode 100644 cwl_workflows/cwl_nuclear_segmentation.py diff --git a/README.md b/README.md index 9a86d7c..de5dc41 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Common Workflow Language (CWL) Feature Extraction worflow +# Common Workflow Language (CWL) Workflows CWL feature extraction workflow for imaging dataset @@ -41,11 +41,16 @@ Below are the specifics of the plugins employed in the workflow 7. 
[polus-ftl-label-plugin](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/transforms/images/polus-ftl-label-plugin) 8. [nyxus-plugin](https://github.com/PolusAI/image-tools/tree/kaggle-nuclei_seg/features/nyxus-plugin) -## Execute CWL feature extraction workflow +## Execute CWL workflows +Three different CWL workflows can be executed for specific datasets +1. segmentation +2. analysis -The parameters for each imaging dataset are pre-defined and stored in JSON format. A Pydantic model in a utils Python file can be utilized to store parameters for any new dataset +During the execution of the segmentation workflow, `1 to 7` plugins will be utilized. However, for executing the analysis workflow, `1 to 8` plugins will be employed. +If a user wishes to execute a workflow for a new dataset, they can utilize a sample YAML file to input parameter values. This YAML file can be saved in the desired subdirectory of the `configuration` folder with the name `dataset.yml` -`python cwl_workflows/__main__.py --name="BBBC039" --workflow=feature` + +`python cwl_workflows/__main__.py --name="BBBC039" --workflow=analysis` A directory named `workflow` is generated, encompassing CLTs for each plugin, YAML files, and all outputs are stored within the `outdir` directory. ``` @@ -91,3 +96,5 @@ workflows └── nyxus_plugin.outDir ``` +#### Note: +Step 7 and step 8 are executed only in the case of the `analysis` workflow. 
\ No newline at end of file diff --git a/bbbc_json/bbbc_feature.json b/bbbc_json/bbbc_feature.json deleted file mode 100644 index 27aa56b..0000000 --- a/bbbc_json/bbbc_feature.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "data": { - "BBBC039": { - "name": "BBBC039", - "file_pattern": "/.*/.*/.*/Images/(?P.*)/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", - "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", - "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", - "seg_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", - "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", - "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", - "group_by": "c", - "map_directory": false, - "features": "ALL_INTENSITY,ALL_MORPHOLOGY", - "file_extension": "pandas" - } - } -} \ No newline at end of file diff --git a/configuration/analysis/BBBC001.yml b/configuration/analysis/BBBC001.yml new file mode 100644 index 0000000..c72c9c8 --- /dev/null +++ b/configuration/analysis/BBBC001.yml @@ -0,0 +1,13 @@ +--- +name : BBBC001 +file_pattern : /.*/.*/.*/Images/.*/.*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif +out_file_pattern : x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif +image_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif +seg_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c0.ome.tif +ff_pattern: "x00_y03_p0\\(0-5\\)_c{c:d}_flatfield.ome.tif" +df_pattern: "x00_y03_p0\\(0-5\\)_c{c:d}_darkfield.ome.tif" +group_by: c +map_directory: false +features: ALL +file_extension: pandas + diff --git a/configuration/analysis/BBBC039.yml b/configuration/analysis/BBBC039.yml new file mode 100644 index 0000000..fc5c3a0 --- /dev/null +++ b/configuration/analysis/BBBC039.yml @@ -0,0 +1,12 @@ +--- +name : BBBC039 +file_pattern : /.*/.*/.*/Images/.*/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif +out_file_pattern : x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif +image_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif +seg_pattern: 
x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif +ff_pattern: "x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif" +df_pattern: "x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif" +group_by: c +map_directory: false +features: "ALL_INTENSITY,ALL_MORPHOLOGY" +file_extension: pandas \ No newline at end of file diff --git a/configuration/analysis/sample.yml b/configuration/analysis/sample.yml new file mode 100644 index 0000000..ecc82e1 --- /dev/null +++ b/configuration/analysis/sample.yml @@ -0,0 +1,12 @@ +--- +name : +file_pattern : +out_file_pattern : +image_pattern: +seg_pattern: +ff_pattern: +df_pattern: +group_by: +map_directory: +features: +file_extension: \ No newline at end of file diff --git a/configuration/segmentation/BBBC001.yml b/configuration/segmentation/BBBC001.yml new file mode 100644 index 0000000..28e1655 --- /dev/null +++ b/configuration/segmentation/BBBC001.yml @@ -0,0 +1,10 @@ +--- +name : BBBC001 +file_pattern : /.*/.*/.*/Images/.*/.*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif +out_file_pattern : x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif +image_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif +seg_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c0.ome.tif +ff_pattern: "x00_y03_p0\\(0-5\\)_c{c:d}_flatfield.ome.tif" +df_pattern: "x00_y03_p0\\(0-5\\)_c{c:d}_darkfield.ome.tif" +group_by: c +map_directory: false \ No newline at end of file diff --git a/configuration/segmentation/BBBC039.yml b/configuration/segmentation/BBBC039.yml new file mode 100644 index 0000000..d474e73 --- /dev/null +++ b/configuration/segmentation/BBBC039.yml @@ -0,0 +1,10 @@ +--- +name : BBBC039 +file_pattern : /.*/.*/.*/Images/.*/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif +out_file_pattern : x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif +image_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif +seg_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif +ff_pattern: "x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif" +df_pattern: 
"x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif" +group_by: c +map_directory: false \ No newline at end of file diff --git a/configuration/segmentation/sample.yml b/configuration/segmentation/sample.yml new file mode 100644 index 0000000..ecc82e1 --- /dev/null +++ b/configuration/segmentation/sample.yml @@ -0,0 +1,12 @@ +--- +name : +file_pattern : +out_file_pattern : +image_pattern: +seg_pattern: +ff_pattern: +df_pattern: +group_by: +map_directory: +features: +file_extension: \ No newline at end of file diff --git a/cwl_workflows/__init__.py b/cwl_workflows/__init__.py index b2e9ca5..bfa5117 100644 --- a/cwl_workflows/__init__.py +++ b/cwl_workflows/__init__.py @@ -1,2 +1,3 @@ -import cwl_features_extraction as cwl_features_extraction +import cwl_analysis as cwl_analysis +import cwl_nuclear_segmentation as cwl_nuclear_segmentation import utils as utils \ No newline at end of file diff --git a/cwl_workflows/__main__.py b/cwl_workflows/__main__.py index 0adfbb3..acf736d 100644 --- a/cwl_workflows/__main__.py +++ b/cwl_workflows/__main__.py @@ -1,9 +1,11 @@ """CWL Workflow.""" import logging import typer -from utils import FEAT_JSON_FILENAME -from utils import LoadData -from cwl_features_extraction import CWLFeatureWorkflow +from utils import LoadYaml +from cwl_analysis import CWLAnalysisWorkflow +from cwl_nuclear_segmentation import CWLSegmentationWorkflow +from pathlib import Path + app = typer.Typer() @@ -38,14 +40,23 @@ def main( logger.info(f"name = {name}") logger.info(f"workflow = {workflow}") - - if workflow == "feature": - model = LoadData(path=FEAT_JSON_FILENAME, name= name) - params = model.parse_json() + config_path = Path.cwd().joinpath(f"configuration/{workflow}/{name}.yml") + + + model = LoadYaml(workflow=workflow, config_path=config_path) + params = model.parse_yaml() + + if workflow == "analysis": logger.info(f"Executing {workflow}!!!") - model = CWLFeatureWorkflow(**params) + model = CWLAnalysisWorkflow(**params) model.workflow() + 
if workflow == "segmentation": + logger.info(f"Executing {workflow}!!!") + model = CWLSegmentationWorkflow(**params) + model.workflow() + + logger.info("Completed CWL workflow!!!") diff --git a/cwl_workflows/cwl_features_extraction.py b/cwl_workflows/cwl_analysis.py similarity index 98% rename from cwl_workflows/cwl_features_extraction.py rename to cwl_workflows/cwl_analysis.py index 63e3c0f..60702bc 100644 --- a/cwl_workflows/cwl_features_extraction.py +++ b/cwl_workflows/cwl_analysis.py @@ -13,9 +13,9 @@ logger.setLevel(logging.INFO) -class CWLFeatureWorkflow: +class CWLAnalysisWorkflow: """ - A CWL feature extraction pipeline. + A CWL feature extraction or Analysis pipeline. Attributes: name : Name of imaging dataset of Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets). @@ -224,7 +224,7 @@ def workflow(self) -> None: ftl_plugin.binarizationThreshold = 0.5 ftl_plugin.outDir = Path("ftl_plugin.outDir") - # ## Nyxus Plugin + # # ## Nyxus Plugin nyxus_plugin = self.create_step(self.manifest_urls("nyxus_plugin")) nyxus_plugin.inpDir = apply_flatfield.outDir nyxus_plugin.segDir = ftl_plugin.outDir @@ -260,6 +260,6 @@ def workflow(self) -> None: # # clean autognerated directories self._clean() self._move_outputs() - logger.info("Completed CWL Feature Extraction Workflow.") + logger.info("Completed CWL Feature Extraction /Analysis Workflow.") return \ No newline at end of file diff --git a/cwl_workflows/cwl_nuclear_segmentation.py b/cwl_workflows/cwl_nuclear_segmentation.py new file mode 100644 index 0000000..c376159 --- /dev/null +++ b/cwl_workflows/cwl_nuclear_segmentation.py @@ -0,0 +1,243 @@ +import wic.api.pythonapi as api +import polus.plugins as pp +from pathlib import Path +import yaml +import logging +import typing +import re +import shutil +from utils import GITHUB_TAG + +# Initialize the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class CWLSegmentationWorkflow: + """ + A CWL Nuclear 
Segmentation pipeline. + + Attributes: + name : Name of imaging dataset of Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets). + file_pattern : Pattern for parsing raw filenames. + out_file_pattern : Preferred format for filenames + image_pattern : Pattern for parsing intensity image filenames after renaming when using map_directory + seg_pattern : Pattern use to parse segmentation image filenames + map_directory : Mapping of folder name + ff_pattern: The filename pattern employed to select flatfield components from the ffDir. + df_pattern:The filename pattern employed to select darkfield components from the ffDir + group_by: Grouping variables for filePattern + """ + def __init__( + self, + name: str, + file_pattern: str, + out_file_pattern: str, + image_pattern: str, + seg_pattern: str, + ff_pattern: str, + df_pattern: str, + group_by: str, + map_directory: typing.Optional[bool] = False, + ): + self.name = name + self.file_pattern = file_pattern + self.out_file_pattern = out_file_pattern + self.map_directory = map_directory + self.ff_pattern = ff_pattern + self.df_pattern = df_pattern + self.group_by = group_by + self.wic_path = api._WIC_PATH + self.PATH = Path(self.wic_path.parent).joinpath(Path(__file__).parts[-3]) + self.cwl_path, self.workflow_path = self._create_directories() + self.image_pattern = image_pattern + self.seg_pattern = seg_pattern + + def _create_directories(self) -> None: + """Create directories for CWL outputs""" + cwl_path = self.PATH.joinpath("cwl_adapters") + cwl_path.mkdir(parents=True, exist_ok=True) + workflow_path = self.PATH.joinpath("workflows").resolve() + workflow_path.mkdir(exist_ok=True) + return cwl_path, workflow_path + + def _clean(self) -> None: + """Cleaning of redundant directories generating on running CWL""" + logger.info("Cleaning directories!!!") + destination_path = self.workflow_path.joinpath("experiment") + dir_names = ("autogenerated", "cachedir", "RUNS", "provenance") + for i, d in 
zip(self.wic_path.iterdir(), self.PATH.iterdir()): + if i.name.endswith(dir_names): + shutil.rmtree(d) + if d.name.endswith(dir_names): + shutil.rmtree(d) + + for d in destination_path.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.rmtree(d) + for d in self.PATH.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.move(d, destination_path) + + return + + def _move_outputs(self) -> None: + """Transfer outputs from the WIC directory to the workflow path""" + logger.info("Move outputs to workflow path!!!") + for d in self.wic_path.iterdir(): + if d.name.endswith("outdir"): + shutil.move(d, self.workflow_path) + return + + def _camel(self, name: str) -> str: + """Convert plugin name to camel case.""" + name = re.sub(r"(_|-)+", " ", name).title().replace(" ", "") + return "".join([name[0].upper(), name[1:]]) + + def _string_after_period(self, x): + """Get a string after period.""" + match = re.search(r"\.(.*)", x) + if match: + # Get the part after the period + return f".*.{match.group(1)}" + else: + return "" + + def _add_backslash_before_parentheses(self, x): + """Add backslash to generate ff_pattern and df_pattern""" + # Define the regular expression pattern to match parenthesis + pattern_1 = r"(\()|(\))" + # Use re.sub() to add a backslash before starting and finishing parenthesis + result = re.sub(pattern_1, r"\\\1\2", x) + pattern_2 = r"\d" + result = ( + result.split("_c")[0] + + "_c{c:d}" + + re.sub(pattern_2, "", result.split("_c")[1]) + ) + return result + + def create_step(self, url: str) -> api.Step: + """Generate the plugin class name from the plugin name specified in the manifest""" + manifest = pp.submit_plugin(url) + plugin_version = str(manifest.version) + cwl_tool = pp.get_plugin(self._camel(manifest.name), plugin_version).save_cwl( + self.cwl_path.joinpath(f"{self._camel(manifest.name)}.cwl") + ) + step = api.Step(cwl_tool) + return step + + def manifest_urls(self, x: str) -> str: + """URLs on GitHub for plugin manifests""" + urls = { 
+ "bbbc_download": f"{GITHUB_TAG}/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json", + "file_renaming": f"{GITHUB_TAG}/hamshkhawar/image-tools/filepattern_filerenaming/formats/file-renaming-tool/plugin.json", + "ome_converter": f"{GITHUB_TAG}/hamshkhawar/image-tools/basecontainer_omecontainer/formats/ome-converter-plugin/plugin.json", + "estimate_flatfield": f"{GITHUB_TAG}/nishaq503/image-tools/fix/basic/regression/basic-flatfield-estimation-tool/plugin.json", + "apply_flatfield": f"{GITHUB_TAG}/hamshkhawar/image-tools/cast_images/transforms/images/apply-flatfield-tool/plugin.json", + "kaggle_nuclei_segmentation": f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nucleiseg/segmentation/kaggle-nuclei-segmentation-tool/plugin.json", + "ftl_plugin": f"{GITHUB_TAG}/nishaq503/image-tools/fix/ftl-label/transforms/images/polus-ftl-label-plugin/plugin.json" + } + return urls[x] + + def modify_cwl(self) -> None: + """Modify CWL to incorporate environmental variables and permission access""" + for f in list(self.cwl_path.rglob("*.cwl")): + if "cwl" in f.name: + try: + with Path.open(f, "r") as file: + config = yaml.safe_load(file) + config["requirements"]["NetworkAccess"] = { + "networkAccess": True + } + config["requirements"]["EnvVarRequirement"] = { + "envDef": {"HOME": "/home/polusai"} + } + with open(f, "w") as out_file: + yaml.dump(config, out_file) + except FileNotFoundError: + logger.info("Error: There was an unexpected error while processing the file.") + return + + def workflow(self) -> None: + """ + A CWL nuclear segmentation pipeline. 
+ """ + # BBBCDownload + bbbc = self.create_step(self.manifest_urls("bbbc_download")) + bbbc.name = self.name + bbbc.outDir = Path("bbbc.outDir") + + # Renaming plugin + rename = self.create_step(self.manifest_urls("file_renaming")) + rename.filePattern = self.file_pattern + rename.outFilePattern = self.out_file_pattern + rename.mapDirectory = self.map_directory + rename.inpDir = bbbc.outDir + rename.outDir = Path("rename.outDir") + + # OMEConverter + ome_converter = self.create_step(self.manifest_urls("ome_converter")) + ome_converter.filePattern = self._string_after_period(self.out_file_pattern) + ome_converter.fileExtension = ".ome.tif" + ome_converter.inpDir = rename.outDir + ome_converter.outDir = Path("ome_converter.outDir") + + # Estimate Flatfield + estimate_flatfield = self.create_step(self.manifest_urls("estimate_flatfield")) + estimate_flatfield.inpDir = ome_converter.outDir + estimate_flatfield.filePattern = self.image_pattern + estimate_flatfield.groupBy = self.group_by + estimate_flatfield.getDarkfield = True + estimate_flatfield.outDir = Path("estimate_flatfield.outDir") + + # # Apply Flatfield + apply_flatfield = self.create_step(self.manifest_urls("apply_flatfield")) + apply_flatfield.imgDir = ome_converter.outDir + apply_flatfield.imgPattern = self.image_pattern + apply_flatfield.ffDir = estimate_flatfield.outDir + apply_flatfield.ffPattern = self.ff_pattern + apply_flatfield.dfPattern = self.df_pattern + apply_flatfield.outDir = Path("apply_flatfield.outDir") + apply_flatfield.dataType = True + + ## Kaggle Nuclei Segmentation + kaggle_nuclei_segmentation = self.create_step( + self.manifest_urls("kaggle_nuclei_segmentation") + ) + kaggle_nuclei_segmentation.inpDir = apply_flatfield.outDir + kaggle_nuclei_segmentation.filePattern = self.image_pattern + kaggle_nuclei_segmentation.outDir = Path("kaggle_nuclei_segmentation.outDir") + + ## FTL Label Plugin + ftl_plugin = self.create_step(self.manifest_urls("ftl_plugin")) + ftl_plugin.inpDir = 
kaggle_nuclei_segmentation.outDir + ftl_plugin.connectivity = 1 + ftl_plugin.binarizationThreshold = 0.5 + ftl_plugin.outDir = Path("ftl_plugin.outDir") + + logger.info("Initiating CWL Nuclear Segmentation Workflow!!!") + steps = [ + bbbc, + rename, + ome_converter, + estimate_flatfield, + apply_flatfield, + kaggle_nuclei_segmentation, + ftl_plugin + ] + workflow = api.Workflow(steps, "experiment", self.workflow_path) + # # Saving CLT for plugins + workflow._save_all_cwl(overwrite=True) + # # Adding environmental variables for bbbc_download and ome_converter plugin + self.modify_cwl() + # # # Save yaml to run CWL tool + workflow._save_yaml() + # Compile and run using WIC python API + workflow.compile(run_local=True, overwrite=False) + # # print(workflow.yml_path) + # # clean autognerated directories + self._clean() + self._move_outputs() + logger.info("Completed CWL nuclear segmentation workflow.") + return \ No newline at end of file diff --git a/cwl_workflows/utils.py b/cwl_workflows/utils.py index e18c73d..e2fc09e 100644 --- a/cwl_workflows/utils.py +++ b/cwl_workflows/utils.py @@ -1,63 +1,70 @@ -import json import pydantic from pathlib import Path from typing import Dict from typing import Union +import yaml + GITHUB_TAG = "https://raw.githubusercontent.com" +CONFIGURATION_FILENAME =Path.cwd().joinpath("bbbc_json/bbbc_cwl_configuration.json") + +ANALYSIS_KEYS = ["name", "file_pattern", "out_file_pattern", "image_pattern", "seg_pattern", "ff_pattern", "df_pattern", "group_by", "map_directory", "features", "file_extension"] +SEG_KEYS = ["name", "file_pattern", "out_file_pattern", "image_pattern", "seg_pattern", "ff_pattern", "df_pattern", "group_by", "map_directory"] + + class DataModel(pydantic.BaseModel): data: Dict[str, Dict[str, Union[str, bool]]] -class LoadData(pydantic.BaseModel): - path: Union[str, Path] - name:str - @pydantic.validator("path", pre=True) +class LoadYaml(pydantic.BaseModel): + """Validation of Dataset yaml.""" + workflow:str + config_path: 
Union[str, Path] + + @pydantic.validator("config_path", pre=True) @classmethod def validate_path(cls, value: Union[str, Path]) -> Union[str, Path]: """Validation of Paths.""" if not Path(value).exists(): - msg = f"{value} do not exist! Please do check it again" + msg = f"{value} does not exist! Please do check it again" raise ValueError(msg) if isinstance(value, str): return Path(value) return value + + @pydantic.validator("workflow", pre=True) + @classmethod + def validate_workflow_name(cls, value: str) -> str: + """Validation of workflow name.""" + if not value in ["analysis", "segmentation", "visualization"]: + msg = f"Please choose a valid workflow name i-e analysis segmentation visualization" + raise ValueError(msg) + return value + + def parse_yaml(self) -> Dict[str, Union[str, bool]]: + """Parsing yaml configuration file for each dataset.""" - def parse_json(self) -> Dict[str, Union[str, bool]]: - with open(self.path) as json_file: - # Read the JSON data - data = json.load(json_file) - params = [v[self.name] for k, v in data.items()][0] - if len(params) == 0: - msg = f"{self.name} dataset donot exist! Please do check it again" + with open(f'{self.config_path}','r') as f: + data = yaml.safe_load(f) + + check_values = any([v for _, v in data.items() if f is None]) + + if check_values is True: + msg = f"All the parameters are not defined! 
Please do check it again" raise ValueError(msg) - return params - - -feat_params = { - "BBBC039": { - "name": "BBBC039", - "file_pattern": "/.*/.*/.*/Images/(?P.*)/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", - "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", - "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", - "seg_pattern":"images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", - "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", - "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", - "group_by": "c", - "map_directory": False, - "features": "ALL_INTENSITY,ALL_MORPHOLOGY", - "file_extension": "pandas" - } -} -model = DataModel(data=feat_params) -model_dict = model.dict() - -json_dir = Path(Path(__file__).parents[1]).joinpath("bbbc_json") -json_dir.mkdir(parents=True, exist_ok=True) -FEAT_JSON_FILENAME = json_dir.joinpath("bbbc_feature.json") - -with Path.open(FEAT_JSON_FILENAME, "w") as json_file: - json.dump(model_dict, json_file, indent=2) + + + if self.workflow == "analysis": + if list(data.keys()) != ANALYSIS_KEYS: + msg = f"Please do check parameters again for analysis workflow!!" + raise ValueError(msg) + + if self.workflow == "segmentation": + if list(data.keys()) != SEG_KEYS: + msg = f"Please do check parameters again for segmentation workflow!!" 
+ raise ValueError(msg) + + return data From 7ddca5c1059fb698e078e1763c814a67c911ee6c Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Fri, 22 Mar 2024 13:16:42 -0500 Subject: [PATCH 11/16] fixing directory structure --- README.md | 2 +- cwl_workflows/__init__.py | 3 --- cwl_workflows/cwl_analysis.py | 6 ++++-- cwl_workflows/cwl_nuclear_segmentation.py | 7 +++++-- {cwl_workflows => src}/__main__.py | 10 ++++++---- 5 files changed, 16 insertions(+), 12 deletions(-) delete mode 100644 cwl_workflows/__init__.py rename {cwl_workflows => src}/__main__.py (81%) diff --git a/README.md b/README.md index de5dc41..4071420 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ During the execution of the segmentation workflow, `1 to 7` plugins will be util If a user wishes to execute a workflow for a new dataset, they can utilize a sample YAML file to input parameter values. This YAML file can be saved in the desired subdirectory of the `configuration` folder with the name `dataset.yml` -`python cwl_workflows/__main__.py --name="BBBC039" --workflow=analysis` +`python src/__main__.py --name="BBBC039" --workflow=analysis` A directory named `workflow` is generated, encompassing CLTs for each plugin, YAML files, and all outputs are stored within the `outdir` directory. 
``` diff --git a/cwl_workflows/__init__.py b/cwl_workflows/__init__.py deleted file mode 100644 index bfa5117..0000000 --- a/cwl_workflows/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -import cwl_analysis as cwl_analysis -import cwl_nuclear_segmentation as cwl_nuclear_segmentation -import utils as utils \ No newline at end of file diff --git a/cwl_workflows/cwl_analysis.py b/cwl_workflows/cwl_analysis.py index 60702bc..1fbbe66 100644 --- a/cwl_workflows/cwl_analysis.py +++ b/cwl_workflows/cwl_analysis.py @@ -6,7 +6,9 @@ import re import shutil import typing -from utils import GITHUB_TAG +import sys +sys.path.append('../') +from cwl_workflows.utils import GITHUB_TAG # Initialize the logger logger = logging.getLogger(__name__) @@ -51,7 +53,7 @@ def __init__( self.df_pattern = df_pattern self.group_by = group_by self.wic_path = api._WIC_PATH - self.PATH = Path(self.wic_path.parent).joinpath(Path(__file__).parts[-3]) + self.PATH = Path(self.wic_path.parent).joinpath("image-workflows") self.cwl_path, self.workflow_path = self._create_directories() self.image_pattern = image_pattern self.seg_pattern = seg_pattern diff --git a/cwl_workflows/cwl_nuclear_segmentation.py b/cwl_workflows/cwl_nuclear_segmentation.py index c376159..7fa3a69 100644 --- a/cwl_workflows/cwl_nuclear_segmentation.py +++ b/cwl_workflows/cwl_nuclear_segmentation.py @@ -6,7 +6,9 @@ import typing import re import shutil -from utils import GITHUB_TAG +import sys +sys.path.append('../') +from cwl_workflows.utils import GITHUB_TAG # Initialize the logger logger = logging.getLogger(__name__) @@ -48,7 +50,7 @@ def __init__( self.df_pattern = df_pattern self.group_by = group_by self.wic_path = api._WIC_PATH - self.PATH = Path(self.wic_path.parent).joinpath(Path(__file__).parts[-3]) + self.PATH = Path(self.wic_path.parent).joinpath("image-workflows") self.cwl_path, self.workflow_path = self._create_directories() self.image_pattern = image_pattern self.seg_pattern = seg_pattern @@ -226,6 +228,7 @@ def 
workflow(self) -> None: kaggle_nuclei_segmentation, ftl_plugin ] + workflow = api.Workflow(steps, "experiment", self.workflow_path) # # Saving CLT for plugins workflow._save_all_cwl(overwrite=True) diff --git a/cwl_workflows/__main__.py b/src/__main__.py similarity index 81% rename from cwl_workflows/__main__.py rename to src/__main__.py index acf736d..70aab11 100644 --- a/cwl_workflows/__main__.py +++ b/src/__main__.py @@ -1,9 +1,11 @@ """CWL Workflow.""" import logging import typer -from utils import LoadYaml -from cwl_analysis import CWLAnalysisWorkflow -from cwl_nuclear_segmentation import CWLSegmentationWorkflow +import sys +sys.path.append('../') +from cwl_workflows.utils import LoadYaml +from cwl_workflows.cwl_analysis import CWLAnalysisWorkflow +from cwl_workflows.cwl_nuclear_segmentation import CWLSegmentationWorkflow from pathlib import Path @@ -40,7 +42,7 @@ def main( logger.info(f"name = {name}") logger.info(f"workflow = {workflow}") - config_path = Path.cwd().joinpath(f"configuration/{workflow}/{name}.yml") + config_path = Path.cwd().parent.joinpath(f"configuration/{workflow}/{name}.yml") model = LoadYaml(workflow=workflow, config_path=config_path) From 8c152f33bb9fb47775c15c2ffeadbafde09f932f Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Mon, 1 Apr 2024 17:42:25 -0500 Subject: [PATCH 12/16] fix module not found error --- README.md | 11 +- configuration/__init__.py | 0 configuration/analysis/BBBC039.yml | 2 +- configuration/analysis/__init__.py | 0 configuration/analysis/sample.yml | 2 +- configuration/segmentation/__init__.py | 0 cwl-adapters/basic-flatfield-estimation.cwl | 111 ++++++++++++++++ cwl-adapters/bbbcdownload.cwl | 57 +++++++++ cwl-adapters/file-renaming.cwl | 81 ++++++++++++ cwl-adapters/image_assembler.cwl | 97 ++++++++++++++ cwl-adapters/montage.cwl | 119 ++++++++++++++++++ cwl-adapters/ome-converter.cwl | 81 ++++++++++++ cwl-adapters/precompute_slide.cwl | 77 ++++++++++++ pyproject.toml | 38 ++++++ 
src/polus/image/workflows/__init__.py | 0 src/{ => polus/image/workflows}/__main__.py | 13 +- .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 186 bytes .../__pycache__/__main__.cpython-310.pyc | Bin 0 -> 1693 bytes .../__pycache__/utils.cpython-310.pyc | Bin 0 -> 2724 bytes .../polus/image/workflows}/utils.py | 16 +-- workflows/__init__.py | 0 {cwl_workflows => workflows}/bbbc.py | 12 +- {cwl_workflows => workflows}/bbbc.yml | 0 workflows/bbbc_sub.py | 78 ++++++++++++ {cwl_workflows => workflows}/cwl_analysis.py | 8 +- .../cwl_nuclear_segmentation.py | 19 ++- 26 files changed, 780 insertions(+), 42 deletions(-) create mode 100644 configuration/__init__.py create mode 100644 configuration/analysis/__init__.py create mode 100644 configuration/segmentation/__init__.py create mode 100644 cwl-adapters/basic-flatfield-estimation.cwl create mode 100644 cwl-adapters/bbbcdownload.cwl create mode 100644 cwl-adapters/file-renaming.cwl create mode 100644 cwl-adapters/image_assembler.cwl create mode 100644 cwl-adapters/montage.cwl create mode 100644 cwl-adapters/ome-converter.cwl create mode 100644 cwl-adapters/precompute_slide.cwl create mode 100644 pyproject.toml create mode 100644 src/polus/image/workflows/__init__.py rename src/{ => polus/image/workflows}/__main__.py (79%) create mode 100644 src/polus/image/workflows/__pycache__/__init__.cpython-310.pyc create mode 100644 src/polus/image/workflows/__pycache__/__main__.cpython-310.pyc create mode 100644 src/polus/image/workflows/__pycache__/utils.cpython-310.pyc rename {cwl_workflows => src/polus/image/workflows}/utils.py (82%) create mode 100644 workflows/__init__.py rename {cwl_workflows => workflows}/bbbc.py (79%) rename {cwl_workflows => workflows}/bbbc.yml (100%) create mode 100644 workflows/bbbc_sub.py rename {cwl_workflows => workflows}/cwl_analysis.py (98%) rename {cwl_workflows => workflows}/cwl_nuclear_segmentation.py (96%) diff --git a/README.md b/README.md index 4071420..ca2838b 100644 --- a/README.md +++ 
b/README.md @@ -23,6 +23,10 @@ create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/mana `git checkout -b hd2 remotes/origin/hd2` - `pip install -e ".[all]"` +#### 3. Install image-workflow. +- cd `image-workflows` +- poetry install + #### Note: Ensure that the [docker-desktop](https://www.docker.com/products/docker-desktop/) is running in the background. To verify that it's operational, you can use the following command: `docker run -d -p 80:80 docker/getting-started` @@ -49,12 +53,11 @@ Three different CWL workflows can be executed for specific datasets During the execution of the segmentation workflow, `1 to 7` plugins will be utilized. However, for executing the analysis workflow, `1 to 8` plugins will be employed. If a user wishes to execute a workflow for a new dataset, they can utilize a sample YAML file to input parameter values. This YAML file can be saved in the desired subdirectory of the `configuration` folder with the name `dataset.yml` +`python -m polus.image.workflows --name="BBBC001" --workflow=analysis` -`python src/__main__.py --name="BBBC039" --workflow=analysis` - -A directory named `workflow` is generated, encompassing CLTs for each plugin, YAML files, and all outputs are stored within the `outdir` directory. +A directory named `outputs` is generated, encompassing CLTs for each plugin, YAML files, and all outputs are stored within the `outdir` directory. 
``` -workflows +outputs ├── experiment │ └── cwl_adapters | experiment.cwl diff --git a/configuration/__init__.py b/configuration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/configuration/analysis/BBBC039.yml b/configuration/analysis/BBBC039.yml index fc5c3a0..1894023 100644 --- a/configuration/analysis/BBBC039.yml +++ b/configuration/analysis/BBBC039.yml @@ -8,5 +8,5 @@ ff_pattern: "x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif" df_pattern: "x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif" group_by: c map_directory: false -features: "ALL_INTENSITY,ALL_MORPHOLOGY" +features: "ALL_INTENSITY" file_extension: pandas \ No newline at end of file diff --git a/configuration/analysis/__init__.py b/configuration/analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/configuration/analysis/sample.yml b/configuration/analysis/sample.yml index ecc82e1..8fada1a 100644 --- a/configuration/analysis/sample.yml +++ b/configuration/analysis/sample.yml @@ -1,6 +1,6 @@ --- name : -file_pattern : +file_pattern : out_file_pattern : image_pattern: seg_pattern: diff --git a/configuration/segmentation/__init__.py b/configuration/segmentation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cwl-adapters/basic-flatfield-estimation.cwl b/cwl-adapters/basic-flatfield-estimation.cwl new file mode 100644 index 0000000..85c18ea --- /dev/null +++ b/cwl-adapters/basic-flatfield-estimation.cwl @@ -0,0 +1,111 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.0 + +label: BaSiC Flatfield Estimation + +doc: |- + This WIPP plugin will take a collection of images and use the BaSiC flatfield correction algorithm to generate a flatfield image, a darkfield image, and a photobleach offset. 
+ https://github.com/PolusAI/polus-plugins/tree/master/regression/basic-flatfield-estimation-plugin + +requirements: + DockerRequirement: + dockerPull: polusai/basic-flatfield-estimation-plugin:2.1.1 + # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement + InitialWorkDirRequirement: + listing: + - entry: $(inputs.outDir) + writable: true # Output directories must be writable + InlineJavascriptRequirement: {} + +# "jax._src.xla_bridge - WARNING - An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu." +hints: + cwltool:CUDARequirement: + cudaVersionMin: "11.4" + cudaComputeCapabilityMin: "3.0" + cudaDeviceCountMin: 1 + cudaDeviceCountMax: 1 + +inputs: + inpDir: + label: Path to input images + doc: |- + Path to input images + type: Directory + inputBinding: + prefix: --inpDir + + getDarkfield: + label: If 'true', will calculate darkfield image + doc: |- + If 'true', will calculate darkfield image + type: boolean? + inputBinding: + prefix: --getDarkfield + + # photobleach: + # label: If 'true', will calculate photobleach scalar + # doc: |- + # If 'true', will calculate photobleach scalar + # type: boolean? + # inputBinding: + # prefix: --photobleach + + filePattern: + label: File pattern to subset data + doc: |- + File pattern to subset data + type: string? + inputBinding: + prefix: --filePattern + + groupBy: + label: Variables to group together + doc: |- + Variables to group together + type: string? + inputBinding: + prefix: --groupBy + + preview: + label: Generate a JSON file describing what the outputs should be + doc: |- + Generate a JSON file describing what the outputs should be + type: boolean? 
+ inputBinding: + prefix: --preview + + outDir: + label: Output image collection + doc: |- + Output image collection + type: Directory + inputBinding: + prefix: --outDir + +outputs: + outDir: + label: Output image collection + doc: |- + Output image collection + type: Directory + outputBinding: + glob: $(inputs.outDir.basename) + + preview_json: + label: JSON file describing what the outputs should be + doc: |- + JSON file describing what the outputs should be + type: File? # if --preview + format: edam:format_3464 + outputBinding: + glob: preview.json + +$namespaces: + edam: https://edamontology.org/ + cwltool: http://commonwl.org/cwltool# + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl + +# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/regression/basic-flatfield-estimation-plugin/plugin.json \ No newline at end of file diff --git a/cwl-adapters/bbbcdownload.cwl b/cwl-adapters/bbbcdownload.cwl new file mode 100644 index 0000000..9fba292 --- /dev/null +++ b/cwl-adapters/bbbcdownload.cwl @@ -0,0 +1,57 @@ +class: CommandLineTool +cwlVersion: v1.1 + +label: BBBC Download + +doc: |- + Downloads the datasets on the Broad Bioimage Benchmark Collection website + https://github.com/saketprem/polus-plugins/tree/bbbc_download/utils/bbbc-download-plugin + +requirements: + DockerRequirement: + dockerPull: polusai/bbbc-download-plugin:0.1.0-dev1 + # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement + InitialWorkDirRequirement: + listing: + - entry: $(inputs.outDir) + writable: true # Output directories must be writable + InlineJavascriptRequirement: {} + # NOTE: By default, "tools must not assume network access, except for localhost" + # See https://www.commonwl.org/v1.1/CommandLineTool.html#NetworkAccess + NetworkAccess: + networkAccess: true + +inputs: + name: + label: The name of the dataset(s) to be downloaded (separate the datasets with a comma. 
eg BBBC001,BBBC002,BBBC003) + doc: |- + The name of the dataset(s) to be downloaded (separate the datasets with a comma. eg BBBC001,BBBC002,BBBC003) + inputBinding: + prefix: --name + type: string + # default: BBBC001 + + outDir: + label: Output collection + doc: |- + Output collection + inputBinding: + prefix: --outDir + type: Directory + +outputs: + outDir: + label: Output collection + doc: |- + Output collection + type: Directory + outputBinding: + glob: $(inputs.outDir.basename) + +$namespaces: + edam: https://edamontology.org/ + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl + +# manifest: "https://raw.githubusercontent.com/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json" \ No newline at end of file diff --git a/cwl-adapters/file-renaming.cwl b/cwl-adapters/file-renaming.cwl new file mode 100644 index 0000000..29628cd --- /dev/null +++ b/cwl-adapters/file-renaming.cwl @@ -0,0 +1,81 @@ +class: CommandLineTool +cwlVersion: v1.0 + +label: File Renaming + +doc: |- + Rename and store image collection files in a new image collection + https://github.com/PolusAI/polus-plugins/tree/master/formats/file-renaming-plugin + +requirements: + DockerRequirement: + dockerPull: polusai/file-renaming-plugin:0.2.1-dev0 # NOTE: 0.2.3 not pushed yet + # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement + InitialWorkDirRequirement: + listing: + - entry: $(inputs.outDir) + writable: true # Output directories must be writable + InlineJavascriptRequirement: {} + +inputs: + inpDir: + inputBinding: + prefix: --inpDir + type: Directory + + filePattern: + inputBinding: + prefix: --filePattern + type: string + + mapDirectory: + inputBinding: + prefix: --mapDirectory + type: string? 
# enum: raw, map, default + + preview: + label: Generate a JSON file describing what the outputs should be + doc: |- + Generate a JSON file describing what the outputs should be + inputBinding: + prefix: --preview + type: boolean? + + outFilePattern: + inputBinding: + prefix: --outFilePattern + type: string + + outDir: + label: Output collection + doc: |- + Output collection + inputBinding: + prefix: --outDir + type: Directory + +outputs: + outDir: + label: Output collection + doc: |- + Output collection + type: Directory + outputBinding: + glob: $(inputs.outDir.basename) + + preview_json: + label: JSON file describing what the outputs should be + doc: |- + JSON file describing what the outputs should be + type: File? # if --preview + format: edam:format_3464 + outputBinding: + glob: preview.json + +$namespaces: + edam: https://edamontology.org/ + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl + +# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/formats/file-renaming-plugin/plugin.json \ No newline at end of file diff --git a/cwl-adapters/image_assembler.cwl b/cwl-adapters/image_assembler.cwl new file mode 100644 index 0000000..f6d179f --- /dev/null +++ b/cwl-adapters/image_assembler.cwl @@ -0,0 +1,97 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.0 + +label: Image Assembler + +doc: |- + This plugin assembles images into a stitched image using an image stitching vector. + https://github.com/PolusAI/polus-plugins/tree/master/transforms/images/image-assembler-plugin + +requirements: + DockerRequirement: + dockerPull: polusai/image-assembler-plugin:1.4.0-dev0 + # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement + InitialWorkDirRequirement: + listing: + - $(inputs.stitchPath) # Must stage inputs for tools which do not accept full paths. 
+ - entry: $(inputs.outDir) + writable: true # Output directories must be writable + InlineJavascriptRequirement: {} + +inputs: + stitchPath: + label: Path to directory containing "stitching vector" file img-global-positions-0.txt + doc: |- + Path to directory containing "stitching vector" file img-global-positions-0.txt + type: Directory + inputBinding: + prefix: --stitchPath + + imgPath: + label: Path to input image collection + doc: |- + Path to input image collection + type: Directory + inputBinding: + prefix: --imgPath + + timesliceNaming: + label: Label images by timeslice rather than analyzing input image names + doc: |- + Label images by timeslice rather than analyzing input image names + inputBinding: + prefix: --timesliceNaming + type: boolean? + + preview: + label: Generate a JSON file describing what the outputs should be + doc: |- + Generate a JSON file describing what the outputs should be + type: boolean? + inputBinding: + prefix: --preview + + outDir: + label: Output collection + doc: |- + Output collection + type: Directory + inputBinding: + prefix: --outDir + +outputs: + outDir: + label: Output collection + doc: |- + Output collection + type: Directory + outputBinding: + glob: $(inputs.outDir.basename) + + assembled_image: + label: The assembled montage image + doc: |- + The assembled montage image + type: File? # if not --preview + # See https://bioportal.bioontology.org/ontologies/EDAM?p=classes&conceptid=format_3727 + format: edam:format_3727 + outputBinding: + glob: "*.ome.tif" + + preview_json: + label: JSON file with outputs + doc: |- + JSON file with outputs + type: File? 
# if --preview + format: edam:format_3464 + outputBinding: + glob: preview.json + +$namespaces: + edam: https://edamontology.org/ + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl + +# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/transforms/images/image-assembler-plugin/plugin.json \ No newline at end of file diff --git a/cwl-adapters/montage.cwl b/cwl-adapters/montage.cwl new file mode 100644 index 0000000..bc90bb0 --- /dev/null +++ b/cwl-adapters/montage.cwl @@ -0,0 +1,119 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.0 + +label: Montage + +doc: |- + This plugin generates a stitching vector that will montage images together. + https://github.com/PolusAI/polus-plugins/tree/master/transforms/images/montage-plugin + +requirements: + DockerRequirement: + dockerPull: polusai/montage-plugin:0.5.0 + # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement + InitialWorkDirRequirement: + listing: + - entry: $(inputs.outDir) + writable: true # Output directories must be writable + InlineJavascriptRequirement: {} + +inputs: + inpDir: + label: Input image collection to be processed by this plugin + doc: |- + Input image collection to be processed by this plugin + type: Directory + inputBinding: + prefix: --inpDir + + filePattern: + label: Filename pattern used to parse data + doc: |- + Filename pattern used to parse data + type: string + inputBinding: + prefix: --filePattern + + layout: + label: Specify montage organization + doc: |- + Specify montage organization + type: string? + # optional array of strings? + inputBinding: + prefix: --layout + + gridSpacing: + label: Specify spacing between images in the lowest grid + doc: |- + Specify spacing between images in the lowest grid + inputBinding: + prefix: --gridSpacing + type: int? 
+ + imageSpacing: + label: Specify spacing multiplier between grids + doc: |- + Specify spacing multiplier between grids + inputBinding: + prefix: --imageSpacing + type: int? + + flipAxis: + label: Axes to flip when laying out images + doc: |- + Axes to flip when laying out images + inputBinding: + prefix: --flipAxis + type: string? + + preview: + label: Generate a JSON file describing what the outputs should be + doc: |- + Generate a JSON file describing what the outputs should be + type: boolean? + inputBinding: + prefix: --preview + + outDir: + label: Output collection + doc: |- + Output collection + type: Directory + inputBinding: + prefix: --outDir + +outputs: + outDir: + label: Output collection + doc: |- + Output collection + type: Directory + outputBinding: + glob: $(inputs.outDir.basename) + + global_positions: + label: The "stitching vector", i.e. the positions of the individual images in the montage + doc: |- + The "stitching vector", i.e. the positions of the individual images in the montage + type: File? # if not --preview + outputBinding: + glob: $(inputs.outDir.basename)/img-global-positions-0.txt + + preview_json: + label: JSON file describing what the outputs should be + doc: |- + JSON file describing what the outputs should be + type: File? 
# if --preview + format: edam:format_3464 + outputBinding: + glob: preview.json + +$namespaces: + edam: https://edamontology.org/ + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl + +# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/transforms/images/montage-plugin/plugin.json \ No newline at end of file diff --git a/cwl-adapters/ome-converter.cwl b/cwl-adapters/ome-converter.cwl new file mode 100644 index 0000000..7dd5607 --- /dev/null +++ b/cwl-adapters/ome-converter.cwl @@ -0,0 +1,81 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.0 + +label: OME Zarr Converter + +doc: |- + This WIPP plugin converts BioFormats supported data types to the OME Zarr file format. + https://github.com/PolusAI/polus-plugins/tree/master/formats/ome-converter-plugin + +requirements: + DockerRequirement: + dockerPull: polusai/ome-converter-plugin:0.3.2-dev2 + # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement + InitialWorkDirRequirement: + listing: + - entry: $(inputs.outDir) + writable: true # Output directories must be writable + InlineJavascriptRequirement: {} +# NOTE: polusai/ome-converter-plugin:0.3.1 uses the base image +# polusai/bfio:2.3.2 which now un-bundles the java maven package +# ome:formats-gpl:7.1.0 due to licensing reasons. +# To avoid requiring network access at runtime, in the bfio Dockerfile +# it is pre-installed and saved in ~/.m2/ However, by default +# CWL hides all environment variables (including HOME), so we need to +# set HOME here so that at runtime we get a cache hit on the maven install. 
+ EnvVarRequirement: +# See https://www.commonwl.org/user_guide/topics/environment-variables.html + envDef: + HOME: /home/polusai + +inputs: + inpDir: + label: Input generic data collection to be processed by this plugin + doc: |- + Input generic data collection to be processed by this plugin + type: Directory + inputBinding: + prefix: --inpDir + + filePattern: + label: A filepattern, used to select data for conversion + doc: |- + A filepattern, used to select data for conversion + type: string + inputBinding: + prefix: --filePattern + + fileExtension: + label: The file extension + doc: |- + The file extension + type: string + inputBinding: + prefix: --fileExtension + default: "default" # enum: .ome.tiff, .ome.zarr, default + + outDir: + label: Output collection + doc: |- + Output collection + type: Directory + inputBinding: + prefix: --outDir + +outputs: + outDir: + label: Output collection + doc: |- + Output collection + type: Directory + outputBinding: + glob: $(inputs.outDir.basename) + +$namespaces: + edam: https://edamontology.org/ + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl + +# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/formats/ome-converter-plugin/plugin.json \ No newline at end of file diff --git a/cwl-adapters/precompute_slide.cwl b/cwl-adapters/precompute_slide.cwl new file mode 100644 index 0000000..2419170 --- /dev/null +++ b/cwl-adapters/precompute_slide.cwl @@ -0,0 +1,77 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.0 + +label: Precompute Slide + +doc: |- + This plugin generates image pyramids in multiple viewing formats. 
+ https://github.com/PolusAI/polus-plugins/tree/master/visualization/polus-precompute-slide-plugin + +requirements: + DockerRequirement: + dockerPull: polusai/precompute-slide-plugin:1.7.0-dev0 + # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement + InitialWorkDirRequirement: + listing: + - entry: $(inputs.outDir) + writable: true # Output directories must be writable + InlineJavascriptRequirement: {} + +inputs: + inpDir: + label: Input generic data collection to be processed by this plugin + doc: |- + Input generic data collection to be processed by this plugin + type: Directory + inputBinding: + prefix: --inpDir + + pyramidType: + label: Build a DeepZoom, Neuroglancer, Zarr pyramid + doc: |- + Build a DeepZoom, Neuroglancer, Zarr pyramid + type: string # enum: DeepZoom, Neuroglancer, Zarr + inputBinding: + prefix: --pyramidType + + imageType: + label: Image is either Segmentation or Image + doc: |- + Image is either Segmentation or Image + inputBinding: + prefix: --imageType + type: string + + filePattern: + label: Filename pattern used to parse data + doc: |- + Filename pattern used to parse data + type: string? + inputBinding: + prefix: --filePattern + + outDir: + label: Output collection + doc: |- + Output collection + type: Directory + inputBinding: + prefix: --outDir + +outputs: + outDir: + label: Output collection + doc: |- + Output collection + type: Directory + outputBinding: + glob: $(inputs.outDir.basename) + +$namespaces: + edam: https://edamontology.org/ + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl + +# \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..85287fa --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,38 @@ +[tool.poetry] +name = "polus-image-workflows" +version = "0.1.1-dev1" +description = "Build and execute pipelines of polus plugins on Compute." 
+authors = ["Hamdah Shafqat Abbasi "] +readme = "README.md" +packages = [{include = "polus", from = "src"}] + +[tool.poetry.dependencies] +python = ">=3.9,<3.12" +typer = "^0.9.0" +pyyaml = "^6.0.1" +pydantic = "^2.6.1" +cwl-utils="0.31" +toil="^5.12" +polus-plugins = {path = "../image-tools", develop = true} +workflow-inference-compiler = {path = "../workflow-inference-compiler", develop = true} + +[tool.poetry.group.dev.dependencies] +jupyter = "^1.0.0" +nbconvert = "^7.11.0" +pytest = "^7.4.4" +bump2version = "^1.0.1" +pre-commit = "^3.3.3" +black = "^23.3.0" +ruff = "^0.0.274" +mypy = "^1.4.0" +pytest-xdist = "^3.3.1" +pytest-sugar = "^0.9.7" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +addopts = [ + "--import-mode=importlib", +] \ No newline at end of file diff --git a/src/polus/image/workflows/__init__.py b/src/polus/image/workflows/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/__main__.py b/src/polus/image/workflows/__main__.py similarity index 79% rename from src/__main__.py rename to src/polus/image/workflows/__main__.py index 70aab11..863f1ef 100644 --- a/src/__main__.py +++ b/src/polus/image/workflows/__main__.py @@ -1,13 +1,11 @@ """CWL Workflow.""" import logging import typer -import sys -sys.path.append('../') -from cwl_workflows.utils import LoadYaml -from cwl_workflows.cwl_analysis import CWLAnalysisWorkflow -from cwl_workflows.cwl_nuclear_segmentation import CWLSegmentationWorkflow from pathlib import Path - +from polus.image.workflows.utils import LoadYaml +from workflows.cwl_analysis import CWLAnalysisWorkflow +from workflows.cwl_nuclear_segmentation import CWLSegmentationWorkflow +from pathlib import Path app = typer.Typer() @@ -42,7 +40,8 @@ def main( logger.info(f"name = {name}") logger.info(f"workflow = {workflow}") - config_path = Path.cwd().parent.joinpath(f"configuration/{workflow}/{name}.yml") + config_path = 
Path(__file__).parent.parent.parent.parent.parent.joinpath(f"configuration/{workflow}/{name}.yml") + print(config_path) model = LoadYaml(workflow=workflow, config_path=config_path) diff --git a/src/polus/image/workflows/__pycache__/__init__.cpython-310.pyc b/src/polus/image/workflows/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a9d53681adefe19e5647737bdc0f2bdfa0cd30b GIT binary patch literal 186 zcmd1j<>g`kf(jAtG!Xq5L?8o3AjbiSi&=m~3PUi1CZpdXD p=ad#hWb_d-@$s2?nI-Y@dIgoYIBatBQ%ZAE?LgKRGXV(}1_0d3FHHad literal 0 HcmV?d00001 diff --git a/src/polus/image/workflows/__pycache__/__main__.cpython-310.pyc b/src/polus/image/workflows/__pycache__/__main__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb595cd260f47e14d2642d5dc4890297fee6431c GIT binary patch literal 1693 zcmZuxOKa;!6xNKMwq@s?^in8F!6jbUg03#1Elu2(rlh%nw1f^6A#29A61~vKO)Q*M zx=sH;3E0q0|4r#q+pfCq>Rw83&&aWpz?GPp^PD;J%{LR*>s1N9>&;r<(9WbAv=+E#NfazSjroGStQi3QIrA>-T~Kybd+r0r&}-5guZulJkZ?l;d`xX}7>8oC+L zshhEWnB|T0qpo#2&W3Sh9h@F@a;@DK6Xv)b<(DTQS#jU;LwDdu0}HZT&N5*-Okq>j zp&tt$vkqD04MR7*w7PK+FsK+N+#6K~f+m=1clbu0$Wvn` z-6#`8b)d*mIkQpbR_^6;tqQIC(nR?U{esH)>I_H(@l}X--w|JXS9}>)6Z28?Olmdq ztuL>c2XI=?D46R49_v#puPqP~O2dnzzCRejdT>1U8)WB7XAHc=X$9aZYIDDn0B?!m zKF~C;&esqG$mQMLU1IVlRLx&><1h(W#%Ni@VnGNz`CV$6g%-qv0ZR+TkAQvRfC|HL z`hLJ1r_d5N1=17aBK9M(ZlNZrA7!Zwaij`f&YUsaom2rsS!OEEwWX%wU6~t95vztV zWkETi8}hU+yrO&aqy0T+DYxBT&*lE`WBZHP8;P67?Qi0qeQ*thoCD+LY+RC$+dTE` zBo0P=?y(m;#OJ^Y#>uMkb&1x13iunIo*N~vHN*? 
zYp2j1hsK`c5M7`I5EC^UCoCnp03+~wC1B+JvW`x9UpmV=I-|@F_#CS3M=M+uXK{EX zY~;h|&S>NX%uV6qJ-#?4szm*+TwHdOgwzCFUbmzSsSH?lT%KdvfISccF72^+eTZ^& z^3^M%dvO@L5hdEIM3AeHd6q7>E7<5bk7qKMHcK!*fpZ%L>@x%!KZZ_`05W7Qe?wMe mj4;yvt12eaWCaUN{K~(I_C$US@>{Iq*SK!}W3;MBdHX+m|JJAg literal 0 HcmV?d00001 diff --git a/src/polus/image/workflows/__pycache__/utils.cpython-310.pyc b/src/polus/image/workflows/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..950ea6f5439347ea5879ba7cd66a16bb21b5dd08 GIT binary patch literal 2724 zcma)8Uys{F5Z|?(#Bq||(UvPM6}Y7oMbLy65Px*4deHVLEvHJ?Qk?~|eBO29o@1xG zo3@GWUbsrY1429_q?WgS0lo|S$`fCK7noUJE|<1QINI6u?CkFBZ{{~n(Q0`FoKyZ}vG*DpsZ`m$g=(+-2-THDob7$QDlsJ$>86l?@2g}NDr_eN1x&IlV0 z)3#JXU_NMPvnVpX5Qb5n7b;S4FByhUiCBVJBmNG$w;>L)8*OezGVNdJUM}L%0AeD$ zUlg0&m4|T3okDDN4eI9}z%a>*2eKbvDmxvvJ~>^mTUW zE)5!Hjdfs+1GWy>U_zHRP0+M3FwhQ71Cv;+$y$$`V1XTi;4EnOMiH@%Xpl`_{VK{* zh!5cE7m1HSlU(^@nCobFiimDNnkT8r(*CVBTx%$}uH28Z5!Y40A^-B&g1b*2j3Tzr zpvm_^D6$P#cub!Vxv))t0e1gt?O4j%wsz^aRN329IlGp4rL9G!O$nJGJy zw$BPKeMktOKTM@M<=@OW_zh}5?(=xdPn93_qBL(;1iGcG#!hMv_8ajlLKH%KsZ4XJ zqCDo}2pB!JwH;?tyow9#f$Yt4OdLa93+{_p8Rqt10hL7dG#!t+6q`dQOH*V!bH>ks zgI*bMaGPz)=zS_YhLDgF5(9ueIXMr&G$1~*r2{l`qddySGL^asRRpu%#4>2zPbH8l zotW3j^*w;%eo+8Uk#D^7=eG0Hb1<{FoqupQe{Qoj6|aHQ!bh=B!xEyU{aa7BHjIA* zOeK=Ub(|8X*Pq8}ZbeMNdio&waJ3Ch{tf~_B6m#|?OH#zejv(vY=2Ll&>eaQ>W@Pl zvTIY4yXT2K_JC4?zYS>0`HmzMY4^z8rjIL6$Yp}m+JQV|l@Sr|vFeVks@pbZC7e-X zwJl3rRcKv*0=CNAvc*;AK4Vn}tBnIz4Y1l^-T^Dj=f>px%}7Aj^$fxMl2jv6(hsY* zpA^F9QQY^jW&#sCnu@hKFc1U;1XEU?1knR9zVW`ccHkP{!$%UX0oVE2x`&0Bu9*-) zrdXYKH4Qbr|CDN)lz}s8?JZ}(A^3g`FJ8bsF*Q$;$y@so8iLO}G$mm~%g_rVa-5)KtvvT8~>diNG&N&@`1P1cHpX6sJK|#^7xnm4TQ6{SGLl z4`R`7(v$z3uw4s@EZy8I)oMHhhLi^xAG=@_c&pcLeSZ7X@Yc$z xzz>S}0JZf`fh6W%hWRj?ZC~V*c9+Zl8yB&zNc7(&O^QD#+qP@F^sv3;{0k3_#Ebv{ literal 0 HcmV?d00001 diff --git a/cwl_workflows/utils.py b/src/polus/image/workflows/utils.py similarity index 82% rename from cwl_workflows/utils.py rename to 
src/polus/image/workflows/utils.py index e2fc09e..61c25bb 100644 --- a/cwl_workflows/utils.py +++ b/src/polus/image/workflows/utils.py @@ -7,18 +7,15 @@ GITHUB_TAG = "https://raw.githubusercontent.com" -CONFIGURATION_FILENAME =Path.cwd().joinpath("bbbc_json/bbbc_cwl_configuration.json") ANALYSIS_KEYS = ["name", "file_pattern", "out_file_pattern", "image_pattern", "seg_pattern", "ff_pattern", "df_pattern", "group_by", "map_directory", "features", "file_extension"] SEG_KEYS = ["name", "file_pattern", "out_file_pattern", "image_pattern", "seg_pattern", "ff_pattern", "df_pattern", "group_by", "map_directory"] - class DataModel(pydantic.BaseModel): data: Dict[str, Dict[str, Union[str, bool]]] - class LoadYaml(pydantic.BaseModel): """Validation of Dataset yaml.""" workflow:str @@ -58,13 +55,12 @@ def parse_yaml(self) -> Dict[str, Union[str, bool]]: if self.workflow == "analysis": - if list(data.keys()) != ANALYSIS_KEYS: - msg = f"Please do check parameters again for analysis workflow!!" - raise ValueError(msg) + if list(data.keys()) != ANALYSIS_KEYS: + msg = f"Please do check parameters again for analysis workflow!!" + raise ValueError(msg) if self.workflow == "segmentation": - if list(data.keys()) != SEG_KEYS: - msg = f"Please do check parameters again for segmentation workflow!!" - raise ValueError(msg) - + if list(data.keys()) != SEG_KEYS: + msg = f"Please do check parameters again for segmentation workflow!!" 
+ raise ValueError(msg) return data diff --git a/workflows/__init__.py b/workflows/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cwl_workflows/bbbc.py b/workflows/bbbc.py similarity index 79% rename from cwl_workflows/bbbc.py rename to workflows/bbbc.py index 662ed3d..35ba536 100644 --- a/cwl_workflows/bbbc.py +++ b/workflows/bbbc.py @@ -3,7 +3,7 @@ from wic.api.pythonapi import Step, Workflow def workflow() -> Workflow: - bbbcdownload = Step(clt_path='cwl_adapters/bbbcdownload.cwl') + bbbcdownload = Step(clt_path='../image-workflows/cwl_adapters/bbbcdownload.cwl') # NOTE: object fields monkey patched at runtime from *.cwl file bbbcdownload.name = 'BBBC001' bbbcdownload.outDir = Path('bbbcdownload.outDir') @@ -13,31 +13,31 @@ def workflow() -> Workflow: subdirectory.glob_pattern = 'bbbcdownload.outDir/BBBC/BBBC001/raw/Images/human_ht29_colon_cancer_1_images/' subdirectory.subdirectory = Path('subdirectory.subdirectory') - filerenaming = Step(clt_path='cwl_adapters/file-renaming.cwl') + filerenaming = Step(clt_path='../image-workflows/cwl_adapters/file-renaming.cwl') # NOTE: FilePattern {} syntax shadows python f-string {} syntax filerenaming.filePattern = '.*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif' filerenaming.inpDir = subdirectory.subdirectory filerenaming.outDir = Path('file-renaming.outDir') filerenaming.outFilePattern = 'x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif' - omeconverter = Step(clt_path='cwl_adapters/ome-converter.cwl') + omeconverter = Step(clt_path='../image-workflows/cwl_adapters/ome-converter.cwl') omeconverter.inpDir = filerenaming.outDir omeconverter.filePattern = '.*.tif' omeconverter.fileExtension = '.ome.tif' omeconverter.outDir = Path('omeconverter.outDir') - montage = Step(clt_path='cwl_adapters/montage.cwl') + montage = Step(clt_path='../image-workflows/cwl_adapters/montage.cwl') montage.inpDir = omeconverter.outDir montage.filePattern = 'x00_y03_p{p:dd}_c0.ome.tif' montage.layout = 'p' montage.outDir = 
Path('montage.outDir') - image_assembler = Step(clt_path='cwl_adapters/image_assembler.cwl') + image_assembler = Step(clt_path='../image-workflows/cwl_adapters/image_assembler.cwl') image_assembler.stitchPath = montage.outDir image_assembler.imgPath = omeconverter.outDir image_assembler.outDir = Path('image_assembler.outDir') - precompute_slide = Step(clt_path='cwl_adapters/precompute_slide.cwl') + precompute_slide = Step(clt_path='../image-workflows/cwl_adapters/precompute_slide.cwl') precompute_slide.inpDir = image_assembler.outDir precompute_slide.pyramidType = 'Zarr' precompute_slide.imageType = 'image' diff --git a/cwl_workflows/bbbc.yml b/workflows/bbbc.yml similarity index 100% rename from cwl_workflows/bbbc.yml rename to workflows/bbbc.yml diff --git a/workflows/bbbc_sub.py b/workflows/bbbc_sub.py new file mode 100644 index 0000000..6b6f102 --- /dev/null +++ b/workflows/bbbc_sub.py @@ -0,0 +1,78 @@ +from pathlib import Path + +from wic import plugins +from wic.api import pythonapi +from wic.api.pythonapi import Step, Workflow + +def workflow() -> Workflow: + bbbcdownload = Step(clt_path='cwl_adapters/bbbcdownload.cwl') + # NOTE: object fields monkey patched at runtime from *.cwl file + bbbcdownload.name = 'BBBC001' + bbbcdownload.outDir = Path('bbbcdownload.outDir') + + subdirectory = Step(clt_path='../workflow-inference-compiler/cwl_adapters/subdirectory.cwl') + subdirectory.directory = bbbcdownload.outDir + subdirectory.glob_pattern = 'bbbcdownload.outDir/BBBC/BBBC001/raw/Images/human_ht29_colon_cancer_1_images/' + subdirectory.subdirectory = Path('subdirectory.subdirectory') + + filerenaming = Step(clt_path='cwl_adapters/file-renaming.cwl') + # NOTE: FilePattern {} syntax shadows python f-string {} syntax + filerenaming.filePattern = '.*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif' + filerenaming.inpDir = subdirectory.subdirectory + filerenaming.outDir = Path('file-renaming.outDir') + filerenaming.outFilePattern = 
'x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif' + + # Trivially wrap the subdirectory step in a subworkflow. + # But notice that we are still linking the individual Steps above, + # which defeats the whole purpose of having reusable black boxes. + steps = [bbbcdownload, + Workflow([subdirectory], 'bbbc_sub_sub_py'), + filerenaming] + filename = 'bbbc_sub_py' # .yml + return Workflow(steps, filename) + + +def workflow2() -> Workflow: + bbbcdownload = Step(clt_path='cwl_adapters/bbbcdownload.cwl') + # NOTE: object fields monkey patched at runtime from *.cwl file + bbbcdownload.name = 'BBBC001' + bbbcdownload.outDir = Path('bbbcdownload.outDir') + + subdirectory = Step(clt_path='../workflow-inference-compiler/cwl_adapters/subdirectory.cwl') + subworkflow = Workflow([subdirectory], 'bbbc_sub_sub_py') # fails validation, due to + # https://workflow-inference-compiler.readthedocs.io/en/latest/dev/algorithms.html#deferred-satisfaction + + # First link all inputs within the subworkflow to the explicit inputs: tag. + # i.e. This is the API for the subworkflow. + subworkflow.steps[0].directory = subworkflow.directory + subworkflow.steps[0].glob_pattern = subworkflow.glob_pattern + subworkflow.steps[0].subdirectory = Path('subdirectory.subdirectory') + + # Then apply arguments at the call site. 
+ # Notice how the caller does not need to know about the internal details of the subworkflow + # (For example, that the subdirectory step is index 0) + subworkflow.directory = bbbcdownload.outDir + subworkflow.glob_pattern = 'bbbcdownload.outDir/BBBC/BBBC001/raw/Images/human_ht29_colon_cancer_1_images/' + + filerenaming = Step(clt_path='cwl_adapters/file-renaming.cwl') + # NOTE: FilePattern {} syntax shadows python f-string {} syntax + filerenaming.filePattern = '.*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif' + filerenaming.inpDir = subworkflow.steps[0].subdirectory # TODO: workflow outputs: tag + filerenaming.outDir = Path('file-renaming.outDir') + filerenaming.outFilePattern = 'x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif' + + steps = [bbbcdownload, + subworkflow, + filerenaming] + filename = 'bbbc_sub_py' # .yml + return Workflow(steps, filename) + + +# viz = workflow() +# viz.compile() # Do NOT .run() here + +if __name__ == '__main__': + pythonapi.global_config = plugins.get_tools_cwl(str(Path().home())) # Use path fallback + + viz = workflow2() + viz.run() # .run() here, inside main diff --git a/cwl_workflows/cwl_analysis.py b/workflows/cwl_analysis.py similarity index 98% rename from cwl_workflows/cwl_analysis.py rename to workflows/cwl_analysis.py index 1fbbe66..f911f1c 100644 --- a/cwl_workflows/cwl_analysis.py +++ b/workflows/cwl_analysis.py @@ -8,7 +8,7 @@ import typing import sys sys.path.append('../') -from cwl_workflows.utils import GITHUB_TAG +from polus.image.workflows.utils import GITHUB_TAG # Initialize the logger logger = logging.getLogger(__name__) @@ -44,7 +44,8 @@ def __init__( group_by: str, map_directory: typing.Optional[bool] = False, features: typing.Optional[str]="ALL", - file_extension: typing.Optional[str]="arrowipc" + file_extension: typing.Optional[str]="arrowipc", + ): self.name = name self.file_pattern = file_pattern @@ -65,7 +66,7 @@ def _create_directories(self) -> None: """Create directories for CWL outputs""" cwl_path = 
self.PATH.joinpath("cwl_adapters") cwl_path.mkdir(parents=True, exist_ok=True) - workflow_path = self.PATH.joinpath("workflows").resolve() + workflow_path = self.PATH.joinpath("outputs").resolve() workflow_path.mkdir(exist_ok=True) return cwl_path, workflow_path @@ -215,6 +216,7 @@ def workflow(self) -> None: kaggle_nuclei_segmentation = self.create_step( self.manifest_urls("kaggle_nuclei_segmentation") ) + kaggle_nuclei_segmentation.inpDir = apply_flatfield.outDir kaggle_nuclei_segmentation.filePattern = self.image_pattern kaggle_nuclei_segmentation.outDir = Path("kaggle_nuclei_segmentation.outDir") diff --git a/cwl_workflows/cwl_nuclear_segmentation.py b/workflows/cwl_nuclear_segmentation.py similarity index 96% rename from cwl_workflows/cwl_nuclear_segmentation.py rename to workflows/cwl_nuclear_segmentation.py index 7fa3a69..72d9149 100644 --- a/cwl_workflows/cwl_nuclear_segmentation.py +++ b/workflows/cwl_nuclear_segmentation.py @@ -8,7 +8,7 @@ import shutil import sys sys.path.append('../') -from cwl_workflows.utils import GITHUB_TAG +from polus.image.workflows.utils import GITHUB_TAG # Initialize the logger logger = logging.getLogger(__name__) @@ -59,7 +59,7 @@ def _create_directories(self) -> None: """Create directories for CWL outputs""" cwl_path = self.PATH.joinpath("cwl_adapters") cwl_path.mkdir(parents=True, exist_ok=True) - workflow_path = self.PATH.joinpath("workflows").resolve() + workflow_path = self.PATH.joinpath("outputs").resolve() workflow_path.mkdir(exist_ok=True) return cwl_path, workflow_path @@ -67,13 +67,10 @@ def _clean(self) -> None: """Cleaning of redundant directories generating on running CWL""" logger.info("Cleaning directories!!!") destination_path = self.workflow_path.joinpath("experiment") - dir_names = ("autogenerated", "cachedir", "RUNS", "provenance") - for i, d in zip(self.wic_path.iterdir(), self.PATH.iterdir()): - if i.name.endswith(dir_names): - shutil.rmtree(d) - if d.name.endswith(dir_names): - shutil.rmtree(d) - + 
dir_names = ("autogenerated", "cachedir", "RUNS", "provenance", "cwl_adapters") + dir_list = [w for w in self.wic_path.iterdir() if w.is_dir() if w.name in dir_names] + for d in dir_list: + shutil.rmtree(d) for d in destination_path.iterdir(): if d.name.endswith("cwl_adapters"): shutil.rmtree(d) @@ -85,7 +82,7 @@ def _clean(self) -> None: def _move_outputs(self) -> None: """Transfer outputs from the WIC directory to the workflow path""" - logger.info("Move outputs to workflow path!!!") + logger.info("Move outputs to workflow path!!!") for d in self.wic_path.iterdir(): if d.name.endswith("outdir"): shutil.move(d, self.workflow_path) @@ -178,6 +175,7 @@ def workflow(self) -> None: rename.inpDir = bbbc.outDir rename.outDir = Path("rename.outDir") + # OMEConverter ome_converter = self.create_step(self.manifest_urls("ome_converter")) ome_converter.filePattern = self._string_after_period(self.out_file_pattern) @@ -208,6 +206,7 @@ def workflow(self) -> None: self.manifest_urls("kaggle_nuclei_segmentation") ) kaggle_nuclei_segmentation.inpDir = apply_flatfield.outDir + kaggle_nuclei_segmentation.filePattern = self.image_pattern kaggle_nuclei_segmentation.outDir = Path("kaggle_nuclei_segmentation.outDir") From 02692e26233ae073cc1e19fe399a21f6264d7852 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Tue, 2 Apr 2024 08:16:47 -0500 Subject: [PATCH 13/16] delete cache file --- .../__pycache__/__init__.cpython-310.pyc | Bin 186 -> 0 bytes .../__pycache__/__main__.cpython-310.pyc | Bin 1693 -> 0 bytes .../workflows/__pycache__/utils.cpython-310.pyc | Bin 2724 -> 0 bytes 3 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/polus/image/workflows/__pycache__/__init__.cpython-310.pyc delete mode 100644 src/polus/image/workflows/__pycache__/__main__.cpython-310.pyc delete mode 100644 src/polus/image/workflows/__pycache__/utils.cpython-310.pyc diff --git a/src/polus/image/workflows/__pycache__/__init__.cpython-310.pyc 
b/src/polus/image/workflows/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 1a9d53681adefe19e5647737bdc0f2bdfa0cd30b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 186 zcmd1j<>g`kf(jAtG!Xq5L?8o3AjbiSi&=m~3PUi1CZpdXD p=ad#hWb_d-@$s2?nI-Y@dIgoYIBatBQ%ZAE?LgKRGXV(}1_0d3FHHad diff --git a/src/polus/image/workflows/__pycache__/__main__.cpython-310.pyc b/src/polus/image/workflows/__pycache__/__main__.cpython-310.pyc deleted file mode 100644 index cb595cd260f47e14d2642d5dc4890297fee6431c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1693 zcmZuxOKa;!6xNKMwq@s?^in8F!6jbUg03#1Elu2(rlh%nw1f^6A#29A61~vKO)Q*M zx=sH;3E0q0|4r#q+pfCq>Rw83&&aWpz?GPp^PD;J%{LR*>s1N9>&;r<(9WbAv=+E#NfazSjroGStQi3QIrA>-T~Kybd+r0r&}-5guZulJkZ?l;d`xX}7>8oC+L zshhEWnB|T0qpo#2&W3Sh9h@F@a;@DK6Xv)b<(DTQS#jU;LwDdu0}HZT&N5*-Okq>j zp&tt$vkqD04MR7*w7PK+FsK+N+#6K~f+m=1clbu0$Wvn` z-6#`8b)d*mIkQpbR_^6;tqQIC(nR?U{esH)>I_H(@l}X--w|JXS9}>)6Z28?Olmdq ztuL>c2XI=?D46R49_v#puPqP~O2dnzzCRejdT>1U8)WB7XAHc=X$9aZYIDDn0B?!m zKF~C;&esqG$mQMLU1IVlRLx&><1h(W#%Ni@VnGNz`CV$6g%-qv0ZR+TkAQvRfC|HL z`hLJ1r_d5N1=17aBK9M(ZlNZrA7!Zwaij`f&YUsaom2rsS!OEEwWX%wU6~t95vztV zWkETi8}hU+yrO&aqy0T+DYxBT&*lE`WBZHP8;P67?Qi0qeQ*thoCD+LY+RC$+dTE` zBo0P=?y(m;#OJ^Y#>uMkb&1x13iunIo*N~vHN*? 
zYp2j1hsK`c5M7`I5EC^UCoCnp03+~wC1B+JvW`x9UpmV=I-|@F_#CS3M=M+uXK{EX zY~;h|&S>NX%uV6qJ-#?4szm*+TwHdOgwzCFUbmzSsSH?lT%KdvfISccF72^+eTZ^& z^3^M%dvO@L5hdEIM3AeHd6q7>E7<5bk7qKMHcK!*fpZ%L>@x%!KZZ_`05W7Qe?wMe mj4;yvt12eaWCaUN{K~(I_C$US@>{Iq*SK!}W3;MBdHX+m|JJAg diff --git a/src/polus/image/workflows/__pycache__/utils.cpython-310.pyc b/src/polus/image/workflows/__pycache__/utils.cpython-310.pyc deleted file mode 100644 index 950ea6f5439347ea5879ba7cd66a16bb21b5dd08..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2724 zcma)8Uys{F5Z|?(#Bq||(UvPM6}Y7oMbLy65Px*4deHVLEvHJ?Qk?~|eBO29o@1xG zo3@GWUbsrY1429_q?WgS0lo|S$`fCK7noUJE|<1QINI6u?CkFBZ{{~n(Q0`FoKyZ}vG*DpsZ`m$g=(+-2-THDob7$QDlsJ$>86l?@2g}NDr_eN1x&IlV0 z)3#JXU_NMPvnVpX5Qb5n7b;S4FByhUiCBVJBmNG$w;>L)8*OezGVNdJUM}L%0AeD$ zUlg0&m4|T3okDDN4eI9}z%a>*2eKbvDmxvvJ~>^mTUW zE)5!Hjdfs+1GWy>U_zHRP0+M3FwhQ71Cv;+$y$$`V1XTi;4EnOMiH@%Xpl`_{VK{* zh!5cE7m1HSlU(^@nCobFiimDNnkT8r(*CVBTx%$}uH28Z5!Y40A^-B&g1b*2j3Tzr zpvm_^D6$P#cub!Vxv))t0e1gt?O4j%wsz^aRN329IlGp4rL9G!O$nJGJy zw$BPKeMktOKTM@M<=@OW_zh}5?(=xdPn93_qBL(;1iGcG#!hMv_8ajlLKH%KsZ4XJ zqCDo}2pB!JwH;?tyow9#f$Yt4OdLa93+{_p8Rqt10hL7dG#!t+6q`dQOH*V!bH>ks zgI*bMaGPz)=zS_YhLDgF5(9ueIXMr&G$1~*r2{l`qddySGL^asRRpu%#4>2zPbH8l zotW3j^*w;%eo+8Uk#D^7=eG0Hb1<{FoqupQe{Qoj6|aHQ!bh=B!xEyU{aa7BHjIA* zOeK=Ub(|8X*Pq8}ZbeMNdio&waJ3Ch{tf~_B6m#|?OH#zejv(vY=2Ll&>eaQ>W@Pl zvTIY4yXT2K_JC4?zYS>0`HmzMY4^z8rjIL6$Yp}m+JQV|l@Sr|vFeVks@pbZC7e-X zwJl3rRcKv*0=CNAvc*;AK4Vn}tBnIz4Y1l^-T^Dj=f>px%}7Aj^$fxMl2jv6(hsY* zpA^F9QQY^jW&#sCnu@hKFc1U;1XEU?1knR9zVW`ccHkP{!$%UX0oVE2x`&0Bu9*-) zrdXYKH4Qbr|CDN)lz}s8?JZ}(A^3g`FJ8bsF*Q$;$y@so8iLO}G$mm~%g_rVa-5)KtvvT8~>diNG&N&@`1P1cHpX6sJK|#^7xnm4TQ6{SGLl z4`R`7(v$z3uw4s@EZy8I)oMHhhLi^xAG=@_c&pcLeSZ7X@Yc$z xzz>S}0JZf`fh6W%hWRj?ZC~V*c9+Zl8yB&zNc7(&O^QD#+qP@F^sv3;{0k3_#Ebv{ From 93ac0b86801de50feef7b0af69c12df04c0c9dc0 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 8 May 2024 12:57:38 -0500 Subject: [PATCH 14/16] updating_cwlworkflows --- 
configuration/analysis/BBBC001.yml | 2 +- configuration/analysis/BBBC039.yml | 3 +- cwl_adapters/BbbcDownload.cwl | 29 +++++ cwl_adapters/FileRenaming.cwl | 41 +++++++ pyproject.toml | 2 - workflows/cwl_nuclear_segmentation.py | 159 +++++++++++++++----------- 6 files changed, 164 insertions(+), 72 deletions(-) create mode 100644 cwl_adapters/BbbcDownload.cwl create mode 100644 cwl_adapters/FileRenaming.cwl diff --git a/configuration/analysis/BBBC001.yml b/configuration/analysis/BBBC001.yml index c72c9c8..458b53e 100644 --- a/configuration/analysis/BBBC001.yml +++ b/configuration/analysis/BBBC001.yml @@ -10,4 +10,4 @@ group_by: c map_directory: false features: ALL file_extension: pandas - +background_correction: false \ No newline at end of file diff --git a/configuration/analysis/BBBC039.yml b/configuration/analysis/BBBC039.yml index 1894023..308a274 100644 --- a/configuration/analysis/BBBC039.yml +++ b/configuration/analysis/BBBC039.yml @@ -9,4 +9,5 @@ df_pattern: "x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif" group_by: c map_directory: false features: "ALL_INTENSITY" -file_extension: pandas \ No newline at end of file +file_extension: pandas +background_correction: false \ No newline at end of file diff --git a/cwl_adapters/BbbcDownload.cwl b/cwl_adapters/BbbcDownload.cwl new file mode 100644 index 0000000..9eeba93 --- /dev/null +++ b/cwl_adapters/BbbcDownload.cwl @@ -0,0 +1,29 @@ +class: CommandLineTool +cwlVersion: v1.2 +inputs: + name: + inputBinding: + prefix: --name + type: string + outDir: + inputBinding: + prefix: --outDir + type: Directory +outputs: + outDir: + outputBinding: + glob: $(inputs.outDir.basename) + type: Directory +requirements: + DockerRequirement: + dockerPull: polusai/bbbc-download-plugin:0.1.0-dev1 + EnvVarRequirement: + envDef: + HOME: /home/polusai + InitialWorkDirRequirement: + listing: + - entry: $(inputs.outDir) + writable: true + InlineJavascriptRequirement: {} + NetworkAccess: + networkAccess: true diff --git 
a/cwl_adapters/FileRenaming.cwl b/cwl_adapters/FileRenaming.cwl new file mode 100644 index 0000000..1e154ed --- /dev/null +++ b/cwl_adapters/FileRenaming.cwl @@ -0,0 +1,41 @@ +class: CommandLineTool +cwlVersion: v1.2 +inputs: + filePattern: + inputBinding: + prefix: --filePattern + type: string + inpDir: + inputBinding: + prefix: --inpDir + type: Directory + mapDirectory: + inputBinding: + prefix: --mapDirectory + type: boolean? + outDir: + inputBinding: + prefix: --outDir + type: Directory + outFilePattern: + inputBinding: + prefix: --outFilePattern + type: string +outputs: + outDir: + outputBinding: + glob: $(inputs.outDir.basename) + type: Directory +requirements: + DockerRequirement: + dockerPull: polusai/file-renaming-tool:0.2.4-dev1 + EnvVarRequirement: + envDef: + HOME: /home/polusai + InitialWorkDirRequirement: + listing: + - entry: $(inputs.outDir) + writable: true + InlineJavascriptRequirement: {} + NetworkAccess: + networkAccess: true diff --git a/pyproject.toml b/pyproject.toml index 85287fa..8a16132 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,8 +11,6 @@ python = ">=3.9,<3.12" typer = "^0.9.0" pyyaml = "^6.0.1" pydantic = "^2.6.1" -cwl-utils="0.31" -toil="^5.12" polus-plugins = {path = "../image-tools", develop = true} workflow-inference-compiler = {path = "../workflow-inference-compiler", develop = true} diff --git a/workflows/cwl_nuclear_segmentation.py b/workflows/cwl_nuclear_segmentation.py index 72d9149..1c920e4 100644 --- a/workflows/cwl_nuclear_segmentation.py +++ b/workflows/cwl_nuclear_segmentation.py @@ -123,8 +123,8 @@ def create_step(self, url: str) -> api.Step: cwl_tool = pp.get_plugin(self._camel(manifest.name), plugin_version).save_cwl( self.cwl_path.joinpath(f"{self._camel(manifest.name)}.cwl") ) - step = api.Step(cwl_tool) - return step + # step = api.Step(cwl_tool) + return cwl_tool def manifest_urls(self, x: str) -> str: """URLs on GitHub for plugin manifests""" @@ -162,84 +162,107 @@ def workflow(self) -> None: """ A CWL 
nuclear segmentation pipeline. """ - # BBBCDownload + # # BBBCDownload bbbc = self.create_step(self.manifest_urls("bbbc_download")) - bbbc.name = self.name - bbbc.outDir = Path("bbbc.outDir") + # bbbc.name = self.name + # bbbc.outDir = Path("bbbc.outDir") - # Renaming plugin + # # Renaming plugin rename = self.create_step(self.manifest_urls("file_renaming")) + # rename.filePattern = self.file_pattern + # rename.outFilePattern = self.out_file_pattern + # rename.mapDirectory = self.map_directory + # rename.inpDir = bbbc.outDir + # rename.outDir = Path("rename.outDir") + + self.modify_cwl() + + + # # OMEConverter + # ome_converter = self.create_step(self.manifest_urls("ome_converter")) + # ome_converter.filePattern = self._string_after_period(self.out_file_pattern) + # ome_converter.fileExtension = ".ome.tif" + # ome_converter.inpDir = rename.outDir + # ome_converter.outDir = Path("ome_converter.outDir") + + # # Estimate Flatfield + # estimate_flatfield = self.create_step(self.manifest_urls("estimate_flatfield")) + # estimate_flatfield.inpDir = ome_converter.outDir + # estimate_flatfield.filePattern = self.image_pattern + # estimate_flatfield.groupBy = self.group_by + # estimate_flatfield.getDarkfield = True + # estimate_flatfield.outDir = Path("estimate_flatfield.outDir") + + # # # Apply Flatfield + # apply_flatfield = self.create_step(self.manifest_urls("apply_flatfield")) + # apply_flatfield.imgDir = ome_converter.outDir + # apply_flatfield.imgPattern = self.image_pattern + # apply_flatfield.ffDir = estimate_flatfield.outDir + # apply_flatfield.ffPattern = self.ff_pattern + # apply_flatfield.dfPattern = self.df_pattern + # apply_flatfield.outDir = Path("apply_flatfield.outDir") + # apply_flatfield.dataType = True + + # ## Kaggle Nuclei Segmentation + # kaggle_nuclei_segmentation = self.create_step( + # self.manifest_urls("kaggle_nuclei_segmentation") + # ) + # kaggle_nuclei_segmentation.inpDir = apply_flatfield.outDir + + # kaggle_nuclei_segmentation.filePattern = 
self.image_pattern + # kaggle_nuclei_segmentation.outDir = Path("kaggle_nuclei_segmentation.outDir") + + # ## FTL Label Plugin + # ftl_plugin = self.create_step(self.manifest_urls("ftl_plugin")) + # ftl_plugin.inpDir = kaggle_nuclei_segmentation.outDir + # ftl_plugin.connectivity = 1 + # ftl_plugin.binarizationThreshold = 0.5 + # ftl_plugin.outDir = Path("ftl_plugin.outDir") + + # logger.info("Initiating CWL Nuclear Segmentation Workflow!!!") + # steps = [ + # bbbc, + # # rename, + # # ome_converter, + # # estimate_flatfield, + # # apply_flatfield, + # # kaggle_nuclei_segmentation, + # # ftl_plugin + # ] + bbbc = api.Step(clt_path='/Users/abbasih2/Documents/Job/Axle_Work/image-workflows/cwl_adapters/BbbcDownload.cwl') + # We can inline the inputs to each step individually. + bbbc.name = 'BBBC001' + bbbc.outDir = Path('bbbc.outDir') + + + rename = api.Step(clt_path='/Users/abbasih2/Documents/Job/Axle_Work/image-workflows/cwl_adapters/FileRenaming.cwl') rename.filePattern = self.file_pattern rename.outFilePattern = self.out_file_pattern rename.mapDirectory = self.map_directory rename.inpDir = bbbc.outDir rename.outDir = Path("rename.outDir") - - # OMEConverter - ome_converter = self.create_step(self.manifest_urls("ome_converter")) - ome_converter.filePattern = self._string_after_period(self.out_file_pattern) - ome_converter.fileExtension = ".ome.tif" - ome_converter.inpDir = rename.outDir - ome_converter.outDir = Path("ome_converter.outDir") - - # Estimate Flatfield - estimate_flatfield = self.create_step(self.manifest_urls("estimate_flatfield")) - estimate_flatfield.inpDir = ome_converter.outDir - estimate_flatfield.filePattern = self.image_pattern - estimate_flatfield.groupBy = self.group_by - estimate_flatfield.getDarkfield = True - estimate_flatfield.outDir = Path("estimate_flatfield.outDir") - - # # Apply Flatfield - apply_flatfield = self.create_step(self.manifest_urls("apply_flatfield")) - apply_flatfield.imgDir = ome_converter.outDir - 
apply_flatfield.imgPattern = self.image_pattern - apply_flatfield.ffDir = estimate_flatfield.outDir - apply_flatfield.ffPattern = self.ff_pattern - apply_flatfield.dfPattern = self.df_pattern - apply_flatfield.outDir = Path("apply_flatfield.outDir") - apply_flatfield.dataType = True - - ## Kaggle Nuclei Segmentation - kaggle_nuclei_segmentation = self.create_step( - self.manifest_urls("kaggle_nuclei_segmentation") - ) - kaggle_nuclei_segmentation.inpDir = apply_flatfield.outDir - - kaggle_nuclei_segmentation.filePattern = self.image_pattern - kaggle_nuclei_segmentation.outDir = Path("kaggle_nuclei_segmentation.outDir") - - ## FTL Label Plugin - ftl_plugin = self.create_step(self.manifest_urls("ftl_plugin")) - ftl_plugin.inpDir = kaggle_nuclei_segmentation.outDir - ftl_plugin.connectivity = 1 - ftl_plugin.binarizationThreshold = 0.5 - ftl_plugin.outDir = Path("ftl_plugin.outDir") - - logger.info("Initiating CWL Nuclear Segmentation Workflow!!!") steps = [ bbbc, - rename, - ome_converter, - estimate_flatfield, - apply_flatfield, - kaggle_nuclei_segmentation, - ftl_plugin + rename ] - workflow = api.Workflow(steps, "experiment", self.workflow_path) + workflow = api.Workflow([], "experiment") + for step in steps: + workflow.append(step) # # Saving CLT for plugins - workflow._save_all_cwl(overwrite=True) - # # Adding environmental variables for bbbc_download and ome_converter plugin - self.modify_cwl() - # # # Save yaml to run CWL tool - workflow._save_yaml() - # Compile and run using WIC python API - workflow.compile(run_local=True, overwrite=False) - # # print(workflow.yml_path) - # # clean autognerated directories - self._clean() - self._move_outputs() - logger.info("Completed CWL nuclear segmentation workflow.") + # workflow._save_all_cwl() + # # # Adding environmental variables for bbbc_download and ome_converter plugin + # self.modify_cwl() + + # # # # Save yaml to run CWL tool + 
workflow.write_ast_to_disk(directory=Path('/Users/abbasih2/Documents/Job/Axle_Work/image-workflows/outputs')) + # workflow.compile(write_to_disk=True) + # # Compile and run using WIC python API + # workflow.compile(run_local=True, overwrite=False) + workflow.run() + # # # clean autognerated directories + # self._clean() + # self._move_outputs() + # logger.info("Completed CWL nuclear segmentation workflow.") return \ No newline at end of file From 8d27c231a8bc9f679156268dea0c91d3dfb15fa5 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 8 May 2024 14:06:41 -0500 Subject: [PATCH 15/16] updated toml --- pyproject.toml | 2 +- workflows/cwl_nuclear_segmentation.py | 44 +++++++++++++-------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8a16132..3fde0eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ python = ">=3.9,<3.12" typer = "^0.9.0" pyyaml = "^6.0.1" pydantic = "^2.6.1" -polus-plugins = {path = "../image-tools", develop = true} +#polus-plugins = {path = "../image-tools", develop = true} workflow-inference-compiler = {path = "../workflow-inference-compiler", develop = true} [tool.poetry.group.dev.dependencies] diff --git a/workflows/cwl_nuclear_segmentation.py b/workflows/cwl_nuclear_segmentation.py index 1c920e4..92e294a 100644 --- a/workflows/cwl_nuclear_segmentation.py +++ b/workflows/cwl_nuclear_segmentation.py @@ -1,5 +1,5 @@ import wic.api.pythonapi as api -import polus.plugins as pp +# import polus.plugins as pp from pathlib import Path import yaml import logging @@ -116,15 +116,15 @@ def _add_backslash_before_parentheses(self, x): ) return result - def create_step(self, url: str) -> api.Step: - """Generate the plugin class name from the plugin name specified in the manifest""" - manifest = pp.submit_plugin(url) - plugin_version = str(manifest.version) - cwl_tool = pp.get_plugin(self._camel(manifest.name), plugin_version).save_cwl( - 
self.cwl_path.joinpath(f"{self._camel(manifest.name)}.cwl") - ) - # step = api.Step(cwl_tool) - return cwl_tool + # def create_step(self, url: str) -> api.Step: + # """Generate the plugin class name from the plugin name specified in the manifest""" + # manifest = pp.submit_plugin(url) + # plugin_version = str(manifest.version) + # cwl_tool = pp.get_plugin(self._camel(manifest.name), plugin_version).save_cwl( + # self.cwl_path.joinpath(f"{self._camel(manifest.name)}.cwl") + # ) + # # step = api.Step(cwl_tool) + # return cwl_tool def manifest_urls(self, x: str) -> str: """URLs on GitHub for plugin manifests""" @@ -162,20 +162,20 @@ def workflow(self) -> None: """ A CWL nuclear segmentation pipeline. """ - # # BBBCDownload - bbbc = self.create_step(self.manifest_urls("bbbc_download")) - # bbbc.name = self.name - # bbbc.outDir = Path("bbbc.outDir") + # # # BBBCDownload + # bbbc = self.create_step(self.manifest_urls("bbbc_download")) + # # bbbc.name = self.name + # # bbbc.outDir = Path("bbbc.outDir") - # # Renaming plugin - rename = self.create_step(self.manifest_urls("file_renaming")) - # rename.filePattern = self.file_pattern - # rename.outFilePattern = self.out_file_pattern - # rename.mapDirectory = self.map_directory - # rename.inpDir = bbbc.outDir - # rename.outDir = Path("rename.outDir") + # # # Renaming plugin + # rename = self.create_step(self.manifest_urls("file_renaming")) + # # rename.filePattern = self.file_pattern + # # rename.outFilePattern = self.out_file_pattern + # # rename.mapDirectory = self.map_directory + # # rename.inpDir = bbbc.outDir + # # rename.outDir = Path("rename.outDir") - self.modify_cwl() + # self.modify_cwl() # # OMEConverter From 2ca9463c8da1e03476430636853a173d9647cf90 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 8 May 2024 14:24:39 -0500 Subject: [PATCH 16/16] modified cltpath --- workflows/cwl_nuclear_segmentation.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git 
a/workflows/cwl_nuclear_segmentation.py b/workflows/cwl_nuclear_segmentation.py index 92e294a..e7f82fc 100644 --- a/workflows/cwl_nuclear_segmentation.py +++ b/workflows/cwl_nuclear_segmentation.py @@ -229,13 +229,16 @@ def workflow(self) -> None: # # kaggle_nuclei_segmentation, # # ftl_plugin # ] - bbbc = api.Step(clt_path='/Users/abbasih2/Documents/Job/Axle_Work/image-workflows/cwl_adapters/BbbcDownload.cwl') + root_dir = Path(__file__).parents[1] + adapters_path = root_dir.joinpath('cwl_adapters') + + bbbc = api.Step(clt_path=adapters_path.joinpath('BbbcDownload.cwl')) # We can inline the inputs to each step individually. bbbc.name = 'BBBC001' bbbc.outDir = Path('bbbc.outDir') - rename = api.Step(clt_path='/Users/abbasih2/Documents/Job/Axle_Work/image-workflows/cwl_adapters/FileRenaming.cwl') + rename = api.Step(clt_path=adapters_path.joinpath('FileRenaming.cwl')) rename.filePattern = self.file_pattern rename.outFilePattern = self.out_file_pattern rename.mapDirectory = self.map_directory @@ -250,16 +253,16 @@ def workflow(self) -> None: workflow = api.Workflow([], "experiment") for step in steps: workflow.append(step) - # # Saving CLT for plugins - # workflow._save_all_cwl() - # # # Adding environmental variables for bbbc_download and ome_converter plugin - # self.modify_cwl() + # # # Saving CLT for plugins + # # workflow._save_all_cwl() + # # # # Adding environmental variables for bbbc_download and ome_converter plugin + # # self.modify_cwl() - # # # # Save yaml to run CWL tool - workflow.write_ast_to_disk(directory=Path('/Users/abbasih2/Documents/Job/Axle_Work/image-workflows/outputs')) - # workflow.compile(write_to_disk=True) - # # Compile and run using WIC python API - # workflow.compile(run_local=True, overwrite=False) + # # # # # Save yaml to run CWL tool + workflow.write_ast_to_disk(directory=Path(root_dir.joinpath('outputs'))) + # # workflow.compile(write_to_disk=True) + # # # Compile and run using WIC python API + # # workflow.compile(run_local=True, 
overwrite=False) workflow.run() # # # clean autognerated directories # self._clean()