From be15ea5de1e2d6327aacbfd5c8290fad2125e3db Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Sat, 13 Dec 2025 17:05:32 +0100 Subject: [PATCH 01/17] upgrade some versions --- .github/workflows/python-package-conda.yml | 2 +- environment.yml | 4 ++-- pyproject.toml | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index c2c8c558..ac776e41 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: os: [ubuntu-22.04] - pyv: [ '3.10','3.11', '3.12', '3.13'] + pyv: [ '3.12', '3.13', '3.14'] max-parallel: 5 runs-on: ${{ matrix.os }} steps: diff --git a/environment.yml b/environment.yml index 20a49207..6deb7e57 100644 --- a/environment.yml +++ b/environment.yml @@ -4,7 +4,7 @@ channels: - anaconda - conda-forge dependencies: - - python=3.10 + - python=3.12 - astropy - setuptools - numpy @@ -18,6 +18,6 @@ dependencies: - ctapipe - pip: - numba - - tensorflow>=2.14,<2.15 + - tensorflow>=2.19,<2.21 - dl1_data_handler>=0.14.5,<0.15 - pydot diff --git a/pyproject.toml b/pyproject.toml index 434bd27a..fbff1985 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ classifiers = [ ] -requires-python = ">=3.10" +requires-python = ">=3.12" dependencies = [ "dl1_data_handler>=0.14.5, <0.15", "astropy", @@ -37,10 +37,10 @@ dependencies = [ "pyyaml", "scikit-learn", "numba", - "tensorflow>=2.14,<2.15", + "tensorflow>=2.19,<2.21", "pydot", "setuptools", - "ctapipe>=0.22.0, <0.26", + "ctapipe>=0.28.0", ] dynamic = ["version"] From f040131681eb1de11edc85dcd39d120f467845d2 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 15 Dec 2025 13:43:10 +0100 Subject: [PATCH 02/17] polish CI --- .github/workflows/python-package-conda.yml | 91 +++++++++++++--------- environment.yml | 23 ------ pyproject.toml | 8 +- 3 files changed, 59 insertions(+), 63 deletions(-) delete mode 100644 environment.yml diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index ac776e41..48426baa 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -1,4 +1,3 @@ - name: CI on: @@ -15,39 +14,61 @@ jobs: strategy: matrix: os: [ubuntu-22.04] - pyv: [ '3.12', '3.13', '3.14'] - max-parallel: 5 + python-version: ['3.12', '3.13', '3.14'] + ctlearn-version: ['latest', 'nightly'] + max-parallel: 6 runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.ctlearn-version == 'nightly' || matrix.python-version == '3.14' }} steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.pyv }} - run: | - # Install Miniconda - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - bash miniconda.sh -b -p $HOME/miniconda - echo "$HOME/miniconda/bin" >> $GITHUB_PATH - source $HOME/miniconda/bin/activate - # Install Mamba via conda (since we don't have mamba yet) - $HOME/miniconda/bin/conda config --add channels conda-forge - $HOME/miniconda/bin/conda install -y mamba=2.0.8 - mamba install -y python=${{ matrix.pyv }} - - name: Add MKL_THREADING_LAYER variable - run: echo "MKL_THREADING_LAYER=GNU" >> $GITHUB_ENV - - name: Install dependencies with Mamba - run: | - source $HOME/miniconda/bin/activate - mamba env update --file environment.yml --name base - - name: Lint with flake8 - run: | - source $HOME/miniconda/bin/activate - mamba install flake8 - flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Install with pip - run: | - pip install -e . - - name: Test with pytest - run: | - source $HOME/miniconda/bin/activate - mamba install pytest - pytest + - uses: actions/checkout@v4 + + - name: Set up Miniconda + run: | + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh + bash miniconda.sh -b -p $HOME/miniconda + echo "$HOME/miniconda/bin" >> $GITHUB_PATH + source $HOME/miniconda/etc/profile.d/conda.sh + conda config --add channels conda-forge + conda install -y mamba + + - name: Create Python environment + run: | + source $HOME/miniconda/etc/profile.d/conda.sh + mamba create -y -n ctlearn python==${{ matrix.python-version }} -c conda-forge + mamba activate ctlearn + + - name: Install dependencies + run: | + source $HOME/miniconda/etc/profile.d/conda.sh + mamba activate ctlearn + sudo apt-get update + sudo apt-get install -y git + pip install --upgrade pip + pip install pylint pylint-exit anybadge eventio pytest flake8 + if [ "${{ matrix.ctlearn-version }}" = "nightly" ]; then + pip install git+https://github.com/cta-observatory/dl1-data-handler.git + else + pip install dl1-data-handler + fi + + - name: Add MKL_THREADING_LAYER variable + run: echo "MKL_THREADING_LAYER=GNU" >> $GITHUB_ENV + + - name: Lint with flake8 + run: | + source $HOME/miniconda/etc/profile.d/conda.sh + mamba activate ctlearn + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + + - name: Install package with pip + run: | + source $HOME/miniconda/etc/profile.d/conda.sh + mamba activate ctlearn + pip install -e . 
+ + - name: Run pytest + run: | + source $HOME/miniconda/etc/profile.d/conda.sh + mamba activate ctlearn + pytest diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 6deb7e57..00000000 --- a/environment.yml +++ /dev/null @@ -1,23 +0,0 @@ -# conda env create -f environment.yml -name: ctlearn -channels: - - anaconda - - conda-forge -dependencies: - - python=3.12 - - astropy - - setuptools - - numpy - - pandas - - pip - - pytables - - tables - - c-blosc2=2.13 - - pyyaml - - scikit-learn - - ctapipe - - pip: - - numba - - tensorflow>=2.19,<2.21 - - dl1_data_handler>=0.14.5,<0.15 - - pydot diff --git a/pyproject.toml b/pyproject.toml index fbff1985..1ae3afa1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,8 +20,6 @@ classifiers = [ "Topic :: Scientific/Engineering :: Astronomy", "Topic :: Scientific/Engineering :: Physics", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", @@ -29,7 +27,7 @@ classifiers = [ requires-python = ">=3.12" dependencies = [ - "dl1_data_handler>=0.14.5, <0.15", + "dl1_data_handler>=0.14.5", "astropy", "numpy", "pandas", @@ -37,10 +35,10 @@ dependencies = [ "pyyaml", "scikit-learn", "numba", - "tensorflow>=2.19,<2.21", + "tensorflow>=2.20", "pydot", "setuptools", - "ctapipe>=0.28.0", + "ctapipe[all]>=0.28", ] dynamic = ["version"] From 1946d079f2755484ee7b2baef42a40a751105db0 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 15 Dec 2025 13:47:14 +0100 Subject: [PATCH 03/17] run shell hook after mamba install in CI --- .github/workflows/python-package-conda.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 48426baa..8a16954a 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -30,6 +30,7 @@ jobs: source $HOME/miniconda/etc/profile.d/conda.sh conda config --add channels conda-forge conda install -y mamba + eval "$(mamba shell hook --shell bash)" - name: Create Python environment run: | From 319b2277820f7500556f8b956ed48e4fa6793c92 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 15 Dec 2025 13:50:47 +0100 Subject: [PATCH 04/17] use conda to activate the envs --- .github/workflows/python-package-conda.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 8a16954a..3d37b982 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -30,18 +30,17 @@ jobs: source $HOME/miniconda/etc/profile.d/conda.sh conda config --add channels conda-forge conda install -y mamba - eval "$(mamba shell hook --shell bash)" - name: Create Python environment run: | source $HOME/miniconda/etc/profile.d/conda.sh mamba create -y -n ctlearn python==${{ matrix.python-version }} -c conda-forge - mamba activate ctlearn + conda activate ctlearn - name: Install dependencies run: | source $HOME/miniconda/etc/profile.d/conda.sh - mamba activate ctlearn + conda activate ctlearn sudo apt-get update sudo apt-get install -y git pip install --upgrade pip @@ -58,18 +57,18 @@ jobs: - name: Lint with flake8 run: | source $HOME/miniconda/etc/profile.d/conda.sh - mamba activate ctlearn + conda activate ctlearn flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Install package with pip run: | source $HOME/miniconda/etc/profile.d/conda.sh - mamba activate ctlearn + conda activate ctlearn pip install -e . - name: Run pytest run: | source $HOME/miniconda/etc/profile.d/conda.sh - mamba activate ctlearn + conda activate ctlearn pytest From 895d5757c5d52901d61b4d0fe42f438b338394cf Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 15 Dec 2025 16:08:12 +0100 Subject: [PATCH 05/17] fixs some CI --- .github/workflows/python-package-conda.yml | 6 +++--- pyproject.toml | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 3d37b982..da00acbf 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -15,10 +15,10 @@ jobs: matrix: os: [ubuntu-22.04] python-version: ['3.12', '3.13', '3.14'] - ctlearn-version: ['latest', 'nightly'] + dl1dh-version: ['latest', 'nightly'] max-parallel: 6 runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.ctlearn-version == 'nightly' || matrix.python-version == '3.14' }} + continue-on-error: ${{ matrix.dl1dh-version == 'nightly' || matrix.python-version == '3.14' }} steps: - uses: actions/checkout@v4 @@ -45,7 +45,7 @@ jobs: sudo apt-get install -y git pip install --upgrade pip pip install pylint pylint-exit anybadge eventio pytest flake8 - if [ "${{ matrix.ctlearn-version }}" = "nightly" ]; then + if [ "${{ matrix.dl1dh-version }}" = "nightly" ]; then pip install git+https://github.com/cta-observatory/dl1-data-handler.git else pip install dl1-data-handler diff --git a/pyproject.toml b/pyproject.toml index 1ae3afa1..c361c71f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] @@ -35,7 +36,7 @@ dependencies = [ "pyyaml", "scikit-learn", "numba", - "tensorflow>=2.20", + "tensorflow>=2.16", "pydot", "setuptools", "ctapipe[all]>=0.28", From 79102c77b1f1ec245a4bd4e27fb062f226440ac9 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 15 Dec 2025 16:16:45 +0100 Subject: [PATCH 06/17] remove support for keras2 --- ctlearn/tools/predict_LST1.py | 9 +++------ ctlearn/tools/predict_model.py | 18 +++++++++--------- ctlearn/tools/train_model.py | 6 +----- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/ctlearn/tools/predict_LST1.py b/ctlearn/tools/predict_LST1.py index e27e68c6..a4751d9a 100644 --- a/ctlearn/tools/predict_LST1.py +++ b/ctlearn/tools/predict_LST1.py @@ -120,7 +120,7 @@ class LST1PredictionTool(Tool): load_type_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) " + "Path to a Keras model file (Keras3) " "for the classification of the primary particle type." ), allow_none=True, @@ -132,7 +132,7 @@ class LST1PredictionTool(Tool): load_energy_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) " + "Path to a Keras model file (Keras3) " "for the regression of the primary particle energy." 
), allow_none=True, @@ -144,7 +144,7 @@ class LST1PredictionTool(Tool): load_cameradirection_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) " + "Path to a Keras model file (Keras3) " "for the regression of the primary particle arrival direction " "based on the camera coordinate offsets." ), @@ -514,9 +514,6 @@ def start(self): image = get_unmapped_image(event, self.channels, self.transforms) data.append(self.image_mapper.map_image(image)) input_data = {"input": np.array(data)} - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - input_data = input_data["input"] event_id.extend(dl1_table["event_id"].data) tel_azimuth.extend(dl1_table["tel_az"].data) diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py index 0eaa8b23..ea86c811 100644 --- a/ctlearn/tools/predict_model.py +++ b/ctlearn/tools/predict_model.py @@ -107,14 +107,14 @@ class PredictCTLearnModel(Tool): prefix : str Name of the reconstruction algorithm used to generate the dl2 data. load_type_model_from : pathlib.Path - Path to a Keras model file (Keras3) or directory (Keras2) for the classification of the primary particle type. + Path to a Keras model file (Keras3) for the classification of the primary particle type. load_energy_model_from : pathlib.Path - Path to a Keras model file (Keras3) or directory (Keras2) for the regression of the primary particle energy. + Path to a Keras model file (Keras3) for the regression of the primary particle energy. load_cameradirection_model_from : pathlib.Path - Path to a Keras model file (Keras3) or directory (Keras2) for the regression + Path to a Keras model file (Keras3) for the regression of the primary particle arrival direction based on camera coordinate offsets. load_skydirection_model_from : pathlib.Path - Path to a Keras model file (Keras3) or directory (Keras2) for the regression + Path to a Keras model file (Keras3) for the regression of the primary particle arrival direction based on spherical coordinate offsets. output_path : pathlib.Path Output path to save the dl2 prediction results. @@ -228,7 +228,7 @@ class PredictCTLearnModel(Tool): load_type_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) for the classification " + "Path to a Keras model file (Keras3) for the classification " "of the primary particle type." ), allow_none=True, @@ -240,7 +240,7 @@ class PredictCTLearnModel(Tool): load_energy_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) for the regression " + "Path to a Keras model file (Keras3) for the regression " "of the primary particle energy." ), allow_none=True, @@ -252,7 +252,7 @@ class PredictCTLearnModel(Tool): load_cameradirection_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) for the regression " + "Path to a Keras model file (Keras3) for the regression " "of the primary particle arrival direction based on camera coordinate offsets." ), allow_none=True, @@ -264,7 +264,7 @@ class PredictCTLearnModel(Tool): load_skydirection_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) for the regression " + "Path to a Keras model file (Keras3) for the regression " "of the primary particle arrival direction based on spherical coordinate offsets." 
), allow_none=True, @@ -435,7 +435,7 @@ def _predict_with_model(self, model_path): Parameters ---------- model_path : str - Path to a Keras model file (Keras3) or directory (Keras2). + Path to a Keras model file (Keras3). Returns ------- diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py index 2ce4ced5..4c440350 100644 --- a/ctlearn/tools/train_model.py +++ b/ctlearn/tools/train_model.py @@ -335,11 +335,7 @@ def setup(self): monitor = "val_loss" monitor_mode = "min" # Model checkpoint callback - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - model_path = f"{self.output_dir}/ctlearn_model.keras" - else: - model_path = f"{self.output_dir}/ctlearn_model.cpk" + model_path = f"{self.output_dir}/ctlearn_model.keras" model_checkpoint_callback = keras.callbacks.ModelCheckpoint( filepath=model_path, monitor=monitor, From 226eaf500af5af7a322b78dd8091a287e448643b Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 15 Dec 2025 16:23:13 +0100 Subject: [PATCH 07/17] remove support for keras2 also in loader --- ctlearn/core/loader.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ctlearn/core/loader.py b/ctlearn/core/loader.py index 5b723f9c..f018f677 100644 --- a/ctlearn/core/loader.py +++ b/ctlearn/core/loader.py @@ -186,9 +186,6 @@ def _get_mono_item(self, batch): ), axis=1, ) - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - features = features["input"] return features, labels def _get_stereo_item(self, batch): @@ -309,7 +306,4 @@ def _get_stereo_item(self, batch): features = {"input": np.array(mono_feature_vectors)} if "stereo_feature_vectors" in batch.colnames: features = {"input": np.array(stereo_feature_vectors)} - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - features = features["input"] return features, labels From 0d1955d8df2c10d771f00418081a2cc504c45b42 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 15 Dec 2025 16:33:20 +0100 Subject: [PATCH 08/17] update installation instructions --- README.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 81556e43..fa922416 100644 --- a/README.rst +++ b/README.rst @@ -28,22 +28,22 @@ CTLearn is a package under active development to run deep learning models to ana Installation for users ---------------------- -Download and install `Anaconda `_\ , or, for a minimal installation, `Miniconda `_. -The following command will set up a conda virtual environment, add the -necessary package channels, and install CTLearn specified version and its dependencies: +Installation +------------ + +First, create and activate a fresh conda environment: .. code-block:: bash - CTLEARN_VER=0.10.3 - wget https://raw.githubusercontent.com/ctlearn-project/ctlearn/v$CTLEARN_VER/environment.yml - conda env create -n [ENVIRONMENT_NAME] -f environment.yml - conda activate [ENVIRONMENT_NAME] - pip install ctlearn==$CTLEARN_VER - ctlearn -h + mamba create -n ctlearn -c conda-forge python==3.12 + mamba activate ctlearn + +The latest version of this package can be installed as a pip package: +.. code-block:: bash -This should automatically install all dependencies (NOTE: this may take some time, as by default MKL is included as a dependency of NumPy and it is very large). + pip install ctlearn See the documentation for further information like `installation instructions for developers `_, `package usage `_, and `dependencies `_ among other topics.
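A minimal post-install check for the instructions above, shown only as a sketch and assuming the installed package exposes a ``__version__`` attribute (that attribute does not appear in the patch itself), would be:

.. code-block:: python

    # Sanity check after "pip install ctlearn" (sketch; assumes ctlearn exposes __version__)
    import ctlearn
    print(ctlearn.__version__)

If the import fails, the conda environment created in the previous step is most likely not activated.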
From e78e19c7d98e4528246c70d033daf75850230638 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 15 Dec 2025 16:56:21 +0100 Subject: [PATCH 09/17] remove conda deploy workflow and polish pypi one --- .github/conda/bld.bat | 2 - .github/conda/build.sh | 2 - .github/conda/conda_build_config.yaml | 7 --- .github/conda/meta.yaml | 56 ------------------ .github/workflows/release.yml | 81 ++++++++------------------- 5 files changed, 23 insertions(+), 125 deletions(-) delete mode 100644 .github/conda/bld.bat delete mode 100644 .github/conda/build.sh delete mode 100644 .github/conda/conda_build_config.yaml delete mode 100644 .github/conda/meta.yaml diff --git a/.github/conda/bld.bat b/.github/conda/bld.bat deleted file mode 100644 index 89481145..00000000 --- a/.github/conda/bld.bat +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -$PYTHON setup.py install --single-version-externally-managed --record=record.txt diff --git a/.github/conda/build.sh b/.github/conda/build.sh deleted file mode 100644 index 89481145..00000000 --- a/.github/conda/build.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -$PYTHON setup.py install --single-version-externally-managed --record=record.txt diff --git a/.github/conda/conda_build_config.yaml b/.github/conda/conda_build_config.yaml deleted file mode 100644 index 49777cb0..00000000 --- a/.github/conda/conda_build_config.yaml +++ /dev/null @@ -1,7 +0,0 @@ -python: - - 3.13 - - 3.12 - - 3.11 - - 3.10 - - diff --git a/.github/conda/meta.yaml b/.github/conda/meta.yaml deleted file mode 100644 index b78f32b9..00000000 --- a/.github/conda/meta.yaml +++ /dev/null @@ -1,56 +0,0 @@ -{% set data = load_setup_py_data() %} - -package: - name: ctlearn - version: {{ data.get('version') }} -source: - path: ../.. - -build: - #noarch: generic - number: 0 - -requirements: - build: - - python #==3.12 - - numpy >=1.20 - - setuptools - - astropy - - scipy - - jupyter - - ctapipe ==0.20.0 - - pytables >=3.8 - - pandas - host: - - python #==3.12 - - numpy >=1.20 - - astropy - - setuptools - - scipy - - jupyter - - ctapipe ==0.20.0 - - pytables >=3.8 - - pandas - run: - - python #==3.12 - - numpy >=1.20 - - jupyter - - setuptools - - astropy - - scipy - - ctapipe ==0.20.0 - - pytables >=3.8 - - pandas - - test: - imports: - - ctlearn -about: - home: https://github.com/ctlearn-project/ctlearn/ - license: BSD3-Clause - license_file: LICENSE - summary: Deep Learning for IACT Event Reconstruction. 
-extra: - recipe-maintainers: - - TjarkMiener - - nietootein diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 646613b1..51a7d068 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,13 +2,14 @@ name: Release CD on: release: - types: [published] + types: [published] workflow_dispatch: + jobs: pypi-publish: name: Publish release to PyPI environment: - name: pypi + name: pypi url: https://pypi.org/project/ctlearn/ permissions: id-token: write @@ -20,64 +21,28 @@ jobs: strategy: matrix: os: [ubuntu-22.04] - pyv: ['3.10'] + pyv: ['3.12'] max-parallel: 5 runs-on: ${{ matrix.os }} + steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.pyv }} - run: | - conda install -y python=${{ matrix.pyv }} - - - name: Add conda to system path - run: | - #$CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - - - name: Install dependencies - run: | - conda env update --file environment.yml --name base - - - name: Build package - run: | - python --version - pip install -U build - python -m build - - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + - uses: actions/checkout@v4 + - name: Set up micromamba (Python ${{ matrix.pyv }}) + uses: mamba-org/setup-micromamba@v1 + with: + environment-name: ctlearn + create-args: >- + python=${{ matrix.pyv }} + pip + cache-environment: true - condapublish: - strategy: - matrix: - os: [ubuntu-22.04] - pyv: ["3.10"] - max-parallel: 5 - runs-on: ${{ matrix.os }} - permissions: - id-token: write - contents: write - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set up Python ${{ matrix.pyv }} - run: | - conda install -y python=${{ matrix.pyv }} - - - name: Add conda to system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - - - name: Install dependencies - run: | - conda env update --file environment.yml --name base - sudo apt-get install python3-numpy - - - name: publish-to-conda - uses: fcakyon/conda-publish-action@v1.3 - with: - subdir: '.github/conda' - anacondatoken: ${{ secrets.ANACONDA_TOKEN }} + - name: Build package + shell: micromamba-shell {0} + run: | + python --version + pip install -U build + python -m build + + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file From 22b6f6cd37d9f311ce1aebdfe6468ba0d19be772 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Tue, 16 Dec 2025 14:15:02 +0100 Subject: [PATCH 10/17] test building of pypi run on PRs when workflow are touched, but only upload on github.event release --- .github/workflows/release.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 51a7d068..984895f4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,6 +4,9 @@ on: release: types: [published] workflow_dispatch: + pull_request: + paths: + - '.github/workflows/**' jobs: pypi-publish: @@ -45,4 +48,5 @@ jobs: python -m build - name: Publish package distributions to PyPI + if: github.event_name == 'release' uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file From ae421657f0531091dc44ca87fae80d2816ddf237 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Wed, 17 Dec 2025 10:00:14 +0100 Subject: [PATCH 11/17] bump python to v3.12 also for the docs --- .readthedocs.yaml | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9d5aecee..71fb5123 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -6,7 +6,7 @@ sphinx: build: os: ubuntu-22.04 tools: - python: "3.10" + python: "3.12" python: install: From 608ce714c18a597013114c5df24bda526453311c Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Wed, 17 Dec 2025 14:53:49 +0100 Subject: [PATCH 12/17] update installation instructions --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index fa922416..f69dbb25 100644 --- a/README.rst +++ b/README.rst @@ -36,7 +36,7 @@ First, create and activate a fresh conda environment: .. code-block:: bash - mamba create -n ctlearn -c conda-forge python==3.12 + mamba create -n ctlearn -c conda-forge python==3.12 llvmlite mamba activate ctlearn The lastest version fo this package can be installed as a pip package: From 84f549c0ab2df1210f8f38c75bf81b5e06f996c2 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Wed, 17 Dec 2025 14:54:35 +0100 Subject: [PATCH 13/17] fix some keras3 updates --- ctlearn/conftest.py | 18 ++++++++---------- ctlearn/core/loader.py | 8 ++++---- ctlearn/core/model.py | 6 +++--- ctlearn/core/tests/test_loader.py | 6 +++--- ctlearn/tools/__init__.py | 11 +++++++++++ ctlearn/tools/predict_model.py | 6 ------ 6 files changed, 29 insertions(+), 26 deletions(-) diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py index d912ae18..dd838034 100644 --- a/ctlearn/conftest.py +++ b/ctlearn/conftest.py @@ -1,16 +1,14 @@ """ common pytest fixtures for tests in ctlearn. -Credits to ctapipe for the original code. """ import pytest - from ctapipe.core import run_tool from ctapipe.utils import get_dataset_path @pytest.fixture(scope="session") -def prod5_gamma_simtel_path(): - return get_dataset_path("gamma_prod5.simtel.zst") +def gamma_simtel_path(): + return get_dataset_path("gamma_test_large.simtel.gz") @pytest.fixture(scope="session") def dl1_tmp_path(tmp_path_factory): @@ -23,7 +21,7 @@ def r1_tmp_path(tmp_path_factory): return tmp_path_factory.mktemp("r1_") @pytest.fixture(scope="session") -def dl1_gamma_file(dl1_tmp_path, prod5_gamma_simtel_path): +def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path): """ DL1 file containing both images and parameters from a gamma simulation set. """ @@ -32,16 +30,16 @@ def dl1_gamma_file(dl1_tmp_path, prod5_gamma_simtel_path): output = dl1_tmp_path / "gamma.dl1.h5" argv = [ - f"--input={prod5_gamma_simtel_path}", + f"--input={gamma_simtel_path}", f"--output={output}", "--write-images", - "--DataWriter.Contact.name=αℓℓ the äüöß", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", ] assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output @pytest.fixture(scope="session") -def r1_gamma_file(r1_tmp_path, prod5_gamma_simtel_path): +def r1_gamma_file(r1_tmp_path, gamma_simtel_path): """ R1 file containing both waveforms and parameters from a gamma simulation set. 
""" @@ -50,10 +48,10 @@ def r1_gamma_file(r1_tmp_path, prod5_gamma_simtel_path): output = r1_tmp_path / "gamma.r1.h5" argv = [ - f"--input={prod5_gamma_simtel_path}", + f"--input={gamma_simtel_path}", f"--output={output}", f"--DataWriter.write_r1_waveforms=True", - "--DataWriter.Contact.name=αℓℓ the äüöß", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", ] assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 return output \ No newline at end of file diff --git a/ctlearn/core/loader.py b/ctlearn/core/loader.py index f018f677..92fd96c2 100644 --- a/ctlearn/core/loader.py +++ b/ctlearn/core/loader.py @@ -155,7 +155,7 @@ def _get_mono_item(self, batch): """ # Retrieve the telescope images and store in the features dictionary labels = {} - features = {"input": batch["features"].data} + features = batch["features"].data if "type" in self.tasks: labels["type"] = to_categorical( batch["true_shower_primary_class"].data, @@ -300,10 +300,10 @@ def _get_stereo_item(self, batch): ) # Store the fatures in the features dictionary if "features" in batch.colnames: - features = {"input": np.array(features)} + features = np.array(features) # TDOO: Add support for both feature vectors if "mono_feature_vectors" in batch.colnames: - features = {"input": np.array(mono_feature_vectors)} + features = np.array(mono_feature_vectors) if "stereo_feature_vectors" in batch.colnames: - features = {"input": np.array(stereo_feature_vectors)} + features = np.array(stereo_feature_vectors) return features, labels diff --git a/ctlearn/core/model.py b/ctlearn/core/model.py index 07c79d46..bfac48c2 100644 --- a/ctlearn/core/model.py +++ b/ctlearn/core/model.py @@ -288,7 +288,7 @@ def _build_backbone(self, input_shape): """ # Define the input layer from the input shape - network_input = keras.Input(shape=input_shape, name="input") + network_input = keras.Input(shape=input_shape) # Get model arcihtecture parameters for the backbone filters_list = [layer["filters"] for layer in self.architecture] kernel_sizes = [layer["kernel_size"] for layer in self.architecture] @@ -472,7 +472,7 @@ def _build_backbone(self, input_shape): Keras input layer object for the backbone model. 
""" # Define the input layer from the input shape - network_input = keras.Input(shape=input_shape, name="input") + network_input = keras.Input(shape=input_shape) # Apply initial padding if specified if self.init_padding > 0: network_input = keras.layers.ZeroPadding2D( @@ -880,7 +880,7 @@ def _build_backbone(self, input_shape): """ # Define the input layer from the input shape - network_input = keras.Input(shape=input_shape, name="input") + network_input = keras.Input(shape=input_shape) # Set the backbone model to be trainable or not for layer in self.model.layers: if layer.name.endswith("_block"): diff --git a/ctlearn/core/tests/test_loader.py b/ctlearn/core/tests/test_loader.py index 97f87172..7fe71900 100644 --- a/ctlearn/core/tests/test_loader.py +++ b/ctlearn/core/tests/test_loader.py @@ -1,17 +1,17 @@ -import pytest from traitlets.config.loader import Config from dl1_data_handler.reader import DLImageReader from ctlearn.core.loader import DLDataLoader -def test_data_loader(dl1_tmp_path, dl1_gamma_file): +def test_data_loader(dl1_gamma_file): """check""" # Create a configuration suitable for the test config = Config( { "DLImageReader": { "allowed_tels": [4], + "focal_length_choice": "EQUIVALENT", }, } ) @@ -34,4 +34,4 @@ def test_data_loader(dl1_tmp_path, dl1_gamma_file): and "skydirection" in labels ) # Check the shape of the features - assert features["input"].shape == (1, 110, 110, 2) + assert features.shape == (1, 110, 110, 2) diff --git a/ctlearn/tools/__init__.py b/ctlearn/tools/__init__.py index 996469a1..d54df32d 100644 --- a/ctlearn/tools/__init__.py +++ b/ctlearn/tools/__init__.py @@ -1,2 +1,13 @@ """ctlearn command line tools. """ + +from .train_model import TrainCTLearnModel +from .predict_LST1 import LST1PredictionTool +from .predict_model import MonoPredictCTLearnModel, StereoPredictCTLearnModel + +__all__ = [ + "TrainCTLearnModel", + "LST1PredictionTool", + "MonoPredictCTLearnModel", + "StereoPredictCTLearnModel" +] \ No newline at end of file diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py index ea86c811..3d55b150 100644 --- a/ctlearn/tools/predict_model.py +++ b/ctlearn/tools/predict_model.py @@ -65,12 +65,6 @@ SUBARRAY_EVENT_KEYS = ["obs_id", "event_id"] TELESCOPE_EVENT_KEYS = ["obs_id", "event_id", "tel_id"] -__all__ = [ - "PredictCTLearnModel", - "MonoPredictCTLearnModel", - "StereoPredictCTLearnModel", -] - class PredictCTLearnModel(Tool): """ From 3a3faa07b6bc5a25c7083ab637b8d6cabf0f5fc4 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Wed, 17 Dec 2025 14:55:25 +0100 Subject: [PATCH 14/17] add first tests for TrainCTLearnModel --- ctlearn/tools/tests/test_train_model.py | 60 +++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 ctlearn/tools/tests/test_train_model.py diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py new file mode 100644 index 00000000..5005cd60 --- /dev/null +++ b/ctlearn/tools/tests/test_train_model.py @@ -0,0 +1,60 @@ +import pandas as pd +import pytest +import shutil + +from ctapipe.core import run_tool +from ctlearn.tools import TrainCTLearnModel + +#@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection", "skydirection"]) +@pytest.mark.parametrize("reco_task", ["energy", "cameradirection", "skydirection"]) +def test_train_ctlearn_model(reco_task, dl1_gamma_file, tmp_path): + """ + Test training CTLearn model using a temporary copy of the DL1 gamma file. + Each test run gets its own isolated temp directories. 
+ """ + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_dl1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_dl1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy DL1 gamma file to the signal directory + shutil.copy(dl1_gamma_file, signal_dir) + + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{reco_task}" + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=2", + "--TrainCTLearnModel.batch_size=4", + "--DLImageReader.focal_length_choice=EQUIVALENT", + "--DLImageReader.allowed_tel_types=LST_LST_LSTCam", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend([ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5" + ]) + + # Run training + assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 + + # --- Additional checks --- + # Check that the trained model exists + model_file = output_dir / "ctlearn_model.keras" + assert model_file.exists(), f"Trained model file not found for {reco_task}" + # Check training_log.csv exists + log_file = output_dir / "training_log.csv" + assert log_file.exists(), f"Training log file not found for {reco_task}" + # Read CSV and verify number of epochs + log_df = pd.read_csv(log_file) + num_epochs_logged = log_df.shape[0] + assert num_epochs_logged == 2, f"Expected two epochs, found {num_epochs_logged} for {reco_task}" \ No newline at end of file From b4d226941ad573ae34c13bfc0a8342143da6fdf4 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Wed, 17 Dec 2025 17:11:43 +0100 Subject: [PATCH 15/17] added test for the 'type' reco task --- ctlearn/conftest.py | 23 +++++++++++++++++ ctlearn/tools/tests/test_train_model.py | 34 ++++++++++++++++++++----- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py index dd838034..8349f73f 100644 --- a/ctlearn/conftest.py +++ b/ctlearn/conftest.py @@ -10,6 +10,12 @@ def gamma_simtel_path(): return get_dataset_path("gamma_test_large.simtel.gz") +@pytest.fixture(scope="session") +def proton_simtel_path(): + return get_dataset_path( + "proton_20deg_0deg_run4___cta-prod5-paranal_desert-2147m-Paranal-dark-100evts.simtel.zst" + ) + @pytest.fixture(scope="session") def dl1_tmp_path(tmp_path_factory): """Temporary directory for global dl1 test data""" @@ -38,6 +44,23 @@ def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path): assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output +@pytest.fixture(scope="session") +def dl1_proton_file(dl1_tmp_path, proton_simtel_path): + """ + DL1 file containing both images and parameters from a proton simulation set. 
+ """ + from ctapipe.tools.process import ProcessorTool + + output = dl1_tmp_path / "proton.dl1.h5" + argv = [ + f"--input={proton_simtel_path}", + f"--output={output}", + "--write-images", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", + ] + assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 + return output + @pytest.fixture(scope="session") def r1_gamma_file(r1_tmp_path, gamma_simtel_path): """ diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py index 5005cd60..9d936b60 100644 --- a/ctlearn/tools/tests/test_train_model.py +++ b/ctlearn/tools/tests/test_train_model.py @@ -5,11 +5,10 @@ from ctapipe.core import run_tool from ctlearn.tools import TrainCTLearnModel -#@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection", "skydirection"]) -@pytest.mark.parametrize("reco_task", ["energy", "cameradirection", "skydirection"]) -def test_train_ctlearn_model(reco_task, dl1_gamma_file, tmp_path): +@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection", "skydirection"]) +def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_path): """ - Test training CTLearn model using a temporary copy of the DL1 gamma file. + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. Each test run gets its own isolated temp directories. """ # Temporary directories for signal and background @@ -21,6 +20,8 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, tmp_path): # Hardcopy DL1 gamma file to the signal directory shutil.copy(dl1_gamma_file, signal_dir) + # Hardcopy DL1 proton file to the background directory + shutil.copy(dl1_proton_file, background_dir) # Output directory for trained model output_dir = tmp_path / f"ctlearn_{reco_task}" @@ -34,15 +35,19 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, tmp_path): "--TrainCTLearnModel.n_epochs=2", "--TrainCTLearnModel.batch_size=4", "--DLImageReader.focal_length_choice=EQUIVALENT", - "--DLImageReader.allowed_tel_types=LST_LST_LSTCam", ] # Include background only for classification task if reco_task == "type": + allowed_tels = {7, 13, 15, 16, 17, 19} argv.extend([ f"--background={background_dir}", - "--pattern-background=*.dl1.h5" + "--pattern-background=*.dl1.h5", + f"--DLImageReader.allowed_tels={allowed_tels}", + "--DLImageReader.enforce_subarray_equality=False", ]) + else: + argv.extend(["--DLImageReader.allowed_tel_types=LST_LST_LSTCam"]) # Run training assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 @@ -57,4 +62,19 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, tmp_path): # Read CSV and verify number of epochs log_df = pd.read_csv(log_file) num_epochs_logged = log_df.shape[0] - assert num_epochs_logged == 2, f"Expected two epochs, found {num_epochs_logged} for {reco_task}" \ No newline at end of file + assert num_epochs_logged == 2, f"Expected two epochs, found {num_epochs_logged} for {reco_task}" + # Check that val_loss column exists + assert "val_loss" in log_df.columns, ( + f"'val_loss' column missing in training_log.csv for {reco_task}" + ) + val_loss_min= 0.0 + val_loss_max= 1.5 if reco_task == "skydirection" else 1.0 + # Check val_loss values are between 0.0 and 1.0 (or 1.5 for skydirection) + val_loss = log_df["val_loss"].dropna() + assert not val_loss.empty, ( + f"'val_loss' column is empty for {reco_task}" + ) + assert ((val_loss >= val_loss_min) & (val_loss <= val_loss_max)).all(), ( + f"'val_loss' values out of range [{val_loss_min}, 
{val_loss_max}] for {reco_task}: " + f"{val_loss.tolist()}" + ) \ No newline at end of file From b2a5f6b56a06b6c8bdc56fbff33a8de69500644e Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 19 Dec 2025 20:17:44 +0100 Subject: [PATCH 16/17] add test for prediction tool --- ctlearn/conftest.py | 62 +++++++++++++++++- ctlearn/tools/tests/test_predict_model.py | 77 +++++++++++++++++++++++ ctlearn/tools/tests/test_train_model.py | 4 -- 3 files changed, 137 insertions(+), 6 deletions(-) create mode 100644 ctlearn/tools/tests/test_predict_model.py diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py index 8349f73f..c8d22ef8 100644 --- a/ctlearn/conftest.py +++ b/ctlearn/conftest.py @@ -3,8 +3,11 @@ """ import pytest +import shutil + from ctapipe.core import run_tool from ctapipe.utils import get_dataset_path +from ctlearn.tools import TrainCTLearnModel @pytest.fixture(scope="session") def gamma_simtel_path(): @@ -33,13 +36,14 @@ def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path): """ from ctapipe.tools.process import ProcessorTool + allowed_tels = {7, 13, 15, 16, 17, 19} output = dl1_tmp_path / "gamma.dl1.h5" - argv = [ f"--input={gamma_simtel_path}", f"--output={output}", "--write-images", "--SimTelEventSource.focal_length_choice=EQUIVALENT", + f"--SimTelEventSource.allowed_tels={allowed_tels}", ] assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output @@ -51,12 +55,14 @@ def dl1_proton_file(dl1_tmp_path, proton_simtel_path): """ from ctapipe.tools.process import ProcessorTool + allowed_tels = {7, 13, 15, 16, 17, 19} output = dl1_tmp_path / "proton.dl1.h5" argv = [ f"--input={proton_simtel_path}", f"--output={output}", "--write-images", "--SimTelEventSource.focal_length_choice=EQUIVALENT", + f"--SimTelEventSource.allowed_tels={allowed_tels}", ] assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output @@ -77,4 +83,56 @@ def r1_gamma_file(r1_tmp_path, gamma_simtel_path): "--SimTelEventSource.focal_length_choice=EQUIVALENT", ] assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 - return output \ No newline at end of file + return output + +@pytest.fixture(scope="session") +def ctlearn_trained_dl1_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory): + """ + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. 
+ """ + tmp_path = tmp_path_factory.mktemp("ctlearn_models") + + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_dl1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_dl1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy DL1 gamma file to the signal directory + shutil.copy(dl1_gamma_file, signal_dir) + # Hardcopy DL1 proton file to the background directory + shutil.copy(dl1_proton_file, background_dir) + + ctlearn_trained_dl1_models = {} + for reco_task in ["type", "energy", "cameradirection"]: + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{reco_task}" + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=1", + "--TrainCTLearnModel.batch_size=2", + "--DLImageReader.focal_length_choice=EQUIVALENT", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend([ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + ]) + + # Run training + assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 + + ctlearn_trained_dl1_models[reco_task] = output_dir / "ctlearn_model.keras" + # Check that the trained model exists + assert ctlearn_trained_dl1_models[reco_task].exists() + return ctlearn_trained_dl1_models \ No newline at end of file diff --git a/ctlearn/tools/tests/test_predict_model.py b/ctlearn/tools/tests/test_predict_model.py new file mode 100644 index 00000000..55e6d97e --- /dev/null +++ b/ctlearn/tools/tests/test_predict_model.py @@ -0,0 +1,77 @@ +import shutil +import numpy as np + +from ctapipe.core import run_tool +from ctapipe.io import TableLoader +from ctlearn.tools import MonoPredictCTLearnModel + +def test_predict_model(ctlearn_trained_dl1_models, dl1_gamma_file, tmp_path): + """ + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. 
+ """ + + model_dir = tmp_path / "trained_models" + model_dir.mkdir(parents=True, exist_ok=True) + + dl2_dir = tmp_path / "dl2_output" + dl2_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy the trained models to the model directory + for reco_task in ["type", "energy", "cameradirection"]: + shutil.copy(ctlearn_trained_dl1_models[f"{reco_task}"], model_dir / f"ctlearn_model_{reco_task}.keras") + model_file = model_dir / f"ctlearn_model_{reco_task}.keras" + assert model_file.exists(), f"Trained model file not found for {reco_task}" + + # Build command-line arguments + output_file = dl2_dir / "gamma.dl2.h5" + argv = [ + f"--input_url={dl1_gamma_file}", + f"--output={output_file}", + "--PredictCTLearnModel.batch_size=4", + "--DLImageReader.focal_length_choice=EQUIVALENT", + ] + + # Run Prediction for energy and type together + assert run_tool( + MonoPredictCTLearnModel(), + argv = argv + [ + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_model_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_model_energy.keras", + "--use-HDF5Merger", + "--no-dl1-images", + "--no-true-images", + ], + cwd=tmp_path + ) == 0 + + assert run_tool( + MonoPredictCTLearnModel(), + argv= argv + [ + f"--PredictCTLearnModel.load_cameradirection_model_from=" + f"{model_dir}/ctlearn_model_cameradirection.keras", + "--no-use-HDF5Merger", + ], + cwd=tmp_path, + ) == 0 + + + allowed_tels = [7, 13, 15, 16, 17, 19] + required_columns = [ + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + "CTLearn_alt", + "CTLearn_az", + "CTLearn_prediction", + "CTLearn_energy", + ] + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + # Check that the created DL2 file can be read with the TableLoader + with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: + events = loader.read_telescope_events_by_id(telescopes=allowed_tels) + for tel_id in allowed_tels: + assert len(events[tel_id]) > 0 + for col in required_columns: + assert col in events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" + assert events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py index 9d936b60..ed661409 100644 --- a/ctlearn/tools/tests/test_train_model.py +++ b/ctlearn/tools/tests/test_train_model.py @@ -39,15 +39,11 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_pat # Include background only for classification task if reco_task == "type": - allowed_tels = {7, 13, 15, 16, 17, 19} argv.extend([ f"--background={background_dir}", "--pattern-background=*.dl1.h5", - f"--DLImageReader.allowed_tels={allowed_tels}", "--DLImageReader.enforce_subarray_equality=False", ]) - else: - argv.extend(["--DLImageReader.allowed_tel_types=LST_LST_LSTCam"]) # Run training assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 From f524fb861ffaf7e410fe56263dd182ecc427f807 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 9 Jan 2026 17:43:00 +0100 Subject: [PATCH 17/17] fixes prediction tools when selecting a subset of tel_ids also added tests for mono predict tool remove flag to use_HDF5_merger component and use it by default overall clean up and code improvement --- ctlearn/conftest.py | 6 +- ctlearn/tools/predict_model.py | 847 +++++++++++++--------- ctlearn/tools/tests/test_predict_model.py | 109 +-- 
ctlearn/tools/tests/test_train_model.py | 13 +- 4 files changed, 574 insertions(+), 401 deletions(-) diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py index c8d22ef8..63226540 100644 --- a/ctlearn/conftest.py +++ b/ctlearn/conftest.py @@ -36,14 +36,12 @@ def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path): """ from ctapipe.tools.process import ProcessorTool - allowed_tels = {7, 13, 15, 16, 17, 19} output = dl1_tmp_path / "gamma.dl1.h5" argv = [ f"--input={gamma_simtel_path}", f"--output={output}", "--write-images", "--SimTelEventSource.focal_length_choice=EQUIVALENT", - f"--SimTelEventSource.allowed_tels={allowed_tels}", ] assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output @@ -55,14 +53,12 @@ def dl1_proton_file(dl1_tmp_path, proton_simtel_path): """ from ctapipe.tools.process import ProcessorTool - allowed_tels = {7, 13, 15, 16, 17, 19} output = dl1_tmp_path / "proton.dl1.h5" argv = [ f"--input={proton_simtel_path}", f"--output={output}", "--write-images", "--SimTelEventSource.focal_length_choice=EQUIVALENT", - f"--SimTelEventSource.allowed_tels={allowed_tels}", ] assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output @@ -111,6 +107,7 @@ def ctlearn_trained_dl1_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory output_dir = tmp_path / f"ctlearn_{reco_task}" # Build command-line arguments + allowed_tels = [7, 13, 15, 16, 17, 19] argv = [ f"--signal={signal_dir}", "--pattern-signal=*.dl1.h5", @@ -119,6 +116,7 @@ def ctlearn_trained_dl1_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory "--TrainCTLearnModel.n_epochs=1", "--TrainCTLearnModel.batch_size=2", "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}", ] # Include background only for classification task diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py index 3d55b150..d0f3649e 100644 --- a/ctlearn/tools/predict_model.py +++ b/ctlearn/tools/predict_model.py @@ -6,6 +6,7 @@ import pathlib import numpy as np import os +import tables import tensorflow as tf import keras @@ -43,7 +44,50 @@ classes_with_traits, ) from ctapipe.monitoring.interpolation import PointingInterpolator +from ctapipe.instrument import SubarrayDescription from ctapipe.io import read_table, write_table, HDF5Merger +from ctapipe.io.hdf5dataformat import ( + DL0_TEL_POINTING_GROUP, + DL1_CAMERA_COEFFICIENTS_GROUP, + DL1_COLUMN_NAMES, + DL1_IMAGE_STATISTICS_TABLE, + DL1_PIXEL_STATISTICS_GROUP, + DL1_SUBARRAY_GROUP, + DL1_SUBARRAY_POINTING_GROUP, + DL1_SUBARRAY_TRIGGER_TABLE, + DL1_TEL_GROUP, + DL1_TEL_CALIBRATION_GROUP, + DL1_TEL_ILLUMINATOR_THROUGHPUT_GROUP, + DL1_TEL_IMAGES_GROUP, + DL1_TEL_MUON_GROUP, + DL1_TEL_MUON_THROUGHPUT_GROUP, + DL1_TEL_OPTICAL_PSF_GROUP, + DL1_TEL_PARAMETERS_GROUP, + DL1_TEL_POINTING_GROUP, + DL1_TEL_TRIGGER_TABLE, + DL2_EVENT_STATISTICS_GROUP, + DL2_SUBARRAY_CROSS_CALIBRATION_GROUP, + DL2_SUBARRAY_INTER_CALIBRATION_GROUP, + DL2_TEL_GROUP, + FIXED_POINTING_GROUP, + OBSERVATION_BLOCK_TABLE, + R0_TEL_GROUP, + R1_TEL_GROUP, + SCHEDULING_BLOCK_TABLE, + SHOWER_DISTRIBUTION_TABLE, + SIMULATION_GROUP, + SIMULATION_IMAGES_GROUP, + SIMULATION_IMPACT_GROUP, + SIMULATION_PARAMETERS_GROUP, + SIMULATION_RUN_TABLE, + SIMULATION_SHOWER_TABLE, + DL2_TEL_PARTICLETYPE_GROUP, + DL2_TEL_ENERGY_GROUP, + DL2_TEL_GEOMETRY_GROUP, + DL2_SUBARRAY_PARTICLETYPE_GROUP, + DL2_SUBARRAY_ENERGY_GROUP, + DL2_SUBARRAY_GEOMETRY_GROUP, +) from ctapipe.reco.reconstructor import ReconstructionProperty from ctapipe.reco.stereo_combination 
import StereoCombiner from ctapipe.reco.utils import add_defaults_and_meta @@ -54,17 +98,29 @@ ) from ctlearn.core.loader import DLDataLoader -SIMULATION_CONFIG_TABLE = "/configuration/simulation/run" -FIXED_POINTING_GROUP = "/configuration/telescope/pointing" -POINTING_GROUP = "/dl1/monitoring/telescope/pointing" -SUBARRAY_POINTING_GROUP = "/dl1/monitoring/subarray/pointing" -DL1_TELESCOPE_GROUP = "/dl1/event/telescope" -DL1_SUBARRAY_GROUP = "/dl1/event/subarray" -DL2_SUBARRAY_GROUP = "/dl2/event/subarray" -DL2_TELESCOPE_GROUP = "/dl2/event/telescope" +# Convienient constants for column names and table keys +CONFIG_INSTRUMENT_SUBARRAY_LAYOUT = "/configuration/instrument/subarray/layout" +CONFIG_INSTRUMENT_TEL = "/configuration/instrument/telescope" +CONFIG_INSTRUMENT_TEL_CAMERA = "/configuration/instrument/telescope/camera" SUBARRAY_EVENT_KEYS = ["obs_id", "event_id"] -TELESCOPE_EVENT_KEYS = ["obs_id", "event_id", "tel_id"] - +TEL_EVENT_KEYS = ["obs_id", "event_id", "tel_id"] +TEL_ITER_GROUPS = [ + R0_TEL_GROUP, + R1_TEL_GROUP, + FIXED_POINTING_GROUP, + DL0_TEL_POINTING_GROUP, + DL1_TEL_POINTING_GROUP, + DL1_TEL_CALIBRATION_GROUP, + DL1_TEL_ILLUMINATOR_THROUGHPUT_GROUP, + DL1_TEL_MUON_THROUGHPUT_GROUP, + DL1_TEL_OPTICAL_PSF_GROUP, + DL1_TEL_PARAMETERS_GROUP, + DL1_TEL_IMAGES_GROUP, + DL1_TEL_MUON_GROUP, + SIMULATION_IMAGES_GROUP, + SIMULATION_IMPACT_GROUP, + SIMULATION_PARAMETERS_GROUP, +] class PredictCTLearnModel(Tool): """ @@ -82,8 +138,6 @@ class PredictCTLearnModel(Tool): ---------- input_url : pathlib.Path Input ctapipe HDF5 files including pixel-wise image or waveform data. - use_HDF5Merger : bool - Set whether to use the HDF5Merger component to copy the selected tables from the input file to the output file. dl1_features : bool Set whether to include the dl1 feature vectors in the output file. dl2_telescope : bool @@ -112,8 +166,6 @@ class PredictCTLearnModel(Tool): of the primary particle arrival direction based on spherical coordinate offsets. output_path : pathlib.Path Output path to save the dl2 prediction results. - overwrite_tables : bool - Overwrite the table in the output file if it exists. keras_verbose : int Verbosity mode of Keras during the prediction. strategy : tf.distribute.Strategy @@ -135,7 +187,7 @@ class PredictCTLearnModel(Tool): Finish the tool. _predict_with_model(model_path) Load and predict with a CTLearn model. - _predict_classification(example_identifiers) + _predict_particletype(example_identifiers) Predict the classification of the primary particle type. _predict_energy(example_identifiers) Predict the energy of the primary particle. @@ -151,7 +203,7 @@ class PredictCTLearnModel(Tool): Create a table with NaNs for missing predictions. _store_pointing(all_identifiers) Store the telescope pointing table from to the output file. - _create_feature_vectors_table(example_identifiers, nonexample_identifiers, classification_feature_vectors, energy_feature_vectors, direction_feature_vectors) + _create_feature_vectors_table(example_identifiers, nonexample_identifiers, particletype_feature_vectors, energy_feature_vectors, direction_feature_vectors) Create the table for the DL1 feature vectors. """ @@ -163,16 +215,6 @@ class PredictCTLearnModel(Tool): file_ok=True, ).tag(config=True) - use_HDF5Merger = Bool( - default_value=True, - allow_none=False, - help=( - "Set whether to use the HDF5Merger component to copy the selected tables " - "from the input file to the output file. CAUTION: This can only be used " - "if the output file not exists." 
- ), - ).tag(config=True) - dl1_features = Bool( default_value=False, allow_none=False, @@ -279,12 +321,6 @@ class PredictCTLearnModel(Tool): help="Output path to save the dl2 prediction results", ).tag(config=True) - overwrite_tables = Bool( - default_value=True, - allow_none=False, - help="Overwrite the table in the output file if it exists", - ).tag(config=True) - keras_verbose = Int( default_value=1, min=0, @@ -309,6 +345,12 @@ class PredictCTLearnModel(Tool): } flags = { + **flag( + "overwrite" , + "HDF5Merger.overwrite", + "Overwrite the output file if it exists", + "Do not overwrite the output file if it exists", + ), **flag( "dl1-features", "PredictCTLearnModel.dl1_features", @@ -327,12 +369,6 @@ class PredictCTLearnModel(Tool): "Include dl2 telescope-event-wise data in the output file", "Exclude dl2 telescope-event-wise data in the output file", ), - **flag( - "use-HDF5Merger", - "PredictCTLearnModel.use_HDF5Merger", - "Copy data using the HDF5Merger component (CAUTION: This can not be used if the output file already exists)", - "Do not copy data using the HDF5Merger component", - ), **flag( "r0-waveforms", "HDF5Merger.r0_waveforms", @@ -374,22 +410,10 @@ class PredictCTLearnModel(Tool): classes = classes_with_traits(DLDataReader) def setup(self): - # Check if the ctapipe HDF5Merger component is enabled - if self.use_HDF5Merger: - if os.path.exists(self.output_path): - raise ToolConfigurationError( - f"The output file '{self.output_path}' already exists. Please use " - "'--no-use-HDF5Merger' to disable the usage of the HDF5Merger component." - ) - # Copy selected tables from the input file to the output file - self.log.info("Copying to output destination.") - with HDF5Merger(self.output_path, parent=self) as merger: - merger(self.input_url) - else: - self.log.info( - "No copy to output destination, since the usage of the HDF5Merger component is disabled." - ) - + # Copy selected tables from the input file to the output file + self.log.info("Copying to output destination.") + with HDF5Merger(self.output_path, parent=self) as merger: + merger(self.input_url) # Create a MirroredStrategy. self.strategy = tf.distribute.MirroredStrategy() atexit.register(self.strategy._extended._collective_ops._lock.locked) # type: ignore @@ -415,10 +439,197 @@ def setup(self): self.last_batch_size = len(self.indices) % ( self.batch_size * self.strategy.num_replicas_in_sync ) + # Ensure subarray consistency in the output file + self._ensure_subarray_consistency() def finish(self): self.log.info("Tool is shutting down") + def _ensure_subarray_consistency(self): + """ + Align subarray metadata and trigger tables with the selected telescopes. + + When only a subset of telescopes is processed, overwrite the output file's + SubarrayDescription and trim the DL1 trigger tables to keep events that + involve the selected telescopes. Also rebuild the subarray trigger table + with the corresponding telescope participation masks. 
+ """ + + input_subarray = SubarrayDescription.from_hdf( + self.input_url, + focal_length_choice=self.dl1dh_reader.focal_length_choice, + ) + if input_subarray.__eq__(self.dl1dh_reader.subarray): + return + + self.dl1dh_reader.subarray.to_hdf(self.output_path, overwrite=True) + self.log.info("SubarrayDescription was stored in '%s'", self.output_path) + + tel_trigger_table = read_table( + self.output_path, + DL1_TEL_TRIGGER_TABLE, + ) + mask = np.isin(tel_trigger_table["tel_id"], self.dl1dh_reader.tel_ids) + tel_trigger_table = tel_trigger_table[mask] + tel_trigger_table.sort(TEL_EVENT_KEYS) + + write_table( + tel_trigger_table, + self.output_path, + DL1_TEL_TRIGGER_TABLE, + overwrite=True, + ) + + subarray_trigger_table = tel_trigger_table.copy() + subarray_trigger_table.keep_columns( + SUBARRAY_EVENT_KEYS + ["time", "event_type"] + ) + subarray_trigger_table = unique( + subarray_trigger_table, keys=SUBARRAY_EVENT_KEYS + ) + + tel_trigger_groups = tel_trigger_table.group_by(SUBARRAY_EVENT_KEYS) + tel_with_trigger = [] + for tel_trigger in tel_trigger_groups.groups: + tel_with_trigger_mask = np.zeros( + len(self.dl1dh_reader.tel_ids), dtype=bool + ) + tel_with_trigger_mask[ + self.dl1dh_reader.subarray.tel_ids_to_indices( + tel_trigger["tel_id"] + ) + ] = True + tel_with_trigger.append(tel_with_trigger_mask) + + subarray_trigger_table.add_column( + tel_with_trigger, index=-2, name="tels_with_trigger" + ) + + write_table( + subarray_trigger_table, + self.output_path, + DL1_SUBARRAY_TRIGGER_TABLE, + overwrite=True, + ) + # Update the simulation shower table to keep only events present in the subarray trigger table + subarray_trigger_table.keep_columns(SUBARRAY_EVENT_KEYS) + sim_shower_table = read_table( + self.output_path, + SIMULATION_SHOWER_TABLE, + ) + sim_shower_table = join( + sim_shower_table, + subarray_trigger_table, + keys=SUBARRAY_EVENT_KEYS, + join_type="right" + ) + sim_shower_table.sort(SUBARRAY_EVENT_KEYS) + write_table( + sim_shower_table, + self.output_path, + SIMULATION_SHOWER_TABLE, + overwrite=True, + ) + # Delete telescope-specific tables for unselected telescopes + self._delete_unselected_telescope_tables() + + def _delete_unselected_telescope_tables(self): + """ + Delete telescope-specific tables for unselected telescopes from the output file. + + Iterates through all telescope-related groups in the HDF5 file and removes + tables corresponding to telescopes that are not in the selected telescope list. + This ensures the output file only contains data for the telescopes that were + processed. The camera configuration tables are also pruned based on the camera indices. 
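+
+        A minimal sketch of the pruning step, assuming the per-telescope
+        tables follow the ``tel_XXX`` naming convention (``path`` and
+        ``kept_ids`` are placeholders for the output file and the selected
+        telescope IDs; the group path is only illustrative)::
+
+            with tables.open_file(path, mode="r+") as f:
+                group = f.get_node("/dl1/event/telescope/images")
+                for node in group._f_iter_nodes("Table"):
+                    if int(node._v_name.split("_")[-1]) not in kept_ids:
+                        node._f_remove()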
+ """ + # Open the HDF5 file to prune the unselected telescope tables and camera configurations + with tables.open_file(self.output_path, mode="r+") as h5_file: + + def prune_group(group, valid_ids): + for table in group._f_iter_nodes("Table"): + idx = int(table._v_name.split("_")[-1]) + if idx not in valid_ids: + table._f_remove() + + # Telescope-specific tables + tel_ids = set(self.dl1dh_reader.tel_ids) + for group_name in TEL_ITER_GROUPS: + group = getattr(h5_file.root, group_name, None) + if group is not None: + prune_group(group, tel_ids) + + #Camera configuration tables + layout_node = getattr(h5_file.root, CONFIG_INSTRUMENT_SUBARRAY_LAYOUT, None) + camera_group = getattr(h5_file.root, CONFIG_INSTRUMENT_TEL_CAMERA, None) + if not (layout_node and camera_group): + return + # layout can be either a Table or a Group containing a Table + layout_table = ( + layout_node + if isinstance(layout_node, tables.Table) + else next(layout_node._f_iter_nodes("Table")) + ) + camera_indices = set(layout_table.col("camera_index")) + prune_group(camera_group, camera_indices) + + def _create_nan_table(self, nonexample_identifiers, columns, shapes): + """ + Create a table with NaNs for missing predictions. + + This method creates a table with NaNs for missing predictions for the non-example identifiers. + In stereo mode, the table also a column for the valid telescopes is added with all False values. + + Parameters: + ----------- + nonexample_identifiers : astropy.table.Table + Table containing the non-example identifiers. + columns : list of str + List of column names to create in the table. + shapes : list of shapes + List of shapes for the columns to create in the table. + + Returns: + -------- + nan_table : astropy.table.Table + Table containing NaNs for missing predictions. + """ + # Create a table with NaNs for missing predictions + nan_table = nonexample_identifiers.copy() + for column_name, shape in zip(columns, shapes): + nan_table.add_column(np.full(shape, np.nan), name=column_name) + # Add that no telescope is valid for the non-example identifiers in stereo mode + if self.dl1dh_reader.mode == "stereo": + nan_table.add_column( + np.zeros( + (len(nonexample_identifiers), len(self.dl1dh_reader.tel_ids)), + dtype=bool, + ), + name=f"{self.prefix}_telescopes", + ) + return nan_table + + def deduplicate_first_valid( + self, + table: Table, + keys=('obs_id', 'event_id'), + valid_col='CTLearn_is_valid', + ): + """ + Return a deduplicated Astropy Table. + + For each group defined by `keys`, keep the first row where + `valid_col` is True. If none are valid, keep the first row. + """ + + t = table.copy() + + t.sort( + list(keys) + [valid_col], + reverse=[False] * len(keys) + [True] + ) + + return unique(t, keys=list(keys), keep='first') + def _predict_with_model(self, model_path): """ Load and predict with a CTLearn model. @@ -482,9 +693,18 @@ def _predict_with_model(self, model_path): x = layer(x) head = keras.Model(inputs=backbone_output_shape, outputs=x) # Apply the backbone model with the data loader to retrieve the feature vectors - feature_vectors = backbone_model.predict( - data_loader, verbose=self.keras_verbose - ) + try: + feature_vectors = backbone_model.predict( + data_loader, verbose=self.keras_verbose + ) + except ValueError as err: + if str(err).startswith("Input 0 of layer"): + raise ToolConfigurationError( + "Model input shape does not match the prediction data. " + "This is usually caused by selecting the wrong telescope_id. 
" + "Please ensure the telescope configuration matches the one used for training." + ) from err + raise # Apply the head model with the feature vectors to retrieve the prediction predict_data = Table( { @@ -516,7 +736,16 @@ def _predict_with_model(self, model_path): ) else: # Predict the data using the loaded model - predict_data = model.predict(data_loader, verbose=self.keras_verbose) + try: + predict_data = model.predict(data_loader, verbose=self.keras_verbose) + except ValueError as err: + if str(err).startswith("Input 0 of layer"): + raise ToolConfigurationError( + "Model input shape does not match the prediction data. " + "This is usually caused by selecting the wrong telescope_id. " + "Please ensure the telescope configuration matches the one used for training." + ) from err + raise # Create a astropy table with the prediction results # The classification task has a softmax layer as the last layer # which returns the probabilities for each class in an array, while @@ -540,7 +769,7 @@ def _predict_with_model(self, model_path): predict_data = vstack([predict_data, predict_data_last_batch]) return predict_data, feature_vectors - def _predict_classification(self, example_identifiers): + def _predict_particletype(self, example_identifiers): """ Predict the classification of the primary particle type. @@ -550,7 +779,7 @@ def _predict_classification(self, example_identifiers): Parameters: ----------- - classification_table : astropy.table.Table + particletype_table : astropy.table.Table Table containing the example identifiers with an additional column for the predicted classification score ('gammaness'). feature_vectors : np.ndarray @@ -564,11 +793,11 @@ def _predict_classification(self, example_identifiers): self.load_type_model_from ) # Create prediction table and add the predicted classification score ('gammaness') - classification_table = example_identifiers.copy() - classification_table.add_column( + particletype_table = example_identifiers.copy() + particletype_table.add_column( predict_data["type"].T[1], name=f"{self.prefix}_tel_prediction" ) - return classification_table, feature_vectors + return particletype_table, feature_vectors def _predict_energy(self, example_identifiers): """ @@ -813,64 +1042,6 @@ def _transform_spher_coord_offsets_to_sky(self, table) -> Table: ) return table - def _create_nan_table(self, nonexample_identifiers, columns, shapes): - """ - Create a table with NaNs for missing predictions. - - This method creates a table with NaNs for missing predictions for the non-example identifiers. - In stereo mode, the table also a column for the valid telescopes is added with all False values. - - Parameters: - ----------- - nonexample_identifiers : astropy.table.Table - Table containing the non-example identifiers. - columns : list of str - List of column names to create in the table. - shapes : list of shapes - List of shapes for the columns to create in the table. - - Returns: - -------- - nan_table : astropy.table.Table - Table containing NaNs for missing predictions. 
- """ - # Create a table with NaNs for missing predictions - nan_table = nonexample_identifiers.copy() - for column_name, shape in zip(columns, shapes): - nan_table.add_column(np.full(shape, np.nan), name=column_name) - # Add that no telescope is valid for the non-example identifiers in stereo mode - if self.dl1dh_reader.mode == "stereo": - nan_table.add_column( - np.zeros( - (len(nonexample_identifiers), len(self.dl1dh_reader.tel_ids)), - dtype=bool, - ), - name=f"{self.prefix}_telescopes", - ) - return nan_table - - def deduplicate_first_valid( - self, - table: Table, - keys=('obs_id', 'event_id'), - valid_col='CTLearn_is_valid', - ): - """ - Return a deduplicated Astropy Table. - - For each group defined by `keys`, keep the first row where - `valid_col` is True. If none are valid, keep the first row. - """ - - t = table.copy() - - t.sort( - list(keys) + [valid_col], - reverse=[False] * len(keys) + [True] - ) - - return unique(t, keys=list(keys), keep='first') - def _store_pointing(self, all_identifiers): """ Store the telescope pointing table from to the output file. @@ -912,13 +1083,12 @@ def _store_pointing(self, all_identifiers): write_table( tel_pointing_table, self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) self.log.info( "DL1 telescope pointing table was stored in '%s' under '%s'", self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) pointing_info = vstack(pointing_info) if self.dl1dh_reader.mode == "stereo": @@ -947,13 +1117,12 @@ def _store_pointing(self, all_identifiers): write_table( pointing_table, self.output_path, - f"{SUBARRAY_POINTING_GROUP}", - overwrite=self.overwrite_tables, + DL1_SUBARRAY_POINTING_GROUP, ) self.log.info( "DL1 subarray pointing table was stored in '%s' under '%s'", self.output_path, - f"{SUBARRAY_POINTING_GROUP}", + DL1_SUBARRAY_POINTING_GROUP, ) return pointing_info @@ -961,7 +1130,7 @@ def _create_feature_vectors_table( self, example_identifiers, nonexample_identifiers=None, - classification_feature_vectors=None, + particletype_feature_vectors=None, energy_feature_vectors=None, direction_feature_vectors=None, ): @@ -978,8 +1147,8 @@ def _create_feature_vectors_table( Table containing the example identifiers. nonexample_identifiers : astropy.table.Table or None Table containing the non-example identifiers to fill the NaNs. - classification_feature_vectors : np.ndarray or None - Array containing the classification feature vectors. + particletype_feature_vectors : np.ndarray or None + Array containing the particletype feature vectors. energy_feature_vectors : np.ndarray or None Array containing the energy feature vectors. 
direction_feature_vectors : np.ndarray or None @@ -993,20 +1162,20 @@ def _create_feature_vectors_table( # Create the feature vector table feature_vector_table = example_identifiers.copy() columns_list, shapes_list = [], [] - if classification_feature_vectors is not None: + if particletype_feature_vectors is not None: is_valid_col = ~np.isnan( - np.min(classification_feature_vectors, axis=1), dtype=bool + np.min(particletype_feature_vectors, axis=1), dtype=bool ) feature_vector_table.add_column( - classification_feature_vectors, - name=f"{self.prefix}_tel_classification_feature_vectors", + particletype_feature_vectors, + name=f"{self.prefix}_tel_particletype_feature_vectors", ) if nonexample_identifiers is not None: - columns_list.append(f"{self.prefix}_tel_classification_feature_vectors") + columns_list.append(f"{self.prefix}_tel_particletype_feature_vectors") shapes_list.append( ( len(nonexample_identifiers), - classification_feature_vectors.shape[1], + particletype_feature_vectors.shape[1], ) ) if energy_feature_vectors is not None: @@ -1102,11 +1271,9 @@ class MonoPredictCTLearnModel(PredictCTLearnModel): --energy_model="/path/to/your/mono/energy/ctlearn_model.cpk" \\ --cameradirection_model="/path/to/your/mono/cameradirection/ctlearn_model.cpk" \\ --dl1-features \\ - --use-HDF5Merger \\ --no-dl1-images \\ --no-true-images \\ --output output.dl2.h5 \\ - --PredictCTLearnModel.overwrite_tables=True \\ To predict from pixel-wise waveform data in mono mode using trained CTLearn models: > ctlearn-predict-mono-model \\ @@ -1117,13 +1284,11 @@ class MonoPredictCTLearnModel(PredictCTLearnModel): --type_model="/path/to/your/mono_waveform/type/ctlearn_model.cpk" \\ --energy_model="/path/to/your/mono_waveform/energy/ctlearn_model.cpk" \\ --cameradirection_model="/path/to/your/mono_waveform/cameradirection/ctlearn_model.cpk" \\ - --use-HDF5Merger \\ --no-r0-waveforms \\ --no-r1-waveforms \\ --no-dl1-images \\ --no-true-images \\ --output output.dl2.h5 \\ - --PredictCTLearnModel.overwrite_tables=True \\ """ stereo_combiner_cls = ComponentName( @@ -1136,11 +1301,14 @@ def start(self): self.log.info("Processing the telescope pointings...") # Retrieve the IDs from the dl1dh for the prediction tables example_identifiers = self.dl1dh_reader.example_identifiers.copy() - example_identifiers.keep_columns(TELESCOPE_EVENT_KEYS) - all_identifiers = self.dl1dh_reader.tel_trigger_table.copy() - all_identifiers.keep_columns(TELESCOPE_EVENT_KEYS + ["time"]) + example_identifiers.keep_columns(TEL_EVENT_KEYS) + all_identifiers = read_table( + self.output_path, + DL1_TEL_TRIGGER_TABLE, + ) + all_identifiers.keep_columns(TEL_EVENT_KEYS + ["time"]) nonexample_identifiers = setdiff( - all_identifiers, example_identifiers, keys=TELESCOPE_EVENT_KEYS + all_identifiers, example_identifiers, keys=TEL_EVENT_KEYS ) nonexample_identifiers.remove_column("time") # Pointing table for the mono mode for MC simulation @@ -1152,111 +1320,111 @@ def start(self): pointing_info = super()._store_pointing(all_identifiers) self.log.info("Starting the prediction...") - classification_feature_vectors = None + particletype_feature_vectors = None if self.load_type_model_from is not None: - self.type_stereo_combiner = StereoCombiner.from_name( - self.stereo_combiner_cls, - prefix=self.prefix, - property=ReconstructionProperty.PARTICLE_TYPE, - parent=self, + # Predict the type of the primary particle + particletype_table, particletype_feature_vectors = ( + super()._predict_particletype(example_identifiers) ) - # Predict the energy of the primary 
particle - classification_table, classification_feature_vectors = ( - super()._predict_classification(example_identifiers) + # Produce output table with NaNs for missing predictions + if len(nonexample_identifiers) > 0: + nan_table = super()._create_nan_table( + nonexample_identifiers, + columns=[f"{self.prefix}_tel_prediction"], + shapes=[(len(nonexample_identifiers),)], + ) + particletype_table = vstack([particletype_table, nan_table]) + # Add is_valid column to the particle type table + particletype_table.add_column( + ~np.isnan( + particletype_table[f"{self.prefix}_tel_prediction"].data, + dtype=bool, + ), + name=f"{self.prefix}_tel_is_valid", + ) + # Add the default values and meta data to the table + add_defaults_and_meta( + particletype_table, + ParticleClassificationContainer, + prefix=self.prefix, + add_tel_prefix=True, ) if self.dl2_telescope: - # Produce output table with NaNs for missing predictions - if len(nonexample_identifiers) > 0: - nan_table = super()._create_nan_table( - nonexample_identifiers, - columns=[f"{self.prefix}_tel_prediction"], - shapes=[(len(nonexample_identifiers),)], - ) - classification_table = vstack([classification_table, nan_table]) - # Add is_valid column to the energy table - classification_table.add_column( - ~np.isnan( - classification_table[f"{self.prefix}_tel_prediction"].data, - dtype=bool, - ), - name=f"{self.prefix}_tel_is_valid", - ) - # Add the default values and meta data to the table - add_defaults_and_meta( - classification_table, - ParticleClassificationContainer, - prefix=self.prefix, - add_tel_prefix=True, - ) for tel_id in self.dl1dh_reader.selected_telescopes[ self.dl1dh_reader.tel_type ]: # Retrieve the example identifiers for the selected telescope - telescope_mask = classification_table["tel_id"] == tel_id - classification_tel_table = classification_table[telescope_mask] - classification_tel_table.sort(TELESCOPE_EVENT_KEYS) + telescope_mask = particletype_table["tel_id"] == tel_id + particletype_tel_table = particletype_table[telescope_mask] + particletype_tel_table.sort(TEL_EVENT_KEYS) # Save the prediction to the output file for the selected telescope write_table( - classification_tel_table, + particletype_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) + if self.dl2_subarray: self.log.info("Processing and storing the subarray type prediction...") - # If only one telescope is used, copy the classification table + # If only one telescope is used, copy the particletype table # and modify it to subarray format if len(self.dl1dh_reader.tel_ids) == 1: - classification_subarray_table = classification_table.copy() + particletype_subarray_table = particletype_table.copy() telescope_mask = ( - classification_subarray_table["tel_id"] + particletype_subarray_table["tel_id"] == self.dl1dh_reader.tel_ids[0] ) - classification_subarray_table = classification_subarray_table[ + particletype_subarray_table = particletype_subarray_table[ telescope_mask ] - classification_subarray_table.remove_column("tel_id") - for colname in classification_subarray_table.colnames: + particletype_subarray_table.remove_column("tel_id") + for colname in 
particletype_subarray_table.colnames: if "_tel_" in colname: - classification_subarray_table.rename_column( + particletype_subarray_table.rename_column( colname, colname.replace("_tel", "") ) - classification_subarray_table.add_column( + particletype_subarray_table.add_column( [ [val] - for val in classification_subarray_table[ + for val in particletype_subarray_table[ f"{self.prefix}_is_valid" ] ], name=f"{self.prefix}_telescopes", ) else: + self.type_stereo_combiner = StereoCombiner.from_name( + self.stereo_combiner_cls, + prefix=self.prefix, + property=ReconstructionProperty.PARTICLE_TYPE, + parent=self, + ) # Combine the telescope predictions to the subarray prediction using the stereo combiner - classification_subarray_table = ( - self.type_stereo_combiner.predict_table(classification_table) + particletype_subarray_table = ( + self.type_stereo_combiner.predict_table(particletype_table) ) # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has to be converted to a boolean mask if ( - classification_subarray_table[f"{self.prefix}_telescopes"].dtype + particletype_subarray_table[f"{self.prefix}_telescopes"].dtype != np.bool_ ): # Create boolean mask for telescopes that participate in the stereo reconstruction combination reco_telescopes = np.zeros( ( - len(classification_subarray_table), + len(particletype_subarray_table), len(self.dl1dh_reader.tel_ids), ), dtype=bool, ) # Loop over the table and set the boolean mask for the telescopes for index, tel_id_mask in enumerate( - classification_subarray_table[f"{self.prefix}_telescopes"] + particletype_subarray_table[f"{self.prefix}_telescopes"] ): if not tel_id_mask: continue @@ -1267,88 +1435,80 @@ def start(self): ) ] = True # Overwrite the column with the boolean mask with fix length - classification_subarray_table[f"{self.prefix}_telescopes"] = ( + particletype_subarray_table[f"{self.prefix}_telescopes"] = ( reco_telescopes ) - # Deduplicate the subarray classification table to have only one entry per event - classification_subarray_table = super().deduplicate_first_valid( - table=classification_subarray_table, + # Deduplicate the subarray particletype table to have only one entry per event + particletype_subarray_table = super().deduplicate_first_valid( + table=particletype_subarray_table, keys=SUBARRAY_EVENT_KEYS, valid_col=f"{self.prefix}_is_valid", ) - # Sort the subarray classification table - classification_subarray_table.sort(SUBARRAY_EVENT_KEYS) + # Sort the subarray particletype table + particletype_subarray_table.sort(SUBARRAY_EVENT_KEYS) # Save the prediction to the output file write_table( - classification_subarray_table, + particletype_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", ) energy_feature_vectors = None if self.load_energy_model_from is not None: - self.energy_stereo_combiner = StereoCombiner.from_name( - self.stereo_combiner_cls, - prefix=self.prefix, - property=ReconstructionProperty.ENERGY, - parent=self, - ) # Predict the energy of the primary particle energy_table, energy_feature_vectors = super()._predict_energy( example_identifiers ) - if self.dl2_telescope: - # Produce output table with NaNs for missing predictions - if 
len(nonexample_identifiers) > 0: - nan_table = super()._create_nan_table( - nonexample_identifiers, - columns=[f"{self.prefix}_tel_energy"], - shapes=[(len(nonexample_identifiers),)], - ) - energy_table = vstack([energy_table, nan_table]) - # Add is_valid column to the energy table - energy_table.add_column( - ~np.isnan( - energy_table[f"{self.prefix}_tel_energy"].data, dtype=bool - ), - name=f"{self.prefix}_tel_is_valid", - ) - # Add the default values and meta data to the table - add_defaults_and_meta( - energy_table, - ReconstructedEnergyContainer, - prefix=self.prefix, - add_tel_prefix=True, + # Produce output table with NaNs for missing predictions + if len(nonexample_identifiers) > 0: + nan_table = super()._create_nan_table( + nonexample_identifiers, + columns=[f"{self.prefix}_tel_energy"], + shapes=[(len(nonexample_identifiers),)], ) + energy_table = vstack([energy_table, nan_table]) + # Add is_valid column to the energy table + energy_table.add_column( + ~np.isnan( + energy_table[f"{self.prefix}_tel_energy"].data, dtype=bool + ), + name=f"{self.prefix}_tel_is_valid", + ) + # Add the default values and meta data to the table + add_defaults_and_meta( + energy_table, + ReconstructedEnergyContainer, + prefix=self.prefix, + add_tel_prefix=True, + ) + if self.dl2_telescope: for tel_id in self.dl1dh_reader.selected_telescopes[ self.dl1dh_reader.tel_type ]: # Retrieve the example identifiers for the selected telescope telescope_mask = energy_table["tel_id"] == tel_id energy_tel_table = energy_table[telescope_mask] - energy_tel_table.sort(TELESCOPE_EVENT_KEYS) + energy_tel_table.sort(TEL_EVENT_KEYS) # Save the prediction to the output file write_table( energy_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL2_TEL_ENERGY_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_ENERGY_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) if self.dl2_subarray: self.log.info( "Processing and storing the subarray energy prediction..." 
) - # If only one telescope is used, copy the classification table + # If only one telescope is used, copy the particletype table # and modify it to subarray format if len(self.dl1dh_reader.tel_ids) == 1: energy_subarray_table = energy_table.copy() @@ -1370,6 +1530,12 @@ def start(self): name=f"{self.prefix}_telescopes", ) else: + self.energy_stereo_combiner = StereoCombiner.from_name( + self.stereo_combiner_cls, + prefix=self.prefix, + property=ReconstructionProperty.ENERGY, + parent=self, + ) # Combine the telescope predictions to the subarray prediction using the stereo combiner energy_subarray_table = self.energy_stereo_combiner.predict_table( energy_table @@ -1404,7 +1570,7 @@ def start(self): energy_subarray_table[f"{self.prefix}_telescopes"] = ( reco_telescopes ) - # Deduplicate the subarray classification table to have only one entry per event + # Deduplicate the subarray energy table to have only one entry per event energy_subarray_table = super().deduplicate_first_valid( table=energy_subarray_table, keys=SUBARRAY_EVENT_KEYS, @@ -1416,86 +1582,78 @@ def start(self): write_table( energy_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", ) direction_feature_vectors = None if self.load_cameradirection_model_from is not None: - self.geometry_stereo_combiner = StereoCombiner.from_name( - self.stereo_combiner_cls, - prefix=self.prefix, - property=ReconstructionProperty.GEOMETRY, - parent=self, - ) # Join the prediction table with the telescope pointing table example_identifiers = join( left=example_identifiers, right=pointing_info, - keys=TELESCOPE_EVENT_KEYS, + keys=TEL_EVENT_KEYS, ) # Predict the arrival direction of the primary particle direction_table, direction_feature_vectors = ( super()._predict_cameradirection(example_identifiers) ) direction_tel_tables = [] - if self.dl2_telescope: - for tel_id in self.dl1dh_reader.selected_telescopes[ - self.dl1dh_reader.tel_type - ]: - # Retrieve the example identifiers for the selected telescope - telescope_mask = direction_table["tel_id"] == tel_id - direction_tel_table = direction_table[telescope_mask] - direction_tel_table = super()._transform_cam_coord_offsets_to_sky( - direction_tel_table - ) - # Produce output table with NaNs for missing predictions - nan_telescope_mask = nonexample_identifiers["tel_id"] == tel_id - nonexample_identifiers_tel = nonexample_identifiers[ - nan_telescope_mask - ] - if len(nonexample_identifiers_tel) > 0: - nan_table = super()._create_nan_table( - nonexample_identifiers_tel, - columns=[f"{self.prefix}_tel_alt", f"{self.prefix}_tel_az"], - shapes=[ - (len(nonexample_identifiers_tel),), - (len(nonexample_identifiers_tel),), - ], - ) - direction_tel_table = vstack([direction_tel_table, nan_table]) - direction_tel_table.sort(TELESCOPE_EVENT_KEYS) - # Add is_valid column to the direction table - direction_tel_table.add_column( - ~np.isnan( - direction_tel_table[f"{self.prefix}_tel_alt"].data, - dtype=bool, - ), - name=f"{self.prefix}_tel_is_valid", - ) - # Add the default values and meta data to the table - add_defaults_and_meta( - direction_tel_table, - ReconstructedGeometryContainer, - prefix=self.prefix, - add_tel_prefix=True, + for tel_id in self.dl1dh_reader.selected_telescopes[ + self.dl1dh_reader.tel_type + ]: + 
# Retrieve the example identifiers for the selected telescope + telescope_mask = direction_table["tel_id"] == tel_id + direction_tel_table = direction_table[telescope_mask] + direction_tel_table = super()._transform_cam_coord_offsets_to_sky( + direction_tel_table + ) + # Produce output table with NaNs for missing predictions + nan_telescope_mask = nonexample_identifiers["tel_id"] == tel_id + nonexample_identifiers_tel = nonexample_identifiers[ + nan_telescope_mask + ] + if len(nonexample_identifiers_tel) > 0: + nan_table = super()._create_nan_table( + nonexample_identifiers_tel, + columns=[f"{self.prefix}_tel_alt", f"{self.prefix}_tel_az"], + shapes=[ + (len(nonexample_identifiers_tel),), + (len(nonexample_identifiers_tel),), + ], ) - direction_tel_tables.append(direction_tel_table) + direction_tel_table = vstack([direction_tel_table, nan_table]) + direction_tel_table.sort(TEL_EVENT_KEYS) + # Add is_valid column to the direction table + direction_tel_table.add_column( + ~np.isnan( + direction_tel_table[f"{self.prefix}_tel_alt"].data, + dtype=bool, + ), + name=f"{self.prefix}_tel_is_valid", + ) + # Add the default values and meta data to the table + add_defaults_and_meta( + direction_tel_table, + ReconstructedGeometryContainer, + prefix=self.prefix, + add_tel_prefix=True, + ) + direction_tel_tables.append(direction_tel_table) + if self.dl2_telescope: # Save the prediction to the output file write_table( direction_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) if self.dl2_subarray: self.log.info( @@ -1504,7 +1662,7 @@ def start(self): # Stack the telescope tables to the subarray table direction_tel_tables = vstack(direction_tel_tables) # Sort the table by the telescope event keys - direction_tel_tables.sort(TELESCOPE_EVENT_KEYS) + direction_tel_tables.sort(TEL_EVENT_KEYS) # If only one telescope is used, copy the classification table # and modify it to subarray format if len(self.dl1dh_reader.tel_ids) == 1: @@ -1530,6 +1688,12 @@ def start(self): name=f"{self.prefix}_telescopes", ) else: + self.geometry_stereo_combiner = StereoCombiner.from_name( + self.stereo_combiner_cls, + prefix=self.prefix, + property=ReconstructionProperty.GEOMETRY, + parent=self, + ) # Combine the telescope predictions to the subarray prediction using the stereo combiner direction_subarray_table = ( self.geometry_stereo_combiner.predict_table( @@ -1566,7 +1730,7 @@ def start(self): direction_subarray_table[f"{self.prefix}_telescopes"] = ( reco_telescopes ) - # Deduplicate the subarray classification table to have only one entry per event + # Deduplicate the subarray direction table to have only one entry per event direction_subarray_table = super().deduplicate_first_valid( table=direction_subarray_table, keys=SUBARRAY_EVENT_KEYS, @@ -1578,13 +1742,12 @@ def start(self): write_table( direction_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}", ) # Create the 
feature vector table if the DL1 features are enabled if self.dl1_features: @@ -1592,31 +1755,30 @@ def start(self): feature_vector_table = super()._create_feature_vectors_table( example_identifiers, nonexample_identifiers, - classification_feature_vectors, + particletype_feature_vectors, energy_feature_vectors, direction_feature_vectors, ) # Loop over the selected telescopes and store the feature vectors # for each telescope in the output file. The feature vectors are stored - # in the DL1_TELESCOPE_GROUP/features/{prefix}/tel_{tel_id:03d} table. + # in the DL1_TEL_GROUP/features/{prefix}/tel_{tel_id:03d} table. for tel_id in self.dl1dh_reader.selected_telescopes[ self.dl1dh_reader.tel_type ]: # Retrieve the example identifiers for the selected telescope telescope_mask = feature_vector_table["tel_id"] == tel_id feature_vectors_tel_table = feature_vector_table[telescope_mask] - feature_vectors_tel_table.sort(TELESCOPE_EVENT_KEYS) + feature_vectors_tel_table.sort(TEL_EVENT_KEYS) # Save the prediction to the output file write_table( feature_vectors_tel_table, self.output_path, - f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL1_TEL_GROUP}/features/{self.prefix}/tel_{tel_id:03d}", ) self.log.info( "DL1 feature vectors was stored in '%s' under '%s'", self.output_path, - f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{tel_id:03d}", + f"{DL1_TEL_GROUP}/features/{self.prefix}/tel_{tel_id:03d}", ) def _store_mc_telescope_pointing(self, all_identifiers): @@ -1644,7 +1806,7 @@ def _store_mc_telescope_pointing(self, all_identifiers): keys=["obs_id", "tel_id"], ) # TODO: use keep_order for astropy v7.0.0 - tel_pointing.sort(TELESCOPE_EVENT_KEYS) + tel_pointing.sort(TEL_EVENT_KEYS) # Retrieve the example identifiers for the selected telescope tel_pointing_table = Table( { @@ -1656,13 +1818,12 @@ def _store_mc_telescope_pointing(self, all_identifiers): write_table( tel_pointing_table, self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) self.log.info( "DL1 telescope pointing table was stored in '%s' under '%s'", self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) pointing_info.append(tel_pointing) pointing_info = vstack(pointing_info) @@ -1713,7 +1874,6 @@ class StereoPredictCTLearnModel(PredictCTLearnModel): --energy_model="/path/to/your/stereo/energy/ctlearn_model.cpk" \\ --skydirection_model="/path/to/your/stereo/skydirection/ctlearn_model.cpk" \\ --output output.dl2.h5 \\ - --PredictCTLearnModel.overwrite_tables=True \\ """ def start(self): @@ -1721,7 +1881,10 @@ def start(self): # Retrieve the IDs from the dl1dh for the prediction tables example_identifiers = self.dl1dh_reader.unique_example_identifiers.copy() example_identifiers.keep_columns(SUBARRAY_EVENT_KEYS) - all_identifiers = self.dl1dh_reader.subarray_trigger_table.copy() + all_identifiers = read_table( + self.output_path, + DL1_TEL_TRIGGER_TABLE, + ) all_identifiers.keep_columns(SUBARRAY_EVENT_KEYS + ["time"]) nonexample_identifiers = setdiff( all_identifiers, example_identifiers, keys=SUBARRAY_EVENT_KEYS @@ -1750,11 +1913,11 @@ def start(self): pointing_info = super()._store_pointing(all_identifiers) self.log.info("Starting the prediction...") - classification_feature_vectors = None + particletype_feature_vectors = None if self.load_type_model_from is not None: # Predict the classification of the primary particle - 
classification_table, classification_feature_vectors = ( - super()._predict_classification(example_identifiers) + particletype_table, particletype_feature_vectors = ( + super()._predict_particletype(example_identifiers) ) if self.dl2_subarray: # Produce output table with NaNs for missing predictions @@ -1764,46 +1927,46 @@ def start(self): columns=[f"{self.prefix}_tel_prediction"], shapes=[(len(nonexample_identifiers),)], ) - classification_table = vstack([classification_table, nan_table]) - # Add is_valid column to the classification table - classification_table.add_column( + particletype_table = vstack([particletype_table, nan_table]) + # Add is_valid column to the particletype table + particletype_table.add_column( ~np.isnan( - classification_table[f"{self.prefix}_tel_prediction"].data, + particletype_table[f"{self.prefix}_tel_prediction"].data, dtype=bool, ), name=f"{self.prefix}_tel_is_valid", ) # Rename the columns for the stereo mode - classification_table.rename_column( + particletype_table.rename_column( f"{self.prefix}_tel_prediction", f"{self.prefix}_prediction" ) - classification_table.rename_column( + particletype_table.rename_column( f"{self.prefix}_tel_is_valid", f"{self.prefix}_is_valid" ) - # Deduplicate the subarray classification table to have only one entry per event - classification_table = super().deduplicate_first_valid( - table=classification_table, + # Deduplicate the subarray particletype table to have only one entry per event + particletype_table = super().deduplicate_first_valid( + table=particletype_table, keys=SUBARRAY_EVENT_KEYS, valid_col=f"{self.prefix}_is_valid", ) - classification_table.sort(SUBARRAY_EVENT_KEYS) + particletype_table.sort(SUBARRAY_EVENT_KEYS) # Add the default values and meta data to the table add_defaults_and_meta( - classification_table, + particletype_table, ParticleClassificationContainer, prefix=self.prefix, ) # Save the prediction to the output file write_table( - classification_table, + particletype_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", overwrite=self.overwrite_tables, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", ) energy_feature_vectors = None if self.load_energy_model_from is not None: @@ -1851,13 +2014,12 @@ def start(self): write_table( energy_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", ) direction_feature_vectors = None if self.load_skydirection_model_from is not None: @@ -1909,13 +2071,12 @@ def start(self): write_table( direction_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}", ) # Create the feature vector table if the DL1 features are enabled @@ -1924,17 +2085,17 @@ def start(self): feature_vector_table = super()._create_feature_vectors_table( example_identifiers, nonexample_identifiers, - 
classification_feature_vectors, + particletype_feature_vectors, energy_feature_vectors, direction_feature_vectors, ) # Loop over the selected telescopes and store the feature vectors # for each telescope in the output file. The feature vectors are stored - # in the DL1_TELESCOPE_GROUP/features/{prefix}/tel_{tel_id:03d} table. + # in the DL1_TEL_GROUP/features/{prefix}/tel_{tel_id:03d} table. # Rename the columns for the stereo mode feature_vector_table.rename_column( - f"{self.prefix}_tel_classification_feature_vectors", - f"{self.prefix}_classification_feature_vectors", + f"{self.prefix}_tel_particletype_feature_vectors", + f"{self.prefix}_particletype_feature_vectors", ) feature_vector_table.rename_column( f"{self.prefix}_tel_energy_feature_vectors", @@ -1953,7 +2114,6 @@ def start(self): feature_vector_table, self.output_path, f"{DL1_SUBARRAY_GROUP}/features/{self.prefix}", - overwrite=self.overwrite_tables, ) self.log.info( "DL1 feature vectors was stored in '%s' under '%s'", @@ -1973,7 +2133,7 @@ def _store_mc_subarray_pointing(self, all_identifiers): # Read the subarray pointing table pointing_info = read_table( self.input_url, - f"{SIMULATION_CONFIG_TABLE}", + SIMULATION_RUN_TABLE, ) # Assuming min_az = max_az and min_alt = max_alt pointing_info.keep_columns(["obs_id", "min_az", "min_alt"]) @@ -2001,13 +2161,12 @@ def _store_mc_subarray_pointing(self, all_identifiers): write_table( pointing_table, self.output_path, - f"{SUBARRAY_POINTING_GROUP}", - overwrite=self.overwrite_tables, + DL1_SUBARRAY_POINTING_GROUP, ) self.log.info( "DL1 subarray pointing table was stored in '%s' under '%s'", self.output_path, - f"{SUBARRAY_POINTING_GROUP}", + DL1_SUBARRAY_POINTING_GROUP, ) return pointing_info diff --git a/ctlearn/tools/tests/test_predict_model.py b/ctlearn/tools/tests/test_predict_model.py index 55e6d97e..23f32352 100644 --- a/ctlearn/tools/tests/test_predict_model.py +++ b/ctlearn/tools/tests/test_predict_model.py @@ -1,11 +1,31 @@ import shutil import numpy as np +import pytest from ctapipe.core import run_tool from ctapipe.io import TableLoader from ctlearn.tools import MonoPredictCTLearnModel -def test_predict_model(ctlearn_trained_dl1_models, dl1_gamma_file, tmp_path): +# Columns that should be present in the output DL2 file +REQUIRED_COLUMNS = [ + "event_id", + "obs_id", + "CTLearn_alt", + "true_alt", + "CTLearn_az", + "true_az", + "CTLearn_prediction", + "true_shower_primary_id", + "CTLearn_energy", + "true_energy", + "CTLearn_is_valid", + "CTLearn_telescopes", + "tels_with_trigger" +] + + +@pytest.mark.parametrize("dl2_tel_flag", ["dl2-telescope", "no-dl2-telescope"]) +def test_predict_model(tmp_path, ctlearn_trained_dl1_models, dl1_gamma_file, dl2_tel_flag): """ Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. Each test run gets its own isolated temp directories. 
@@ -24,54 +44,51 @@ def test_predict_model(ctlearn_trained_dl1_models, dl1_gamma_file, tmp_path): assert model_file.exists(), f"Trained model file not found for {reco_task}" # Build command-line arguments - output_file = dl2_dir / "gamma.dl2.h5" argv = [ f"--input_url={dl1_gamma_file}", - f"--output={output_file}", "--PredictCTLearnModel.batch_size=4", "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_model_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_model_energy.keras", + f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_model_cameradirection.keras", + "--no-dl1-images", + "--no-true-images", + f"--{dl2_tel_flag}", ] - - # Run Prediction for energy and type together - assert run_tool( - MonoPredictCTLearnModel(), - argv = argv + [ - f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_model_type.keras", - f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_model_energy.keras", - "--use-HDF5Merger", - "--no-dl1-images", - "--no-true-images", - ], - cwd=tmp_path - ) == 0 - - assert run_tool( - MonoPredictCTLearnModel(), - argv= argv + [ - f"--PredictCTLearnModel.load_cameradirection_model_from=" - f"{model_dir}/ctlearn_model_cameradirection.keras", - "--no-use-HDF5Merger", - ], - cwd=tmp_path, - ) == 0 + # Test with different allowed telescope lists (single and multiple telescope IDs) + allowed_tels_dict = {"single_tel_id" : [7], "multiple_tel_ids": [7, 13, 15, 16, 17, 19]} + for name, allowed_tels in allowed_tels_dict.items(): + output_file = dl2_dir / f"gamma_{dl2_tel_flag}_{name}.dl2.h5" + # Run Prediction tool + assert run_tool( + MonoPredictCTLearnModel(), + argv = argv + [ + f"--output={output_file}", + f"--DLImageReader.allowed_tels={allowed_tels}", + ], + cwd=tmp_path + ) == 0 - - allowed_tels = [7, 13, 15, 16, 17, 19] - required_columns = [ - "telescope_pointing_azimuth", - "telescope_pointing_altitude", - "CTLearn_alt", - "CTLearn_az", - "CTLearn_prediction", - "CTLearn_energy", - ] - # Check that the output DL2 file was created - assert output_file.exists(), "Output DL2 file not created" - # Check that the created DL2 file can be read with the TableLoader - with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: - events = loader.read_telescope_events_by_id(telescopes=allowed_tels) - for tel_id in allowed_tels: - assert len(events[tel_id]) > 0 - for col in required_columns: - assert col in events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" - assert events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + # Check that the created DL2 file can be read with the TableLoader + with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: + # Check telescope-wise data + tel_events = loader.read_telescope_events_by_id(telescopes=allowed_tels, dl1_parameters=True, dl2=True) + for tel_id in allowed_tels: + assert len(tel_events[tel_id]) > 0 + for col in REQUIRED_COLUMNS + [ + "tel_id", + "hillas_intensity", + "leakage_pixels_width_2", + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + ]: + assert col in tel_events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" + assert tel_events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file 
{output_file.name}" + # Check subarray-wise data + subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) + assert len(subarray_events) > 0 + for col in REQUIRED_COLUMNS: + assert col in subarray_events.colnames, f"{col} missing in DL2 file {output_file.name}" + assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py index ed661409..b32c5256 100644 --- a/ctlearn/tools/tests/test_train_model.py +++ b/ctlearn/tools/tests/test_train_model.py @@ -5,7 +5,7 @@ from ctapipe.core import run_tool from ctlearn.tools import TrainCTLearnModel -@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection", "skydirection"]) +@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection"]) def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_path): """ Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. @@ -25,7 +25,8 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_pat # Output directory for trained model output_dir = tmp_path / f"ctlearn_{reco_task}" - + allowed_tels = [7, 13, 15, 16, 17, 19] + # Build command-line arguments argv = [ f"--signal={signal_dir}", @@ -35,6 +36,7 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_pat "--TrainCTLearnModel.n_epochs=2", "--TrainCTLearnModel.batch_size=4", "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}" ] # Include background only for classification task @@ -63,14 +65,11 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_pat assert "val_loss" in log_df.columns, ( f"'val_loss' column missing in training_log.csv for {reco_task}" ) - val_loss_min= 0.0 - val_loss_max= 1.5 if reco_task == "skydirection" else 1.0 - # Check val_loss values are between 0.0 and 1.0 (or 1.5 for skydirection) val_loss = log_df["val_loss"].dropna() assert not val_loss.empty, ( f"'val_loss' column is empty for {reco_task}" ) - assert ((val_loss >= val_loss_min) & (val_loss <= val_loss_max)).all(), ( - f"'val_loss' values out of range [{val_loss_min}, {val_loss_max}] for {reco_task}: " + assert ((val_loss >= 0.0) & (val_loss <= 1.0)).all(), ( + f"'val_loss' values out of range [0.0, 1.0] for {reco_task}: " f"{val_loss.tolist()}" ) \ No newline at end of file