From 7a9a84fcfc028fc03682c6c64fe9f8b24e834aee Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 18 Nov 2025 16:52:48 -0800 Subject: [PATCH 01/21] minor changes --- src/biocutils/combine_rows.py | 2 +- src/biocutils/package_utils.py | 6 ++-- src/biocutils/subset_rows.py | 49 ++++++++++++++++++++++++----- src/biocutils/subset_sequence.py | 54 +++++++++++++++++++++++--------- 4 files changed, 86 insertions(+), 25 deletions(-) diff --git a/src/biocutils/combine_rows.py b/src/biocutils/combine_rows.py index 1783dcf..9102d1e 100644 --- a/src/biocutils/combine_rows.py +++ b/src/biocutils/combine_rows.py @@ -77,7 +77,7 @@ def _combine_rows_sparse_arrays(*x): _check_array_dimensions(x, 0) if is_list_of_type(x, sp.sparray): combined = sp.vstack(x) - return _coerce_sparse_array(first, combined, sp) + return _coerce_sparse_array(x[0], combined, sp) warn("not all elements are SciPy sparse arrays") x = [convert_to_dense(y) for y in x] diff --git a/src/biocutils/package_utils.py b/src/biocutils/package_utils.py index e6e4b3a..dcf03d1 100644 --- a/src/biocutils/package_utils.py +++ b/src/biocutils/package_utils.py @@ -4,13 +4,13 @@ def is_package_installed(package_name: str) -> bool: - """Check if the package is installed. + """Check if a package is installed. Args: - package_name (str): Package name. + package_name: Package name. Returns: - bool: True if package is installed, otherwise False. + True if package is installed, otherwise False. """ _installed = False try: diff --git a/src/biocutils/subset_rows.py b/src/biocutils/subset_rows.py index a09fadb..e5b31cb 100644 --- a/src/biocutils/subset_rows.py +++ b/src/biocutils/subset_rows.py @@ -1,19 +1,21 @@ from functools import singledispatch from typing import Any, Sequence +import numpy + +from .package_utils import is_package_installed + @singledispatch def subset_rows(x: Any, indices: Sequence[int]) -> Any: - """ + """Subset a high-dimensional object by indices on the first dimension. 
+ Subset ``x`` by ``indices`` on the first dimension. The default - method attempts to use ``x``'s ``__getitem__`` method, + method attempts to use ``x``'s ``__getitem__`` method. Args: - x: - Any high-dimensional object. - - indices: - Sequence of non-negative integers specifying the integers of interest. + x: Any high-dimensional object. + indices: Sequence of non-negative integers specifying the rows of interest. Returns: The result of slicing ``x`` by ``indices``. The exact type @@ -22,3 +24,36 @@ def subset_rows(x: Any, indices: Sequence[int]) -> Any: tmp = [slice(None)] * len(x.shape) tmp[0] = indices return x[(*tmp,)] + + +@subset_rows.register +def _subset_rows_numpy(x: numpy.ndarray, indices: Sequence[int]) -> numpy.ndarray: + """Subset a NumPy array by row indices. + + Args: + x: NumPy array to subset. + indices: Sequence of non-negative integers specifying rows. + + Returns: + Subsetted NumPy array. + """ + tmp = [slice(None)] * len(x.shape) + tmp[0] = indices + return x[(*tmp,)] + + +if is_package_installed("pandas"): + from pandas import DataFrame + + @subset_rows.register(DataFrame) + def _subset_rows_dataframe(x: DataFrame, indices: Sequence[int]) -> DataFrame: + """Subset a pandas DataFrame by row indices. + + Args: + x: DataFrame to subset. + indices: Sequence of non-negative integers specifying rows. + + Returns: + Subsetted DataFrame. + """ + return x.iloc[indices, :] diff --git a/src/biocutils/subset_sequence.py b/src/biocutils/subset_sequence.py index 609027e..1c39517 100644 --- a/src/biocutils/subset_sequence.py +++ b/src/biocutils/subset_sequence.py @@ -4,17 +4,15 @@ @singledispatch def subset_sequence(x: Any, indices: Sequence[int]) -> Any: - """ + """Subset a sequence-like object by indices. + Subset ``x`` by ``indices`` to obtain a new object. The default method attempts to use ``x``'s ``__getitem__`` method. Args: - x: - Any object that supports ``__getitem__`` with an integer sequence. 
- - indices: - Sequence of non-negative integers specifying the integers of interest. - All indices should be less than ``len(x)``. + x: Any object that supports ``__getitem__`` with an integer sequence. + indices: Sequence of non-negative integers specifying the positions of + interest. All indices should be less than ``len(x)``. Returns: The result of slicing ``x`` by ``indices``. The exact type @@ -24,19 +22,47 @@ def subset_sequence(x: Any, indices: Sequence[int]) -> Any: @subset_sequence.register -def _subset_sequence_list(x: list, indices: Sequence) -> list: +def _subset_sequence_list(x: list, indices: Sequence[int]) -> list: + """Subset a list by indices. + + Args: + x: List to subset. + indices: Sequence of non-negative integers specifying positions. + + Returns: + A new list containing the specified elements. + """ return type(x)(x[i] for i in indices) @subset_sequence.register -def _subset_sequence_range(x: range, indices: Sequence) -> Union[list, range]: +def _subset_sequence_range(x: range, indices: Sequence[int]) -> Union[list, range]: + """Subset a range by indices. + + Args: + x: Range object to subset. + indices: Sequence of non-negative integers or a range object. + + Returns: + A range if indices is a range, otherwise a list. + """ if isinstance(indices, range): # We can just assume that all 'indices' are in [0, len(x)), # so no need to handle out-of-range indices. - return range( - x.start + x.step * indices.start, - x.start + x.step * indices.stop, - x.step * indices.step - ) + return range(x.start + x.step * indices.start, x.start + x.step * indices.stop, x.step * indices.step) else: return [x[i] for i in indices] + + +@subset_sequence.register +def _subset_sequence_tuple(x: tuple, indices: Sequence[int]) -> tuple: + """Subset a tuple by indices. + + Args: + x: Tuple to subset. + indices: Sequence of non-negative integers specifying positions. + + Returns: + A new tuple containing the specified elements. 
+ """ + return tuple(x[i] for i in indices) From ef1ca9678e7561c1654b88628d545d9d2e64502a Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 18 Nov 2025 16:55:18 -0800 Subject: [PATCH 02/21] update actions --- .github/workflows/publish-pypi.yml | 52 +++++++++++++++++++++ .github/workflows/pypi-publish.yml | 51 --------------------- .github/workflows/pypi-test.yml | 40 ---------------- .github/workflows/run-tests.yml | 73 ++++++++++++++++++++++++++++++ 4 files changed, 125 insertions(+), 91 deletions(-) create mode 100644 .github/workflows/publish-pypi.yml delete mode 100644 .github/workflows/pypi-publish.yml delete mode 100644 .github/workflows/pypi-test.yml create mode 100644 .github/workflows/run-tests.yml diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml new file mode 100644 index 0000000..405fee0 --- /dev/null +++ b/.github/workflows/publish-pypi.yml @@ -0,0 +1,52 @@ +name: Publish to PyPI + +on: + push: + tags: "*" + +jobs: + build: + runs-on: ubuntu-latest + permissions: + id-token: write + repository-projects: write + contents: write + pages: write + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: 3.12 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox + + - name: Test with tox + run: | + tox + + - name: Build Project and Publish + run: | + python -m tox -e clean,build + + # This uses the trusted publisher workflow so no token is required. + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + - name: Build docs + run: | + tox -e docs + + - run: touch ./docs/_build/html/.nojekyll + + - name: GH Pages Deployment + uses: JamesIves/github-pages-deploy-action@v4 + with: + branch: gh-pages # The branch the action should deploy to. 
+ folder: ./docs/_build/html + clean: true # Automatically remove deleted files from the deploy branch diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml deleted file mode 100644 index 030cd10..0000000 --- a/.github/workflows/pypi-publish.yml +++ /dev/null @@ -1,51 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Publish to PyPI - -on: - push: - tags: "*" - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: 3.11 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install flake8 pytest tox - # - name: Lint with flake8 - # run: | - # # stop the build if there are Python syntax errors or undefined names - # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - # # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with tox - run: | - tox - - name: Build docs - run: | - tox -e docs - - run: touch ./docs/_build/html/.nojekyll - - name: GH Pages Deployment - uses: JamesIves/github-pages-deploy-action@4.1.3 - with: - branch: gh-pages # The branch the action should deploy to. 
- folder: ./docs/_build/html - clean: true # Automatically remove deleted files from the deploy branch - - name: Build Project and Publish - run: | - python -m tox -e clean,build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_PASSWORD }} diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml deleted file mode 100644 index 22f6c4a..0000000 --- a/.github/workflows/pypi-test.yml +++ /dev/null @@ -1,40 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Test the library - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ] - - name: Python ${{ matrix.python-version }} - steps: - - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install flake8 pytest tox - # - name: Lint with flake8 - # run: | - # # stop the build if there are Python syntax errors or undefined names - # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - # # flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with tox - run: | - tox diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml new file mode 100644 index 0000000..e8ab6fa --- /dev/null +++ b/.github/workflows/run-tests.yml @@ -0,0 +1,73 @@ +name: Test the library + +on: + push: + branches: + - master # for legacy repos + - main + pull_request: + branches: + - master # for legacy repos + - main + workflow_dispatch: # Allow manually triggering the workflow + schedule: + # Run roughly every 15 days at 00:00 UTC + # (useful to check if updates on dependencies break the package) + - cron: "0 0 1,16 * *" + +permissions: + contents: read + +concurrency: + group: >- + ${{ github.workflow }}-${{ github.ref_type }}- + ${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +jobs: + test: + strategy: + matrix: + python: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] + platform: + - ubuntu-latest + - macos-latest + - windows-latest + runs-on: ${{ matrix.platform }} + name: Python ${{ matrix.python }}, ${{ matrix.platform }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + id: setup-python + with: + python-version: ${{ matrix.python }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox coverage + + - name: Run tests + run: >- + pipx run --python '${{ steps.setup-python.outputs.python-path }}' + tox + -- -rFEx --durations 10 --color yes --cov --cov-branch --cov-report=xml # pytest args + + - name: Check for codecov token availability + id: codecov-check + shell: bash + run: | + if [ ${{ secrets.CODECOV_TOKEN }} != '' ]; then + echo "codecov=true" >> $GITHUB_OUTPUT; + else + echo "codecov=false" >> $GITHUB_OUTPUT; + fi + + - name: Upload coverage reports to Codecov with GitHub Action + uses: codecov/codecov-action@v5 + if: ${{ steps.codecov-check.outputs.codecov == 'true' }} + env: + CODECOV_TOKEN: ${{ 
secrets.CODECOV_TOKEN }} + slug: ${{ github.repository }} + flags: ${{ matrix.platform }} - py${{ matrix.python }} From 43775ea197ddfbed0128eb5674f571e45bd80dd0 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 18 Nov 2025 17:40:48 -0800 Subject: [PATCH 03/21] minor docstring updates --- src/biocutils/BooleanList.py | 19 +++++++++++++++++-- src/biocutils/FloatList.py | 19 +++++++++++++++++-- src/biocutils/IntegerList.py | 19 +++++++++++++++++-- src/biocutils/StringList.py | 19 +++++++++++++++++-- src/biocutils/extract_column_names.py | 2 +- src/biocutils/extract_row_names.py | 2 +- src/biocutils/is_high_dimensional.py | 9 +++++---- src/biocutils/is_missing_scalar.py | 10 ++++++---- src/biocutils/match.py | 2 +- src/biocutils/normalize_subscript.py | 24 +++++++++++++----------- src/biocutils/print_wrapped_table.py | 11 +++++++---- src/biocutils/reverse_index.py | 6 +++--- src/biocutils/which.py | 2 +- 13 files changed, 106 insertions(+), 38 deletions(-) diff --git a/src/biocutils/BooleanList.py b/src/biocutils/BooleanList.py index 1e46826..f309322 100644 --- a/src/biocutils/BooleanList.py +++ b/src/biocutils/BooleanList.py @@ -10,10 +10,25 @@ def _coerce_to_bool(x: Any): class _SubscriptCoercer: - def __init__(self, data): + """Coercer for subscript operations on BooleanList.""" + + def __init__(self, data: Sequence) -> None: + """Initialize the coercer. + + Args: + data: Sequence of values to coerce. + """ self._data = data - def __getitem__(self, index): + def __getitem__(self, index: int) -> Optional[bool]: + """Get an item and coerce it to boolean. + + Args: + index: Index of the item. + + Returns: + Coerced boolean value. 
+ """ return _coerce_to_bool(self._data[index]) diff --git a/src/biocutils/FloatList.py b/src/biocutils/FloatList.py index 3b587bd..3249da5 100644 --- a/src/biocutils/FloatList.py +++ b/src/biocutils/FloatList.py @@ -15,10 +15,25 @@ def _coerce_to_float(x: Any): class _SubscriptCoercer: - def __init__(self, data): + """Coercer for subscript operations on FloatList.""" + + def __init__(self, data: Sequence) -> None: + """Initialize the coercer. + + Args: + data: Sequence of values to coerce. + """ self._data = data - def __getitem__(self, index): + def __getitem__(self, index: int) -> Optional[float]: + """Get an item and coerce it to float. + + Args: + index: Index of the item. + + Returns: + Coerced float value. + """ return _coerce_to_float(self._data[index]) diff --git a/src/biocutils/IntegerList.py b/src/biocutils/IntegerList.py index c06e3e6..8f8f191 100644 --- a/src/biocutils/IntegerList.py +++ b/src/biocutils/IntegerList.py @@ -15,10 +15,25 @@ def _coerce_to_int(x: Any): class _SubscriptCoercer: - def __init__(self, data): + """Coercer for subscript operations on IntegerList.""" + + def __init__(self, data: Sequence) -> None: + """Initialize the coercer. + + Args: + data: Sequence of values to coerce. + """ self._data = data - def __getitem__(self, index): + def __getitem__(self, index: int) -> Optional[int]: + """Get an item and coerce it to integer. + + Args: + index: Index of the item. + + Returns: + Coerced integer value. + """ return _coerce_to_int(self._data[index]) diff --git a/src/biocutils/StringList.py b/src/biocutils/StringList.py index c16366b..518b729 100644 --- a/src/biocutils/StringList.py +++ b/src/biocutils/StringList.py @@ -10,10 +10,25 @@ def _coerce_to_str(x: Any): class _SubscriptCoercer: - def __init__(self, data): + """Coercer for subscript operations on StringList.""" + + def __init__(self, data: Sequence) -> None: + """Initialize the coercer. + + Args: + data: Sequence of values to coerce. 
+ """ self._data = data - def __getitem__(self, index): + def __getitem__(self, index: int) -> Optional[str]: + """Get an item and coerce it to string. + + Args: + index: Index of the item. + + Returns: + Coerced string value. + """ return _coerce_to_str(self._data[index]) diff --git a/src/biocutils/extract_column_names.py b/src/biocutils/extract_column_names.py index 5b63683..6f98521 100644 --- a/src/biocutils/extract_column_names.py +++ b/src/biocutils/extract_column_names.py @@ -15,7 +15,7 @@ def extract_column_names(x: Any) -> numpy.ndarray: """Access column names from 2-dimensional representations. Args: - x: Any object. + x: Any object with column names. Returns: Array of strings containing column names. diff --git a/src/biocutils/extract_row_names.py b/src/biocutils/extract_row_names.py index 81b81fb..5ea3927 100644 --- a/src/biocutils/extract_row_names.py +++ b/src/biocutils/extract_row_names.py @@ -15,7 +15,7 @@ def extract_row_names(x: Any) -> numpy.ndarray: """Access row names from 2-dimensional representations. Args: - x: Any object. + x: Any object with row names. Returns: Array of strings containing row names. diff --git a/src/biocutils/is_high_dimensional.py b/src/biocutils/is_high_dimensional.py index 9d9d5d2..3bdfc6f 100644 --- a/src/biocutils/is_high_dimensional.py +++ b/src/biocutils/is_high_dimensional.py @@ -1,15 +1,16 @@ from functools import singledispatch +from typing import Any @singledispatch -def is_high_dimensional(x): - """ +def is_high_dimensional(x: Any) -> bool: + """Check if an object is high-dimensional. + Whether an object is high-dimensional, i.e., has a ``shape`` attribute that is of length greater than 1. Args: - x: - Some kind of object. + x: Some kind of object. Returns: Whether ``x`` is high-dimensional. 
diff --git a/src/biocutils/is_missing_scalar.py b/src/biocutils/is_missing_scalar.py index 8392fdf..f32dd60 100644 --- a/src/biocutils/is_missing_scalar.py +++ b/src/biocutils/is_missing_scalar.py @@ -1,11 +1,13 @@ +from typing import Any + import numpy -def is_missing_scalar(x) -> bool: - """ +def is_missing_scalar(x: Any) -> bool: + """Check if a scalar value is missing. + Args: - x: - Any scalar value. + x: Any scalar value. Returns: Whether ``x`` is None or a NumPy masked constant. diff --git a/src/biocutils/match.py b/src/biocutils/match.py index 2881bec..0e237c5 100644 --- a/src/biocutils/match.py +++ b/src/biocutils/match.py @@ -8,7 +8,7 @@ def match( x: Sequence, targets: Union[dict, Sequence], duplicate_method: DUPLICATE_METHOD = "first", - dtype: Optional[numpy.ndarray] = None, + dtype: Optional[numpy.dtype] = None, fail_missing: Optional[bool] = None, ) -> numpy.ndarray: """Find a matching value of each element of ``x`` in ``target``. diff --git a/src/biocutils/normalize_subscript.py b/src/biocutils/normalize_subscript.py index 890a9b3..da37a7e 100644 --- a/src/biocutils/normalize_subscript.py +++ b/src/biocutils/normalize_subscript.py @@ -23,11 +23,11 @@ class NormalizedSubscript: such that :py:func:`~normalize_subscript` is just a no-op. """ - def __init__(self, subscript: Sequence[int]): - """ + def __init__(self, subscript: Sequence[int]) -> None: + """Initialize a NormalizedSubscript. + Args: - subscript: - Sequence of integers for a normalized subscript. + subscript: Sequence of integers for a normalized subscript. """ self._subscript = subscript @@ -40,11 +40,11 @@ def subscript(self) -> Sequence[int]: return self._subscript def __getitem__(self, index: Any) -> Any: - """ + """Get an item from the subscript. + Args: - index: - Any argument accepted by the ``__getitem__`` method of the - :py:attr:`~subscript`. + index: Any argument accepted by the ``__getitem__`` method of the + subscript. 
Returns: The same return value as the ``__getitem__`` method of the @@ -53,7 +53,8 @@ def __getitem__(self, index: Any) -> Any: return self._subscript[index] def __len__(self) -> int: - """ + """Get the length of the subscript. + Returns: Length of the subscript. """ @@ -68,8 +69,9 @@ def normalize_subscript( length: int, names: Optional[Sequence[str]] = None, non_negative_only: bool = True, -) -> Tuple: - """ +) -> Tuple[Sequence[int], bool]: + """Normalize a subscript into a sequence of integer indices. + Normalize a subscript for ``__getitem__`` or friends into a sequence of integer indices, for consistent downstream use. diff --git a/src/biocutils/print_wrapped_table.py b/src/biocutils/print_wrapped_table.py index cbfcb36..12f602f 100644 --- a/src/biocutils/print_wrapped_table.py +++ b/src/biocutils/print_wrapped_table.py @@ -144,14 +144,17 @@ def truncate_strings(values: List[str], width: int = 40) -> List[str]: return replacement -def print_type(x) -> str: - """Print the type of an object, with some special behavior for certain classes (e.g., to add the data type of NumPy - arrays). This is intended for display at the top of the columns of :py:meth:`~print_wrapped_table`. +def print_type(x: Any) -> str: + """Print the type of an object. + + Print the type of an object, with some special behavior for certain classes + (e.g., to add the data type of NumPy arrays). This is intended for display + at the top of the columns of :py:meth:`~print_wrapped_table`. Args: x: Some object. - Return: + Returns: String containing the class of the object. """ cls = type(x).__name__ diff --git a/src/biocutils/reverse_index.py b/src/biocutils/reverse_index.py index 3b64c9e..528f1ee 100644 --- a/src/biocutils/reverse_index.py +++ b/src/biocutils/reverse_index.py @@ -1,16 +1,16 @@ from typing import Sequence -def build_reverse_index(obj: Sequence[str]): +def build_reverse_index(obj: Sequence[str]) -> dict: """Build a reverse index by name, for fast lookup operations. 
- Only contains the first occurence of a term. + Only contains the first occurrence of a term. Args: obj: List of names. Returns: - A map of keys and their index positions. + A dictionary mapping names to their index positions. """ revmap = {} for i, n in enumerate(obj): diff --git a/src/biocutils/which.py b/src/biocutils/which.py index 2a0c0d1..87d2fdd 100644 --- a/src/biocutils/which.py +++ b/src/biocutils/which.py @@ -4,7 +4,7 @@ def which( x: Sequence, - dtype: Optional[numpy.ndarray] = None, + dtype: Optional[numpy.dtype] = None, ) -> numpy.ndarray: """Report the indices of all elements of ``x`` that are truthy. From 591af1f33975019007a47053e344359d20ac9bbf Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 18 Nov 2025 21:14:57 -0800 Subject: [PATCH 04/21] missing import --- src/biocutils/print_wrapped_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/biocutils/print_wrapped_table.py b/src/biocutils/print_wrapped_table.py index 12f602f..583f583 100644 --- a/src/biocutils/print_wrapped_table.py +++ b/src/biocutils/print_wrapped_table.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Sequence +from typing import Any, List, Optional, Sequence import numpy From 21b8bfcd530389de97e12527c30c83421656080a Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Thu, 18 Dec 2025 14:58:07 -0800 Subject: [PATCH 05/21] renaming to follow pep guidelines --- src/biocutils/Factor.py | 4 ++-- src/biocutils/__init__.py | 14 +++++++------- src/biocutils/bioc_object.py | 2 +- src/biocutils/{BooleanList.py => boolean_list.py} | 4 ++-- src/biocutils/{FloatList.py => float_list.py} | 4 ++-- src/biocutils/{IntegerList.py => integer_list.py} | 4 ++-- src/biocutils/{NamedList.py => named_list.py} | 2 +- src/biocutils/normalize_subscript.py | 6 +++--- src/biocutils/{StringList.py => string_list.py} | 4 ++-- tests/test_biocobject.py | 2 +- 10 files changed, 23 insertions(+), 23 deletions(-) rename src/biocutils/{BooleanList.py => boolean_list.py} 
(98%) rename src/biocutils/{FloatList.py => float_list.py} (98%) rename src/biocutils/{IntegerList.py => integer_list.py} (98%) rename src/biocutils/{NamedList.py => named_list.py} (99%) rename src/biocutils/{StringList.py => string_list.py} (98%) diff --git a/src/biocutils/Factor.py b/src/biocutils/Factor.py index 374b34e..fedf75e 100644 --- a/src/biocutils/Factor.py +++ b/src/biocutils/Factor.py @@ -10,14 +10,14 @@ from .is_list_of_type import is_list_of_type from .is_missing_scalar import is_missing_scalar from .match import match -from .Names import Names, _combine_names, _name_to_position, _sanitize_names +from .names import Names, _combine_names, _name_to_position, _sanitize_names from .normalize_subscript import ( NormalizedSubscript, SubscriptTypes, normalize_subscript, ) from .print_truncated import print_truncated_list -from .StringList import StringList +from .string_list import StringList from .subset_sequence import subset_sequence diff --git a/src/biocutils/__init__.py b/src/biocutils/__init__.py index 6f2326f..13fa31c 100644 --- a/src/biocutils/__init__.py +++ b/src/biocutils/__init__.py @@ -15,13 +15,13 @@ finally: del version, PackageNotFoundError -from .Factor import Factor -from .StringList import StringList -from .IntegerList import IntegerList -from .FloatList import FloatList -from .BooleanList import BooleanList -from .Names import Names -from .NamedList import NamedList +from .factor import Factor +from .string_list import StringList +from .integer_list import IntegerList +from .float_list import FloatList +from .boolean_list import BooleanList +from .names import Names +from .named_list import NamedList from .factorize import factorize from .intersect import intersect diff --git a/src/biocutils/bioc_object.py b/src/biocutils/bioc_object.py index 1302663..ac40431 100644 --- a/src/biocutils/bioc_object.py +++ b/src/biocutils/bioc_object.py @@ -9,7 +9,7 @@ except ImportError: Self = "BiocObject" -from .NamedList import NamedList +from 
.named_list import NamedList __author__ = "Jayaram Kancherla" __copyright__ = "jkanche" diff --git a/src/biocutils/BooleanList.py b/src/biocutils/boolean_list.py similarity index 98% rename from src/biocutils/BooleanList.py rename to src/biocutils/boolean_list.py index f309322..6f19900 100644 --- a/src/biocutils/BooleanList.py +++ b/src/biocutils/boolean_list.py @@ -1,7 +1,7 @@ from typing import Any, Iterable, Optional, Sequence, Union -from .NamedList import NamedList -from .Names import Names +from .named_list import NamedList +from .names import Names from .normalize_subscript import SubscriptTypes diff --git a/src/biocutils/FloatList.py b/src/biocutils/float_list.py similarity index 98% rename from src/biocutils/FloatList.py rename to src/biocutils/float_list.py index 3249da5..ccf8bd4 100644 --- a/src/biocutils/FloatList.py +++ b/src/biocutils/float_list.py @@ -1,7 +1,7 @@ from typing import Any, Iterable, Optional, Sequence, Union -from .NamedList import NamedList -from .Names import Names +from .named_list import NamedList +from .names import Names from .normalize_subscript import SubscriptTypes diff --git a/src/biocutils/IntegerList.py b/src/biocutils/integer_list.py similarity index 98% rename from src/biocutils/IntegerList.py rename to src/biocutils/integer_list.py index 8f8f191..9ca9fbb 100644 --- a/src/biocutils/IntegerList.py +++ b/src/biocutils/integer_list.py @@ -1,7 +1,7 @@ from typing import Any, Iterable, Optional, Sequence, Union -from .NamedList import NamedList -from .Names import Names +from .named_list import NamedList +from .names import Names from .normalize_subscript import SubscriptTypes diff --git a/src/biocutils/NamedList.py b/src/biocutils/named_list.py similarity index 99% rename from src/biocutils/NamedList.py rename to src/biocutils/named_list.py index af4670a..0d971d0 100644 --- a/src/biocutils/NamedList.py +++ b/src/biocutils/named_list.py @@ -3,7 +3,7 @@ from .assign_sequence import assign_sequence from .combine_sequences import 
combine_sequences -from .Names import Names, _name_to_position, _sanitize_names +from .names import Names, _name_to_position, _sanitize_names from .normalize_subscript import ( NormalizedSubscript, SubscriptTypes, diff --git a/src/biocutils/normalize_subscript.py b/src/biocutils/normalize_subscript.py index da37a7e..d85c6d0 100644 --- a/src/biocutils/normalize_subscript.py +++ b/src/biocutils/normalize_subscript.py @@ -103,7 +103,7 @@ def normalize_subscript( names: List of names for each entry in the object. If not None, this should have length equal to ``length``. Some optimizations - are possible if this is a :py:class:`~Names.Names` object. + are possible if this is a :py:class:`~Names.names` object. non_negative_only: Whether negative indices must be converted into non-negative @@ -138,7 +138,7 @@ def normalize_subscript( + "' for vector-like object with no names" ) i = -1 - from .Names import Names + from .names import Names if isinstance(names, Names): i = names.map(sub) @@ -197,7 +197,7 @@ def normalize_subscript( output = [] has_strings = set() string_positions = [] - from .Names import Names + from .names import Names are_names_indexed = isinstance(names, Names) diff --git a/src/biocutils/StringList.py b/src/biocutils/string_list.py similarity index 98% rename from src/biocutils/StringList.py rename to src/biocutils/string_list.py index 518b729..8fe1d84 100644 --- a/src/biocutils/StringList.py +++ b/src/biocutils/string_list.py @@ -1,7 +1,7 @@ from typing import Any, Iterable, Optional, Sequence, Union -from .NamedList import NamedList -from .Names import Names +from .named_list import NamedList +from .names import Names from .normalize_subscript import SubscriptTypes diff --git a/tests/test_biocobject.py b/tests/test_biocobject.py index 4315995..3645936 100644 --- a/tests/test_biocobject.py +++ b/tests/test_biocobject.py @@ -1,7 +1,7 @@ import pytest from copy import copy from biocutils.bioc_object import BiocObject -from biocutils.NamedList import 
NamedList +from biocutils.named_list import NamedList def test_init_empty(): From dea34833f2949f67d32b3f1117427820713417d3 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Thu, 18 Dec 2025 15:04:18 -0800 Subject: [PATCH 06/21] rename files --- src/biocutils/{Factor.py => factor.py} | 0 src/biocutils/{Names.py => names.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/biocutils/{Factor.py => factor.py} (100%) rename src/biocutils/{Names.py => names.py} (100%) diff --git a/src/biocutils/Factor.py b/src/biocutils/factor.py similarity index 100% rename from src/biocutils/Factor.py rename to src/biocutils/factor.py diff --git a/src/biocutils/Names.py b/src/biocutils/names.py similarity index 100% rename from src/biocutils/Names.py rename to src/biocutils/names.py From 5692f35342bbfe7cf57a83561e6107e3a0b656cc Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Thu, 18 Dec 2025 15:06:02 -0800 Subject: [PATCH 07/21] remove 3.9 from actions --- .github/workflows/run-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index e8ab6fa..e0f247d 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -28,7 +28,7 @@ jobs: test: strategy: matrix: - python: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] + python: ["3.10", "3.11", "3.12", "3.13", "3.14"] platform: - ubuntu-latest - macos-latest From 31bb62783dc16399bb19e74d48e0d0b783564d71 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Thu, 18 Dec 2025 15:07:45 -0800 Subject: [PATCH 08/21] export biocobject --- src/biocutils/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/biocutils/__init__.py b/src/biocutils/__init__.py index 13fa31c..ed683fd 100644 --- a/src/biocutils/__init__.py +++ b/src/biocutils/__init__.py @@ -60,3 +60,5 @@ from .get_height import get_height from .is_high_dimensional import is_high_dimensional + +from .bioc_object import BiocObject \ No 
newline at end of file From c8aa74082d62afa12186bd4bf21dc287dc73d31f Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Thu, 18 Dec 2025 15:13:25 -0800 Subject: [PATCH 09/21] NamedList() refer to use from_dict instead --- src/biocutils/named_list.py | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/src/biocutils/named_list.py b/src/biocutils/named_list.py index 0d971d0..f51b050 100644 --- a/src/biocutils/named_list.py +++ b/src/biocutils/named_list.py @@ -39,6 +39,9 @@ def __init__( _validate: Internal use only. """ + if isinstance(data, dict): + raise TypeError("'data' is a dictionary, use 'NamedList.from_dict' instead.") + if _validate: if data is None: data = [] @@ -86,14 +89,7 @@ def __str__(self) -> str: names if any exist. """ if self._names is not None: - return ( - "[" - + ", ".join( - repr(self._names[i]) + "=" + repr(x) - for i, x in enumerate(self._data) - ) - + "]" - ) + return "[" + ", ".join(repr(self._names[i]) + "=" + repr(x) for i, x in enumerate(self._data)) + "]" else: return repr(self._data) @@ -204,9 +200,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union["NamedList", Any]: else: return self.get_slice(NormalizedSubscript(index)) - def set_value( - self, index: Union[str, int], value: Any, in_place: bool = False - ) -> "NamedList": + def set_value(self, index: Union[str, int], value: Any, in_place: bool = False) -> "NamedList": """ Args: index: @@ -253,9 +247,7 @@ def set_value( return output - def set_slice( - self, index: SubscriptTypes, value: Sequence, in_place: bool = False - ) -> "NamedList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> "NamedList": """ Args: index: @@ -324,9 +316,7 @@ def _define_output(self, in_place: bool) -> "NamedList": else: return self.copy() - def safe_insert( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "NamedList": + def safe_insert(self, index: Union[int, str], value: Any, 
in_place: bool = False) -> "NamedList": """ Args: index: @@ -530,9 +520,7 @@ def _combine_sequences_NamedList(*x: NamedList) -> NamedList: @assign_sequence.register -def _assign_sequence_NamedList( - x: NamedList, indices: Sequence[int], other: Sequence -) -> NamedList: +def _assign_sequence_NamedList(x: NamedList, indices: Sequence[int], other: Sequence) -> NamedList: if isinstance(other, NamedList): # Do NOT set the names if 'other' is a NamedList. Names don't change # during assignment/setting operations, as a matter of policy. This is @@ -541,6 +529,4 @@ def _assign_sequence_NamedList( # of names, and it would be weird for the same sequence of names to # suddently become an invalid indexing vector after an assignment. other = other._data - return type(x)( - assign_sequence(x._data, NormalizedSubscript(indices), other), names=x._names - ) + return type(x)(assign_sequence(x._data, NormalizedSubscript(indices), other), names=x._names) From cd0fa74bc7f460ce77287eaa5a9ee0198e94cc18 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Thu, 18 Dec 2025 15:17:16 -0800 Subject: [PATCH 10/21] switch to classmethod --- src/biocutils/named_list.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/biocutils/named_list.py b/src/biocutils/named_list.py index f51b050..94918bd 100644 --- a/src/biocutils/named_list.py +++ b/src/biocutils/named_list.py @@ -482,28 +482,28 @@ def as_dict(self) -> Dict[str, Any]: output[n] = self[i] return output - @staticmethod - def from_list(x: list) -> "NamedList": + @classmethod + def from_list(cls, x: list) -> "NamedList": """ Args: x: List of data elements. Returns: - A ``NamedList`` instance with the contents of ``x`` and no names. + A instance with the contents of ``x`` and no names. 
""" - return NamedList(x) + return cls(x) - @staticmethod - def from_dict(x: dict) -> "NamedList": + @classmethod + def from_dict(cls, x: dict) -> "NamedList": """ Args: x: Dictionary where keys are strings (or can be coerced to them). Returns: - A ``NamedList`` instance where the list elements are the values of + A instance where the list elements are the values of ``x`` and the names are the stringified keys. """ - return NamedList(list(x.values()), names=Names(str(y) for y in x.keys())) + return cls(list(x.values()), names=Names(str(y) for y in x.keys())) @subset_sequence.register From 4990700560706e0d347181358eb0d42e4a404852 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Fri, 26 Dec 2025 18:33:51 -0800 Subject: [PATCH 11/21] linting, typehints etc etc --- src/biocutils/_utils_combine.py | 2 + src/biocutils/assign.py | 3 +- src/biocutils/assign_rows.py | 6 +- src/biocutils/assign_sequence.py | 8 +- src/biocutils/bioc_object.py | 15 +-- src/biocutils/boolean_list.py | 47 +++++---- src/biocutils/combine.py | 10 +- src/biocutils/combine_columns.py | 17 ++-- src/biocutils/combine_rows.py | 10 +- src/biocutils/combine_sequences.py | 11 +-- src/biocutils/convert_to_dense.py | 3 +- src/biocutils/extract_column_names.py | 7 +- src/biocutils/extract_row_names.py | 4 +- src/biocutils/factor.py | 121 ++++++++--------------- src/biocutils/float_list.py | 25 ++--- src/biocutils/integer_list.py | 30 +++--- src/biocutils/is_high_dimensional.py | 3 +- src/biocutils/is_list_of_type.py | 11 ++- src/biocutils/is_missing_scalar.py | 3 +- src/biocutils/map_to_index.py | 5 +- src/biocutils/match.py | 7 +- src/biocutils/named_list.py | 42 ++++---- src/biocutils/names.py | 45 +++++---- src/biocutils/normalize_subscript.py | 20 +--- src/biocutils/package_utils.py | 3 +- src/biocutils/print_truncated.py | 19 ++-- src/biocutils/print_wrapped_table.py | 4 +- src/biocutils/relaxed_combine_columns.py | 6 +- src/biocutils/relaxed_combine_rows.py | 4 +- src/biocutils/reverse_index.py | 
3 +- src/biocutils/show_as_cell.py | 2 +- src/biocutils/string_list.py | 24 ++--- src/biocutils/table.py | 38 +++++++ src/biocutils/which.py | 1 + 34 files changed, 264 insertions(+), 295 deletions(-) create mode 100644 src/biocutils/table.py diff --git a/src/biocutils/_utils_combine.py b/src/biocutils/_utils_combine.py index 328bf6a..2221840 100644 --- a/src/biocutils/_utils_combine.py +++ b/src/biocutils/_utils_combine.py @@ -26,6 +26,8 @@ def _check_array_dimensions(x, active: int) -> bool: + ")" ) + return True + def _coerce_sparse_matrix(first, combined, module): if isinstance(first, module.csr_matrix): diff --git a/src/biocutils/assign.py b/src/biocutils/assign.py index a590033..875033d 100644 --- a/src/biocutils/assign.py +++ b/src/biocutils/assign.py @@ -14,7 +14,8 @@ def assign(x: Any, indices: Sequence[int], replacement: Any) -> Any: :py:func:`~biocutils.assign_sequence.assign_sequence` instead. Args: - x: Object to be assignted. + x: + Object to be assignted. Returns: The object after assignment, typically the same type as ``x``. 
diff --git a/src/biocutils/assign_rows.py b/src/biocutils/assign_rows.py index 4ddcf13..ada797b 100644 --- a/src/biocutils/assign_rows.py +++ b/src/biocutils/assign_rows.py @@ -31,15 +31,15 @@ def assign_rows(x: Any, indices: Sequence[int], replacement: Any) -> Any: tmp = [slice(None)] * len(x.shape) tmp[0] = indices output[(*tmp,)] = replacement + return output @assign_rows.register -def _assign_rows_numpy( - x: numpy.ndarray, indices: Sequence[int], replacement: Any -) -> numpy.ndarray: +def _assign_rows_numpy(x: numpy.ndarray, indices: Sequence[int], replacement: Any) -> numpy.ndarray: tmp = [slice(None)] * len(x.shape) tmp[0] = indices output = numpy.copy(x) output[(*tmp,)] = replacement + return output diff --git a/src/biocutils/assign_sequence.py b/src/biocutils/assign_sequence.py index 091dc0c..d6b23d8 100644 --- a/src/biocutils/assign_sequence.py +++ b/src/biocutils/assign_sequence.py @@ -41,18 +41,14 @@ def _assign_sequence_list(x: list, indices: Sequence[int], replacement: Any) -> @assign_sequence.register -def _assign_sequence_numpy( - x: numpy.ndarray, indices: Sequence[int], replacement: Any -) -> numpy.ndarray: +def _assign_sequence_numpy(x: numpy.ndarray, indices: Sequence[int], replacement: Any) -> numpy.ndarray: output = numpy.copy(x) output[indices] = replacement return output @assign_sequence.register -def _assign_sequence_range( - x: range, indices: Sequence[int], replacement: Any -) -> Union[range, list]: +def _assign_sequence_range(x: range, indices: Sequence[int], replacement: Any) -> Union[range, list]: if ( isinstance(replacement, range) and isinstance(indices, range) diff --git a/src/biocutils/bioc_object.py b/src/biocutils/bioc_object.py index ac40431..d77b3c8 100644 --- a/src/biocutils/bioc_object.py +++ b/src/biocutils/bioc_object.py @@ -4,11 +4,6 @@ from typing import Any, Dict, Optional, Union from warnings import warn -try: - from typing import Self -except ImportError: - Self = "BiocObject" - from .named_list import NamedList 
__author__ = "Jayaram Kancherla" @@ -40,22 +35,22 @@ class BiocObject: Provides a standardized `metadata` slot and copy-on-write semantics. """ - def __init__(self, metadata: Optional[Union[Dict[str, Any], NamedList]] = None, validate: bool = True) -> None: + def __init__(self, metadata: Optional[Union[Dict[str, Any], NamedList]] = None, _validate: bool = True) -> None: """Initialize the BiocObject. Args: metadata: Additional metadata. Defaults to an empty NamedList. - validate: + _validate: Whether to validate the input. Defaults to True. """ - if validate and metadata is not None: + if _validate and metadata is not None: _validate_metadata(metadata) self._metadata = sanitize_metadata(metadata) - def _define_output(self, in_place: bool = False) -> Self: + def _define_output(self, in_place: bool = False) -> BiocObject: """Internal utility to handle in-place vs copy-on-modify.""" if in_place: return self @@ -93,7 +88,7 @@ def get_metadata(self) -> NamedList: """Alias for :py:attr:`~metadata` getter.""" return self.metadata - def set_metadata(self, metadata: Optional[Union[Dict[str, Any], NamedList]], in_place: bool = False) -> Self: + def set_metadata(self, metadata: Optional[Union[Dict[str, Any], NamedList]], in_place: bool = False) -> BiocObject: """Set new metadata. Args: diff --git a/src/biocutils/boolean_list.py b/src/biocutils/boolean_list.py index 6f19900..447a8f9 100644 --- a/src/biocutils/boolean_list.py +++ b/src/biocutils/boolean_list.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Any, Iterable, Optional, Sequence, Union from .named_list import NamedList @@ -16,7 +18,8 @@ def __init__(self, data: Sequence) -> None: """Initialize the coercer. Args: - data: Sequence of values to coerce. + data: + Sequence of values to coerce. """ self._data = data @@ -24,7 +27,8 @@ def __getitem__(self, index: int) -> Optional[bool]: """Get an item and coerce it to boolean. Args: - index: Index of the item. + index: + Index of the item. 
Returns: Coerced boolean value. @@ -42,7 +46,7 @@ class BooleanList(NamedList): def __init__( self, - data: Optional[Iterable] = None, + data: Optional[Sequence] = None, names: Optional[Names] = None, _validate: bool = True, ): @@ -60,41 +64,34 @@ def __init__( _validate: Internal use only. """ - if _validate: - if data is not None: - if isinstance(data, BooleanList): + if data is not None: + if isinstance(data, BooleanList): + data = data._data + else: + if isinstance(data, NamedList): data = data._data - else: - if isinstance(data, NamedList): - data = data._data - original = data - data = list(_coerce_to_bool(item) for item in original) + + original = data + data = list(_coerce_to_bool(item) for item in original) + super().__init__(data, names, _validate=_validate) - def set_value( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "BooleanList": + def set_value(self, index: Union[int, str], value: Any, in_place: bool = False) -> BooleanList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_value` after coercing ``value`` to a boolean.""" return super().set_value(index, _coerce_to_bool(value), in_place=in_place) - def set_slice( - self, index: SubscriptTypes, value: Sequence, in_place: bool = False - ) -> "BooleanList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> BooleanList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_slice` after coercing ``value`` to booleans.""" return super().set_slice(index, _SubscriptCoercer(value), in_place=in_place) - def safe_insert( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "BooleanList": + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> BooleanList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_insert` after coercing ``value`` to a boolean.""" return super().safe_insert(index, _coerce_to_bool(value), in_place=in_place) - def safe_append(self, value: Any, in_place: bool = 
False) -> "BooleanList": + def safe_append(self, value: Any, in_place: bool = False) -> BooleanList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_append` after coercing ``value`` to a boolean.""" return super().safe_append(_coerce_to_bool(value), in_place=in_place) - def safe_extend(self, other: Iterable, in_place: bool = True) -> "BooleanList": + def safe_extend(self, other: Iterable, in_place: bool = False) -> BooleanList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_extend` after coercing elements of ``other`` to booleans.""" - return super().safe_extend( - (_coerce_to_bool(y) for y in other), in_place=in_place - ) + return super().safe_extend((_coerce_to_bool(y) for y in other), in_place=in_place) diff --git a/src/biocutils/combine.py b/src/biocutils/combine.py index 5e7a058..84bce6a 100644 --- a/src/biocutils/combine.py +++ b/src/biocutils/combine.py @@ -5,7 +5,7 @@ from .is_high_dimensional import is_high_dimensional -def combine(*x: Any): +def combine(*x: Any) -> Any: """ Generic combine that checks if the objects are n-dimensional for n > 1 (i.e. has a ``shape`` property of length greater than 1); if so, it calls @@ -14,7 +14,8 @@ def combine(*x: Any): :py:func:`~biocutils.combine_sequences.combine_sequences` instead. Args: - x: Objects to combine. + x: + Objects to combine. Returns: A combined object, typically the same type as the first element in ``x``. 
@@ -28,9 +29,8 @@ def combine(*x: Any): has_1d = True if has_nd and has_1d: - raise ValueError( - "cannot mix 1-dimensional and higher-dimensional objects in `combine`" - ) + raise ValueError("cannot mix 1-dimensional and higher-dimensional objects in `combine`") + if has_nd: return combine_rows(*x) else: diff --git a/src/biocutils/combine_columns.py b/src/biocutils/combine_columns.py index d1ad72c..bd71ecb 100644 --- a/src/biocutils/combine_columns.py +++ b/src/biocutils/combine_columns.py @@ -19,7 +19,7 @@ @singledispatch -def combine_columns(*x: Any): +def combine_columns(*x: Any) -> Any: """Combine n-dimensional objects along the second dimension. If all elements are :py:class:`~numpy.ndarray`, @@ -40,11 +40,7 @@ def combine_columns(*x: Any): Returns: Combined object, typically the same type as the first entry of ``x`` """ - raise NotImplementedError( - "no `combine_columns` method implemented for '" - + type(x[0]).__name__ - + "' objects" - ) + raise NotImplementedError("no `combine_columns` method implemented for '" + type(x[0]).__name__ + "' objects") @combine_columns.register @@ -57,7 +53,7 @@ def _combine_columns_dense_arrays(*x: numpy.ndarray): return numpy.concatenate(x, axis=1) -if is_package_installed("scipy") is True: +if is_package_installed("scipy"): import scipy.sparse as sp def _combine_columns_sparse_matrices(*x): @@ -85,8 +81,13 @@ def _combine_columns_sparse_arrays(*x): x = [convert_to_dense(y) for y in x] return numpy.concatenate(x, axis=1) + try: + combine_columns.register(sp.sparray, _combine_columns_sparse_arrays) + except Exception: + pass + -if is_package_installed("pandas") is True: +if is_package_installed("pandas"): from pandas import DataFrame, concat @combine_columns.register(DataFrame) diff --git a/src/biocutils/combine_rows.py b/src/biocutils/combine_rows.py index 9102d1e..e8e8573 100644 --- a/src/biocutils/combine_rows.py +++ b/src/biocutils/combine_rows.py @@ -19,7 +19,7 @@ @singledispatch -def combine_rows(*x: Any): +def 
combine_rows(*x: Any) -> Any: """Combine n-dimensional objects along their first dimension. If all elements are :py:class:`~numpy.ndarray`, we combine them using @@ -40,9 +40,7 @@ def combine_rows(*x: Any): Returns: Combined object, typically the same type as the first entry of ``x``. """ - raise NotImplementedError( - "no `combine_rows` method implemented for '" + type(x[0]).__name__ + "' objects" - ) + raise NotImplementedError("no `combine_rows` method implemented for '" + type(x[0]).__name__ + "' objects") @combine_rows.register(numpy.ndarray) @@ -69,7 +67,7 @@ def _combine_rows_sparse_matrices(*x): return numpy.concatenate(x) try: - combine_rows.register(sp.sparray, _combine_rows_sparse_arrays) + combine_rows.register(sp.spmatrix, _combine_rows_sparse_matrices) except Exception: pass @@ -84,7 +82,7 @@ def _combine_rows_sparse_arrays(*x): return numpy.concatenate(x) try: - combine_rows.register(sp.spmatrix, _combine_rows_sparse_matrices) + combine_rows.register(sp.sparray, _combine_rows_sparse_arrays) except Exception: pass diff --git a/src/biocutils/combine_sequences.py b/src/biocutils/combine_sequences.py index 8613385..2b34be3 100644 --- a/src/biocutils/combine_sequences.py +++ b/src/biocutils/combine_sequences.py @@ -13,7 +13,7 @@ @singledispatch -def combine_sequences(*x: Any): +def combine_sequences(*x: Any) -> Any: """Combine vector-like objects (1-dimensional arrays). If all elements are :py:class:`~numpy.ndarray`, @@ -34,11 +34,7 @@ def combine_sequences(*x: Any): Returns: A combined object, ideally of the same type as the first element in ``x``. 
""" - raise NotImplementedError( - "no `combine_sequences` method implemented for '" - + type(x[0]).__name__ - + "' objects" - ) + raise NotImplementedError("no `combine_sequences` method implemented for '" + type(x[0]).__name__ + "' objects") @combine_sequences.register(list) @@ -51,6 +47,7 @@ def _combine_sequences_dense_arrays(*x: numpy.ndarray): for y in x: if numpy.ma.is_masked(y): return numpy.ma.concatenate(x, axis=None) + return numpy.concatenate(x, axis=None) @@ -85,6 +82,7 @@ def _combine_sequences_ranges(*x: range): if not failed: return range(start, stop, step) + return list(chain(*x)) @@ -101,4 +99,5 @@ def _combine_sequences_pandas_series(*x): else: elems.append(elem) x = elems + return concat(x) diff --git a/src/biocutils/convert_to_dense.py b/src/biocutils/convert_to_dense.py index 1281e4c..12aa11f 100644 --- a/src/biocutils/convert_to_dense.py +++ b/src/biocutils/convert_to_dense.py @@ -15,7 +15,8 @@ def convert_to_dense(x: Any) -> numpy.ndarray: ``numpy.concatenate`` doesn't understand. Args: - x: Some array-like object to be stored as a NumPy array. + x: + Some array-like object to be stored as a NumPy array. Returns: A NumPy array. diff --git a/src/biocutils/extract_column_names.py b/src/biocutils/extract_column_names.py index 6f98521..d3a4e02 100644 --- a/src/biocutils/extract_column_names.py +++ b/src/biocutils/extract_column_names.py @@ -11,11 +11,12 @@ @singledispatch -def extract_column_names(x: Any) -> numpy.ndarray: +def extract_column_names(x: Any) -> Any: """Access column names from 2-dimensional representations. Args: - x: Any object with column names. + x: + Any object with column names. Returns: Array of strings containing column names. 
@@ -27,5 +28,5 @@ def extract_column_names(x: Any) -> numpy.ndarray: from pandas import DataFrame @extract_column_names.register(DataFrame) - def _colnames_dataframe(x: DataFrame) -> list: + def _colnames_dataframe(x): return numpy.array(x.columns, dtype=str) diff --git a/src/biocutils/extract_row_names.py b/src/biocutils/extract_row_names.py index 5ea3927..8ebacee 100644 --- a/src/biocutils/extract_row_names.py +++ b/src/biocutils/extract_row_names.py @@ -11,7 +11,7 @@ @singledispatch -def extract_row_names(x: Any) -> numpy.ndarray: +def extract_row_names(x: Any) -> Any: """Access row names from 2-dimensional representations. Args: @@ -27,5 +27,5 @@ def extract_row_names(x: Any) -> numpy.ndarray: from pandas import DataFrame @extract_row_names.register(DataFrame) - def _rownames_dataframe(x: DataFrame) -> list: + def _rownames_dataframe(x): return numpy.array(x.index, dtype=str) diff --git a/src/biocutils/factor.py b/src/biocutils/factor.py index fedf75e..2bdad7e 100644 --- a/src/biocutils/factor.py +++ b/src/biocutils/factor.py @@ -1,8 +1,10 @@ +from __future__ import annotations + +import warnings from copy import copy, deepcopy from typing import Optional, Sequence, Union import numpy -import warnings from .assign_sequence import assign_sequence from .combine_sequences import combine_sequences @@ -23,9 +25,7 @@ def _sanitize_codes(codes: Sequence[int], num_levels: int) -> numpy.ndarray: if not isinstance(codes, numpy.ndarray): - replacement = numpy.ndarray( - len(codes), dtype=numpy.min_scalar_type(-num_levels) - ) # get a signed type. + replacement = numpy.ndarray(len(codes), dtype=numpy.min_scalar_type(-num_levels)) # get a signed type. 
for i, x in enumerate(codes): if is_missing_scalar(x) or x < 0: replacement[i] = -1 @@ -35,16 +35,12 @@ def _sanitize_codes(codes: Sequence[int], num_levels: int) -> numpy.ndarray: else: if len(codes.shape) != 1: raise ValueError("'codes' should be a 1-dimensional array") - if not numpy.issubdtype( - codes.dtype, numpy.signedinteger - ): # force it to be signed. + if not numpy.issubdtype(codes.dtype, numpy.signedinteger): # force it to be signed. codes = codes.astype(numpy.min_scalar_type(-num_levels)) for x in codes: if x < -1 or x >= num_levels: - raise ValueError( - "all entries of 'codes' should refer to an entry of 'levels'" - ) + raise ValueError("all entries of 'codes' should refer to an entry of 'levels'") return codes @@ -67,7 +63,7 @@ def _sanitize_levels(levels: Sequence[str], check: bool = True) -> StringList: class FactorIterator: """Iterator for a :py:class:`~Factor` object.""" - def __init__(self, parent: "Factor"): + def __init__(self, parent: Factor): """ Args: parent: The parent :py:class:`~Factor` object. @@ -75,7 +71,7 @@ def __init__(self, parent: "Factor"): self._parent = parent self._position = 0 - def __iter__(self) -> "FactorIterator": + def __iter__(self) -> FactorIterator: """ Returns: The iterator. @@ -106,10 +102,10 @@ class Factor: def __init__( self, - codes: Sequence[int], - levels: Sequence[str], + codes: Union[numpy.ndarray, Sequence[int]], + levels: Union[StringList, Sequence[str]], ordered: bool = False, - names: Optional[Names] = None, + names: Optional[Union[Names, Sequence[str]]] = None, _validate: bool = True, ): """Initialize a Factor object. 
@@ -149,7 +145,7 @@ def __init__( #####>>>> Simple getters <<<<##### ################################## - def _define_output(self, in_place: bool) -> "Factor": + def _define_output(self, in_place: bool) -> Factor: if in_place: return self else: @@ -171,7 +167,7 @@ def codes(self) -> numpy.ndarray: """Alias for :py:meth:`~get_codes`.""" return self.get_codes() - def set_codes(self, codes: Sequence[int], in_place: bool = False) -> "Factor": + def set_codes(self, codes: Sequence[int], in_place: bool = False) -> Factor: """ Args: codes: @@ -187,9 +183,7 @@ def set_codes(self, codes: Sequence[int], in_place: bool = False) -> "Factor": """ output = self._define_output(in_place) if len(codes) != len(self): - raise ValueError( - "length of 'codes' should be equal to that of the current object" - ) + raise ValueError("length of 'codes' should be equal to that of the current object") output._codes = _sanitize_codes(codes, len(self._levels)) return output @@ -220,7 +214,7 @@ def ordered(self) -> bool: """Alias for :py:meth:`~get_ordered`.""" return self.get_ordered() - def set_ordered(self, ordered: bool, in_place: bool = False) -> "Factor": + def set_ordered(self, ordered: bool, in_place: bool = False) -> Factor: """ Args: ordered: @@ -293,12 +287,7 @@ def __repr__(self) -> str: Returns: A stringified representation of this object. """ - tmp = ( - "Factor(codes=" - + print_truncated_list(self._codes) - + ", levels=" - + print_truncated_list(self._levels) - ) + tmp = "Factor(codes=" + print_truncated_list(self._codes) + ", levels=" + print_truncated_list(self._levels) if self._ordered: tmp += ", ordered=True" if self._names: @@ -311,42 +300,24 @@ def __str__(self) -> str: Returns: A pretty-printed representation of this object. 
""" - message = ( - "Factor of length " - + str(len(self._codes)) - + " with " - + str(len(self._levels)) - + " level" - ) + message = "Factor of length " + str(len(self._codes)) + " with " + str(len(self._levels)) + " level" if len(self._levels) != 0: message += "s" message += "\n" message += ( "values: " - + print_truncated_list( - self._codes, transform=lambda i: self._levels[i], include_brackets=False - ) + + print_truncated_list(self._codes, transform=lambda i: self._levels[i], include_brackets=False) + "\n" ) if self._names is not None: message += ( - "names: " - + print_truncated_list( - self._names, transform=lambda x: x, include_brackets=False - ) - + "\n" - ) - message += ( - "levels: " - + print_truncated_list( - self._levels, transform=lambda x: x, include_brackets=False + "names: " + print_truncated_list(self._names, transform=lambda x: x, include_brackets=False) + "\n" ) - + "\n" - ) + message += "levels: " + print_truncated_list(self._levels, transform=lambda x: x, include_brackets=False) + "\n" message += "ordered: " + str(self._ordered) return message - def __eq__(self, other: "Factor"): + def __eq__(self, other: Factor): """ Args: other: Another ``Factor``. 
@@ -357,7 +328,12 @@ def __eq__(self, other: "Factor"): """ if not isinstance(other, Factor): return False - if len(self) != len(other) or self._levels != other._levels or self._names != other._names or self._ordered != other._ordered: + if ( + len(self) != len(other) + or self._levels != other._levels + or self._names != other._names + or self._ordered != other._ordered + ): return False return (self._codes == other._codes).all() @@ -384,7 +360,7 @@ def get_value(self, index: Union[str, int]) -> Union[str, None]: return None return self._levels[i] - def get_slice(self, index: SubscriptTypes) -> "Factor": + def get_slice(self, index: SubscriptTypes) -> Factor: """ Args: index: @@ -404,7 +380,7 @@ def get_slice(self, index: SubscriptTypes) -> "Factor": output._names = subset_sequence(self._names, index) return output - def __getitem__(self, index: SubscriptTypes) -> Union[str, "Factor"]: + def __getitem__(self, index: SubscriptTypes) -> Union[str, Factor]: """ If ``index`` is a scalar, this is an alias for :py:meth:`~get_value`. @@ -416,9 +392,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union[str, "Factor"]: else: return self.get_slice(NormalizedSubscript(index)) - def set_value( - self, index: Union[str, int], value: Union[str, None], in_place: bool = False - ) -> "Factor": + def set_value(self, index: Union[str, int], value: Union[str, None], in_place: bool = False) -> Factor: """ Args: index: @@ -457,7 +431,7 @@ def set_value( raise IndexError("failed to find level '" + str(value) + "'") - def set_slice(self, index: SubscriptTypes, value: "Factor", in_place: bool = False): + def set_slice(self, index: SubscriptTypes, value: Factor, in_place: bool = False): """ Replace items in the ``Factor`` list. The ``index`` elements in the current object are replaced with the corresponding values in ``value``. 
@@ -508,7 +482,7 @@ def set_slice(self, index: SubscriptTypes, value: "Factor", in_place: bool = Fal return output - def __setitem__(self, index: SubscriptTypes, value: Union[str, "Factor"]): + def __setitem__(self, index: SubscriptTypes, value: Union[str, Factor]): """ If ``index`` is a scalar, this is an alias for :py:meth:`~set_value`. @@ -524,7 +498,7 @@ def __setitem__(self, index: SubscriptTypes, value: Union[str, "Factor"]): #####>>>> Level setting <<<<##### ################################# - def drop_unused_levels(self, in_place: bool = False) -> "Factor": + def drop_unused_levels(self, in_place: bool = False) -> Factor: """Drop unused levels. Args: @@ -567,7 +541,7 @@ def replace_levels( self, levels: Sequence[str], in_place: bool = False, - ) -> "Factor": + ) -> Factor: """Replace the existing levels with a new list. The codes of the returned ``Factor`` are unchanged by this method and will index into the replacement ``levels``, so each element of the ``Factor`` may refer @@ -614,12 +588,7 @@ def replace_levels( output._levels = new_levels return output - def set_levels( - self, - levels: Union[str, Sequence[str]], - remap: bool = True, - in_place: bool = False - ) -> "Factor": + def set_levels(self, levels: Union[str, Sequence[str]], remap: bool = True, in_place: bool = False) -> Factor: """ Alias for :py:meth:`~remap_levels` if ``remap = True``, otherwise an alias for :py:meth:`~replace_levels`. The first alias is deprecated and @@ -631,9 +600,7 @@ def set_levels( else: return self.replace_levels(levels, in_place=in_place) - def remap_levels( - self, levels: Union[str, Sequence[str]], in_place: bool = False - ) -> "Factor": + def remap_levels(self, levels: Union[str, Sequence[str]], in_place: bool = False) -> Factor: """Remap codes to a replacement list of levels. Each entry of the remapped ``Factor`` will refer to the same string across the old and new levels, provided that string is present in both sets of levels. 
@@ -679,9 +646,7 @@ def remap_levels( lmapping[x] = len(new_levels) new_levels.append(x) if levels not in lmapping: - raise ValueError( - "string 'levels' should already be present among object levels" - ) + raise ValueError("string 'levels' should already be present among object levels") else: new_levels = levels if not isinstance(new_levels, StringList): @@ -712,7 +677,7 @@ def remap_levels( #####>>>> Copying <<<<##### ########################### - def __copy__(self) -> "Factor": + def __copy__(self) -> Factor: """ Returns: A shallow copy of the ``Factor`` object. @@ -725,7 +690,7 @@ def __copy__(self) -> "Factor": _validate=False, ) - def __deepcopy__(self, memo) -> "Factor": + def __deepcopy__(self, memo) -> Factor: """ Returns: A deep copy of the ``Factor`` object. @@ -762,8 +727,8 @@ def from_sequence( sort_levels: bool = True, ordered: bool = False, names: Optional[Sequence[str]] = None, - **kwargs - ) -> "Factor": + **kwargs, + ) -> Factor: """Convert a sequence of hashable values into a factor. 
Args: @@ -841,9 +806,7 @@ def _combine_factors(*x: Factor): new_levels.append(y) mapping.append(all_levels_map[y]) - curout = numpy.ndarray( - len(f), dtype=numpy.min_scalar_type(-len(new_levels)) - ) + curout = numpy.ndarray(len(f), dtype=numpy.min_scalar_type(-len(new_levels))) for i, j in enumerate(f._codes): if j < 0: curout[i] = j diff --git a/src/biocutils/float_list.py b/src/biocutils/float_list.py index ccf8bd4..7d3c0e3 100644 --- a/src/biocutils/float_list.py +++ b/src/biocutils/float_list.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Any, Iterable, Optional, Sequence, Union from .named_list import NamedList @@ -47,7 +49,7 @@ class FloatList(NamedList): def __init__( self, - data: Optional[Iterable] = None, + data: Optional[Sequence] = None, names: Optional[Names] = None, _validate: bool = True, ): @@ -74,32 +76,25 @@ def __init__( data = data._data original = data data = list(_coerce_to_float(item) for item in original) + super().__init__(data, names, _validate=_validate) - def set_value( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "FloatList": + def set_value(self, index: Union[int, str], value: Any, in_place: bool = False) -> FloatList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_value` after coercing ``value`` to a float.""" return super().set_value(index, _coerce_to_float(value), in_place=in_place) - def set_slice( - self, index: SubscriptTypes, value: Sequence, in_place: bool = False - ) -> "FloatList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> FloatList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_slice` after coercing ``value`` to floats.""" return super().set_slice(index, _SubscriptCoercer(value), in_place=in_place) - def safe_insert( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "FloatList": + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> FloatList: 
"""Calls :py:meth:`~biocutils.NamedList.NamedList.safe_insert` after coercing ``value`` to a float.""" return super().safe_insert(index, _coerce_to_float(value), in_place=in_place) - def safe_append(self, value: Any, in_place: bool = False) -> "FloatList": + def safe_append(self, value: Any, in_place: bool = False) -> FloatList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_append` after coercing ``value`` to a float.""" return super().safe_append(_coerce_to_float(value), in_place=in_place) - def safe_extend(self, other: Iterable, in_place: bool = True) -> "FloatList": + def safe_extend(self, other: Iterable, in_place: bool = True) -> FloatList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_extend` after coercing elements of ``other`` to floats.""" - return super().safe_extend( - (_coerce_to_float(y) for y in other), in_place=in_place - ) + return super().safe_extend((_coerce_to_float(y) for y in other), in_place=in_place) diff --git a/src/biocutils/integer_list.py b/src/biocutils/integer_list.py index 9ca9fbb..ff41172 100644 --- a/src/biocutils/integer_list.py +++ b/src/biocutils/integer_list.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Any, Iterable, Optional, Sequence, Union from .named_list import NamedList @@ -21,7 +23,8 @@ def __init__(self, data: Sequence) -> None: """Initialize the coercer. Args: - data: Sequence of values to coerce. + data: + Sequence of values to coerce. """ self._data = data @@ -29,7 +32,8 @@ def __getitem__(self, index: int) -> Optional[int]: """Get an item and coerce it to integer. Args: - index: Index of the item. + index: + Index of the item. Returns: Coerced integer value. 
@@ -47,7 +51,7 @@ class IntegerList(NamedList): def __init__( self, - data: Optional[Iterable] = None, + data: Optional[Sequence] = None, names: Optional[Names] = None, _validate: bool = True, ): @@ -76,30 +80,22 @@ def __init__( data = list(_coerce_to_int(item) for item in original) super().__init__(data, names, _validate=_validate) - def set_value( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "IntegerList": + def set_value(self, index: Union[int, str], value: Any, in_place: bool = False) -> IntegerList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_value` after coercing ``value`` to a integer.""" return super().set_value(index, _coerce_to_int(value), in_place=in_place) - def set_slice( - self, index: SubscriptTypes, value: Sequence, in_place: bool = False - ) -> "IntegerList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> IntegerList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_slice` after coercing ``value`` to integers.""" return super().set_slice(index, _SubscriptCoercer(value), in_place=in_place) - def safe_insert( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "IntegerList": + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> IntegerList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_insert` after coercing ``value`` to a integer.""" return super().safe_insert(index, _coerce_to_int(value), in_place=in_place) - def safe_append(self, value: Any, in_place: bool = False) -> "IntegerList": + def safe_append(self, value: Any, in_place: bool = False) -> IntegerList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_append` after coercing ``value`` to a integer.""" return super().safe_append(_coerce_to_int(value), in_place=in_place) - def safe_extend(self, other: Iterable, in_place: bool = True) -> "IntegerList": + def safe_extend(self, other: Iterable, in_place: bool = True) -> IntegerList: """Calls 
:py:meth:`~biocutils.NamedList.NamedList.safe_extend` after coercing elements of ``other`` to integers.""" - return super().safe_extend( - (_coerce_to_int(y) for y in other), in_place=in_place - ) + return super().safe_extend((_coerce_to_int(y) for y in other), in_place=in_place) diff --git a/src/biocutils/is_high_dimensional.py b/src/biocutils/is_high_dimensional.py index 3bdfc6f..c380c9f 100644 --- a/src/biocutils/is_high_dimensional.py +++ b/src/biocutils/is_high_dimensional.py @@ -10,7 +10,8 @@ def is_high_dimensional(x: Any) -> bool: attribute that is of length greater than 1. Args: - x: Some kind of object. + x: + Some kind of object. Returns: Whether ``x`` is high-dimensional. diff --git a/src/biocutils/is_list_of_type.py b/src/biocutils/is_list_of_type.py index f489b96..5f0ff54 100644 --- a/src/biocutils/is_list_of_type.py +++ b/src/biocutils/is_list_of_type.py @@ -12,11 +12,14 @@ def is_list_of_type(x: Union[list, tuple], target_type: Callable, ignore_none: b """Checks if ``x`` is a list, and whether all elements of the list are of the same type. Args: - x: A list or tuple of values. + x: + A list or tuple of values. - target_type: Type to check for, e.g. ``str``, ``int``. + target_type: + Type to check for, e.g. ``str``, ``int``. - ignore_none: Whether to ignore Nones when comparing to ``target_type``. + ignore_none: + Whether to ignore Nones when comparing to ``target_type``. 
Returns: True if ``x`` is a list or tuple and all elements are of the target @@ -24,7 +27,7 @@ def is_list_of_type(x: Union[list, tuple], target_type: Callable, ignore_none: b """ if not isinstance(x, (list, tuple, np.ndarray, ma.MaskedArray)): return False - + if isinstance(x, ma.MaskedArray): if not ignore_none: return all(x.mask) and all(isinstance(item, target_type) for item in x.data) diff --git a/src/biocutils/is_missing_scalar.py b/src/biocutils/is_missing_scalar.py index f32dd60..5dd4cd0 100644 --- a/src/biocutils/is_missing_scalar.py +++ b/src/biocutils/is_missing_scalar.py @@ -7,7 +7,8 @@ def is_missing_scalar(x: Any) -> bool: """Check if a scalar value is missing. Args: - x: Any scalar value. + x: + Any scalar value. Returns: Whether ``x`` is None or a NumPy masked constant. diff --git a/src/biocutils/map_to_index.py b/src/biocutils/map_to_index.py index bfb31f1..fc50b6d 100644 --- a/src/biocutils/map_to_index.py +++ b/src/biocutils/map_to_index.py @@ -6,8 +6,7 @@ def map_to_index(x: Sequence, duplicate_method: DUPLICATE_METHOD = "first") -> dict: - """ - Create a dictionary to map values of a sequence to positional indices. + """Create a dictionary to map values of a sequence to positional indices. Args: x: @@ -19,7 +18,7 @@ def map_to_index(x: Sequence, duplicate_method: DUPLICATE_METHOD = "first") -> d value in ``x``. Returns: - dict: Dictionary that maps values of ``x`` to their position inside ``x``. + Dictionary that maps values of ``x`` to their position inside ``x``. 
""" first_tie = duplicate_method == "first" diff --git a/src/biocutils/match.py b/src/biocutils/match.py index 0e237c5..96e74de 100644 --- a/src/biocutils/match.py +++ b/src/biocutils/match.py @@ -1,4 +1,5 @@ -from typing import Sequence, Union, Optional +from typing import Optional, Sequence, Union + import numpy from .map_to_index import DUPLICATE_METHOD, map_to_index @@ -45,7 +46,7 @@ def match( targets = map_to_index(targets, duplicate_method=duplicate_method) if dtype is None: - dtype = numpy.min_scalar_type(-len(targets)) # get a signed type + dtype = numpy.min_scalar_type(-len(targets)) # get a signed type indices = numpy.zeros(len(x), dtype=dtype) if fail_missing is None: @@ -60,7 +61,7 @@ def match( indices[i] = -1 else: for i, y in enumerate(x): - if not y in targets: + if y not in targets: raise ValueError("cannot find '" + str(y) + "' in 'targets'") indices[i] = targets[y] diff --git a/src/biocutils/named_list.py b/src/biocutils/named_list.py index 94918bd..6f00201 100644 --- a/src/biocutils/named_list.py +++ b/src/biocutils/named_list.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from copy import deepcopy from typing import Any, Dict, Iterable, Optional, Sequence, Union @@ -21,7 +23,7 @@ class NamedList: def __init__( self, - data: Optional[Iterable] = None, + data: Optional[Sequence] = None, names: Optional[Names] = None, _validate: bool = True, ): @@ -49,7 +51,9 @@ def __init__( data = data._data elif not isinstance(data, list): data = list(data) + names = _sanitize_names(names, len(data)) + self._data = data self._names = names @@ -93,7 +97,7 @@ def __str__(self) -> str: else: return repr(self._data) - def __eq__(self, other: "NamedList") -> bool: + def __eq__(self, other: NamedList) -> bool: """ Args: other: Another ``NamedList``. 
@@ -110,7 +114,7 @@ def __eq__(self, other: "NamedList") -> bool: #####>>>> Get/set names <<<<##### ################################# - def get_names(self) -> Names: + def get_names(self) -> Optional[Names]: """ Returns: Names for the list elements. @@ -121,14 +125,14 @@ def get_names(self) -> Names: return self._names @property - def names(self) -> Names: + def names(self) -> Optional[Names]: """Alias for :py:meth:`~get_names`.""" return self.get_names() def _shallow_copy(self): return type(self)(self._data, self._names, _validate=False) - def set_names(self, names: Optional[Names], in_place: bool = False) -> "NamedList": + def set_names(self, names: Optional[Names], in_place: bool = False) -> NamedList: """ Args: names: @@ -168,7 +172,7 @@ def get_value(self, index: Union[str, int]) -> Any: index = _name_to_position(self._names, index) return self._data[index] - def get_slice(self, index: SubscriptTypes) -> "NamedList": + def get_slice(self, index: SubscriptTypes) -> NamedList: """ Args: index: @@ -188,7 +192,7 @@ def get_slice(self, index: SubscriptTypes) -> "NamedList": outnames = subset_sequence(self._names, index) return type(self)(outdata, outnames, _validate=False) - def __getitem__(self, index: SubscriptTypes) -> Union["NamedList", Any]: + def __getitem__(self, index: SubscriptTypes) -> Union[NamedList, Any]: """ If ``index`` is a scalar, this is an alias for :py:meth:`~get_value`. 
@@ -200,7 +204,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union["NamedList", Any]: else: return self.get_slice(NormalizedSubscript(index)) - def set_value(self, index: Union[str, int], value: Any, in_place: bool = False) -> "NamedList": + def set_value(self, index: Union[str, int], value: Any, in_place: bool = False) -> NamedList: """ Args: index: @@ -247,7 +251,7 @@ def set_value(self, index: Union[str, int], value: Any, in_place: bool = False) return output - def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> "NamedList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> NamedList: """ Args: index: @@ -310,13 +314,13 @@ def __setitem__(self, index: SubscriptTypes, value: Any): #####>>>> List methods <<<<##### ################################ - def _define_output(self, in_place: bool) -> "NamedList": + def _define_output(self, in_place: bool) -> NamedList: if in_place: return self else: return self.copy() - def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> "NamedList": + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> NamedList: """ Args: index: @@ -348,7 +352,7 @@ def insert(self, index: Union[int, str], value: Any): """Alias for :py:meth:`~safe_insert` with ``in_place = True``.""" self.safe_insert(index, value, in_place=True) - def safe_append(self, value: Any, in_place: bool = False) -> "NamedList": + def safe_append(self, value: Any, in_place: bool = False) -> NamedList: """ Args: value: @@ -373,7 +377,7 @@ def append(self, value: Any): """Alias for :py:meth:`~safe_append` with ``in_place = True``.""" self.safe_append(value, in_place=True) - def safe_extend(self, other: Iterable, in_place: bool = False) -> "NamedList": + def safe_extend(self, other: Iterable, in_place: bool = False) -> NamedList: """ Args: other: @@ -406,7 +410,7 @@ def extend(self, other: Iterable): """Alias for :py:meth:`~safe_extend` 
with ``in_place = True``.""" self.safe_extend(other, in_place=True) - def __add__(self, other: list) -> "NamedList": + def __add__(self, other: list) -> NamedList: """Alias for :py:meth:`~safe_extend`.""" return self.safe_extend(other) @@ -420,7 +424,7 @@ def __iadd__(self, other: list): #####>>>> Copy methods <<<<##### ################################ - def copy(self) -> "NamedList": + def copy(self) -> NamedList: """ Returns: A shallow copy of a ``NamedList`` with the same contents. This @@ -433,11 +437,11 @@ def copy(self) -> "NamedList": newnames = newnames.copy() return type(self)(self._data.copy(), names=newnames, _validate=False) - def __copy__(self) -> "NamedList": + def __copy__(self) -> NamedList: """Alias for :py:meth:`~copy`.""" return self.copy() - def __deepcopy__(self, memo=None, _nil=[]) -> "NamedList": + def __deepcopy__(self, memo=None, _nil=[]) -> NamedList: """ Args: memo: @@ -483,7 +487,7 @@ def as_dict(self) -> Dict[str, Any]: return output @classmethod - def from_list(cls, x: list) -> "NamedList": + def from_list(cls, x: list) -> NamedList: """ Args: x: List of data elements. @@ -494,7 +498,7 @@ def from_list(cls, x: list) -> "NamedList": return cls(x) @classmethod - def from_dict(cls, x: dict) -> "NamedList": + def from_dict(cls, x: dict) -> NamedList: """ Args: x: Dictionary where keys are strings (or can be coerced to them). diff --git a/src/biocutils/names.py b/src/biocutils/names.py index e90f45f..aa232cd 100644 --- a/src/biocutils/names.py +++ b/src/biocutils/names.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from copy import deepcopy from typing import Any, Callable, Iterable, List, Optional, Sequence, Union @@ -82,7 +84,7 @@ def __str__(self) -> str: """ return str(self._names) - def __eq__(self, other: "Names") -> bool: + def __eq__(self, other: Names) -> bool: """ Args: other: Another ``Names`` object. 
@@ -133,7 +135,7 @@ def get_value(self, index: int) -> str: """ return self._names[index] - def get_slice(self, index: SubscriptTypes) -> "Names": + def get_slice(self, index: SubscriptTypes) -> Names: """ Args: index: @@ -147,7 +149,7 @@ def get_slice(self, index: SubscriptTypes) -> "Names": index, scalar = normalize_subscript(index, len(self), None) return type(self)(subset_sequence(self._names, index), _validate=False) - def __getitem__(self, index: SubscriptTypes) -> Union[str, "Names"]: + def __getitem__(self, index: SubscriptTypes) -> Union[str, Names]: """ If ``index`` is a scalar, this is an alias for :py:attr:`~get_value`. @@ -159,7 +161,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union[str, "Names"]: else: return self.get_slice(NormalizedSubscript(index)) - def set_value(self, index: int, value: str, in_place: bool = False) -> "Names": + def set_value(self, index: int, value: str, in_place: bool = False) -> Names: """ Args: index: Position of interest. @@ -180,9 +182,7 @@ def set_value(self, index: int, value: str, in_place: bool = False) -> "Names": output._names[index] = str(value) return output - def set_slice( - self, index: SubscriptTypes, value: Sequence[str], in_place: bool = False - ) -> "Names": + def set_slice(self, index: SubscriptTypes, value: Sequence[str], in_place: bool = False) -> Names: """ Args: index: Positions of interest. @@ -228,13 +228,13 @@ def __setitem__(self, index: SubscriptTypes, value: Any): #####>>>> List methods <<<<##### ################################ - def _define_output(self, in_place: bool) -> "Names": + def _define_output(self, in_place: bool) -> Names: if in_place: return self else: return self.copy() - def safe_append(self, value: str, in_place: bool = False) -> "Names": + def safe_append(self, value: str, in_place: bool = False) -> Names: """ Args: value: Name to be added. 
@@ -256,7 +256,7 @@ def append(self, value: str): """Alias for :py:attr:`~safe_append` with ``in_place = True``.""" self.safe_append(value, in_place=True) - def safe_insert(self, index: int, value: str, in_place: bool = False) -> "Names": + def safe_insert(self, index: int, value: str, in_place: bool = False) -> Names: """ Args: index: Position on the object to insert at. @@ -278,7 +278,7 @@ def insert(self, index: int, value: str): """Alias for :py:attr:`~safe_insert` with ``in_place = True``.""" self.safe_insert(index, value, in_place=True) - def safe_extend(self, value: Sequence[str], in_place: bool = False) -> "Names": + def safe_extend(self, value: Sequence[str], in_place: bool = False) -> Names: """ Args: value: Names to be added. @@ -332,7 +332,7 @@ def __iadd__(self, other: list): #####>>>> Copy methods <<<<##### ################################ - def copy(self) -> "Names": + def copy(self) -> Names: """ Returns: A shallow copy of the current object. This will copy the underlying @@ -341,11 +341,11 @@ def copy(self) -> "Names": """ return type(self)(self._names.copy(), _validate=False) - def __copy__(self) -> "Names": + def __copy__(self) -> Names: """Alias for :py:attr:`~copy`.""" return self.copy() - def __deepcopy__(self, memo=None, _nil=[]) -> "Names": + def __deepcopy__(self, memo=None, _nil=[]) -> Names: """ Args: memo: @@ -387,19 +387,24 @@ def _name_to_position(names: Optional[Names], index: str) -> int: return i -def _sanitize_names(names: Optional[Names], length: int) -> Union[None, Names]: +def _validate_names(names: Optional[Names], length: int) -> bool: + if names is not None and len(names) != length: + raise ValueError("length of 'names' must be equal to number of entries (" + str(length) + ")") + + return True + + +def _sanitize_names(names: Optional[Names], length: int) -> Optional[Names]: if names is None: return names if not isinstance(names, Names): names = Names(names) - if len(names) != length: - raise ValueError( - "length of 'names' 
must be equal to number of entries (" + str(length) + ")" - ) + + _validate_names(names, length=length) return names -def _combine_names(*x: Any, get_names: Callable) -> Union[Names, None]: +def _combine_names(*x: Any, get_names: Callable) -> Optional[Names]: all_names = [] has_names = False for y in x: diff --git a/src/biocutils/normalize_subscript.py b/src/biocutils/normalize_subscript.py index d85c6d0..d79ee6b 100644 --- a/src/biocutils/normalize_subscript.py +++ b/src/biocutils/normalize_subscript.py @@ -4,12 +4,7 @@ def _raise_int(idx: int, length): - raise IndexError( - "subscript (" - + str(idx) - + ") out of range for vector-like object of length " - + str(length) - ) + raise IndexError("subscript (" + str(idx) + ") out of range for vector-like object of length " + str(length)) def _is_scalar_bool(sub): @@ -27,7 +22,8 @@ def __init__(self, subscript: Sequence[int]) -> None: """Initialize a NormalizedSubscript. Args: - subscript: Sequence of integers for a normalized subscript. + subscript: + Sequence of integers for a normalized subscript. 
""" self._subscript = subscript @@ -132,11 +128,7 @@ def normalize_subscript( if isinstance(sub, str): if names is None: - raise IndexError( - "failed to find subscript '" - + sub - + "' for vector-like object with no names" - ) + raise IndexError("failed to find subscript '" + sub + "' for vector-like object with no names") i = -1 from .names import Names @@ -226,9 +218,7 @@ def normalize_subscript( if len(has_strings): if names is None: - raise IndexError( - "cannot find string subscripts for vector-like object with no names" - ) + raise IndexError("cannot find string subscripts for vector-like object with no names") mapping = {} for i, y in enumerate(names): diff --git a/src/biocutils/package_utils.py b/src/biocutils/package_utils.py index dcf03d1..9822bb6 100644 --- a/src/biocutils/package_utils.py +++ b/src/biocutils/package_utils.py @@ -7,7 +7,8 @@ def is_package_installed(package_name: str) -> bool: """Check if a package is installed. Args: - package_name: Package name. + package_name: + Package name. Returns: True if package is installed, otherwise False. diff --git a/src/biocutils/print_truncated.py b/src/biocutils/print_truncated.py index 52ab420..11b529a 100644 --- a/src/biocutils/print_truncated.py +++ b/src/biocutils/print_truncated.py @@ -20,13 +20,9 @@ def print_truncated(x, truncated_to: int = 3, full_threshold: int = 10) -> str: String containing the pretty-printed contents. """ if isinstance(x, dict): - return print_truncated_dict( - x, truncated_to=truncated_to, full_threshold=full_threshold - ) + return print_truncated_dict(x, truncated_to=truncated_to, full_threshold=full_threshold) elif isinstance(x, list): - return print_truncated_list( - x, truncated_to=truncated_to, full_threshold=full_threshold - ) + return print_truncated_list(x, truncated_to=truncated_to, full_threshold=full_threshold) else: return repr(x) @@ -43,7 +39,8 @@ def print_truncated_list( preview of an object without spewing out all of its contents on the screen. 
Args: - x: List to be printed. + x: + List to be printed. truncated_to: Number of elements to truncate to, at the start and end of the @@ -71,9 +68,7 @@ def print_truncated_list( if transform is None: def transform(y): - return print_truncated( - y, truncated_to=truncated_to, full_threshold=full_threshold - ) + return print_truncated(y, truncated_to=truncated_to, full_threshold=full_threshold) if len(x) > full_threshold and len(x) > truncated_to * 2: for i in range(truncated_to): @@ -131,9 +126,7 @@ def print_truncated_dict( if transform is None: def transform(y): - return print_truncated( - y, truncated_to=truncated_to, full_threshold=full_threshold - ) + return print_truncated(y, truncated_to=truncated_to, full_threshold=full_threshold) all_keys = x.keys() if len(x) > full_threshold and len(x) > truncated_to * 2: diff --git a/src/biocutils/print_wrapped_table.py b/src/biocutils/print_wrapped_table.py index 583f583..447c8e7 100644 --- a/src/biocutils/print_wrapped_table.py +++ b/src/biocutils/print_wrapped_table.py @@ -102,9 +102,7 @@ def reinitialize(): return output -def create_floating_names( - names: Optional[List[str]], indices: Sequence[int] -) -> List[str]: +def create_floating_names(names: Optional[List[str]], indices: Sequence[int]) -> List[str]: """Create the floating names to use in :py:meth:`~print_wrapped_table`. If no names are present, positional indices are used instead. diff --git a/src/biocutils/relaxed_combine_columns.py b/src/biocutils/relaxed_combine_columns.py index a4e2747..5e45453 100644 --- a/src/biocutils/relaxed_combine_columns.py +++ b/src/biocutils/relaxed_combine_columns.py @@ -20,11 +20,7 @@ def relaxed_combine_columns(*x: Any): Returns: Combined object, typically the same type as the first entry of ``x`` """ - raise NotImplementedError( - "no `combine_columns` method implemented for '" - + type(x[0]).__name__ - + "' objects." 
- ) + raise NotImplementedError("no `combine_columns` method implemented for '" + type(x[0]).__name__ + "' objects.") if is_package_installed("pandas") is True: diff --git a/src/biocutils/relaxed_combine_rows.py b/src/biocutils/relaxed_combine_rows.py index 2dc634b..6358ded 100644 --- a/src/biocutils/relaxed_combine_rows.py +++ b/src/biocutils/relaxed_combine_rows.py @@ -20,9 +20,7 @@ def relaxed_combine_rows(*x: Any): Returns: Combined object, typically the same type as the first entry of ``x``. """ - raise NotImplementedError( - "no `combine_rows` method implemented for '" + type(x[0]).__name__ + "' objects." - ) + raise NotImplementedError("no `combine_rows` method implemented for '" + type(x[0]).__name__ + "' objects.") if is_package_installed("pandas"): diff --git a/src/biocutils/reverse_index.py b/src/biocutils/reverse_index.py index 528f1ee..4d1f2e0 100644 --- a/src/biocutils/reverse_index.py +++ b/src/biocutils/reverse_index.py @@ -7,7 +7,8 @@ def build_reverse_index(obj: Sequence[str]) -> dict: Only contains the first occurrence of a term. Args: - obj: List of names. + obj: + List of names. Returns: A dictionary mapping names to their index positions. diff --git a/src/biocutils/show_as_cell.py b/src/biocutils/show_as_cell.py index a521554..b54992c 100644 --- a/src/biocutils/show_as_cell.py +++ b/src/biocutils/show_as_cell.py @@ -25,7 +25,7 @@ def show_as_cell(x: Any, indices: Sequence[int]) -> List[str]: try: candidate = str(x[i]) if len(candidate) > 25: - candidate = candidate[:20] + "..." # pick the first two characters, whatever. + candidate = candidate[:20] + "..." # pick the first 20 characters, whatever. nl = candidate.find("\n") if nl >= 0: candidate = candidate[:nl] + "..."
diff --git a/src/biocutils/string_list.py b/src/biocutils/string_list.py index 8fe1d84..2344a83 100644 --- a/src/biocutils/string_list.py +++ b/src/biocutils/string_list.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Any, Iterable, Optional, Sequence, Union from .named_list import NamedList @@ -42,7 +44,7 @@ class StringList(NamedList): def __init__( self, - data: Optional[Iterable] = None, + data: Optional[Sequence] = None, names: Optional[Names] = None, _validate: bool = True, ): @@ -71,30 +73,22 @@ def __init__( data = list(_coerce_to_str(item) for item in original) super().__init__(data, names, _validate=_validate) - def set_value( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "StringList": + def set_value(self, index: Union[int, str], value: Any, in_place: bool = False) -> StringList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_value` after coercing ``value`` to a string.""" return super().set_value(index, _coerce_to_str(value), in_place=in_place) - def set_slice( - self, index: SubscriptTypes, value: Sequence, in_place: bool = False - ) -> "StringList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> StringList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_slice` after coercing ``value`` to strings.""" return super().set_slice(index, _SubscriptCoercer(value), in_place=in_place) - def safe_insert( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "StringList": + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> StringList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_insert` after coercing ``value`` to a string.""" return super().safe_insert(index, _coerce_to_str(value), in_place=in_place) - def safe_append(self, value: Any, in_place: bool = False) -> "StringList": + def safe_append(self, value: Any, in_place: bool = False) -> StringList: """Calls 
:py:meth:`~biocutils.NamedList.NamedList.safe_append` after coercing ``value`` to a string.""" return super().safe_append(_coerce_to_str(value), in_place=in_place) - def safe_extend(self, other: Iterable, in_place: bool = True) -> "StringList": + def safe_extend(self, other: Iterable, in_place: bool = True) -> StringList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_extend` after coercing elements of ``other`` to strings.""" - return super().safe_extend( - (_coerce_to_str(y) for y in other), in_place=in_place - ) + return super().safe_extend((_coerce_to_str(y) for y in other), in_place=in_place) diff --git a/src/biocutils/table.py b/src/biocutils/table.py new file mode 100644 index 0000000..518022a --- /dev/null +++ b/src/biocutils/table.py @@ -0,0 +1,38 @@ +from functools import singledispatch +from typing import Sequence + +from .integer_list import IntegerList + + +@singledispatch +def table(x: Sequence, sort: bool = True) -> IntegerList: + """Create a frequency table of values in a sequence. + + Count the occurrences of each unique value in the input sequence and return + them as an IntegerList with names corresponding to the unique values. + + Args: + x: + A sequence of hashable values. + + sort: + Whether to sort the output by keys (values from x). + + Returns: + An IntegerList where names are the unique values and values are their counts. 
+ """ + output = {} + for v in x: + if v in output: + output[v] += 1 + else: + output[v] = 1 + + if sort: + collected = sorted(output.keys()) + tmp = {} + for y in collected: + tmp[y] = output[y] + output = tmp + + return IntegerList.from_dict(output) diff --git a/src/biocutils/which.py b/src/biocutils/which.py index 87d2fdd..d429149 100644 --- a/src/biocutils/which.py +++ b/src/biocutils/which.py @@ -1,4 +1,5 @@ from typing import Optional, Sequence + import numpy From ece44a08340690739cafbe714a8b40920493ed61 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sat, 27 Dec 2025 07:38:08 -0800 Subject: [PATCH 12/21] get name at index --- src/biocutils/named_list.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/biocutils/named_list.py b/src/biocutils/named_list.py index 6f00201..fc6a9ab 100644 --- a/src/biocutils/named_list.py +++ b/src/biocutils/named_list.py @@ -153,12 +153,27 @@ def set_names(self, names: Optional[Names], in_place: bool = False) -> NamedList output._names = _sanitize_names(names, len(self)) return output + def get_name(self, index: int) -> Optional[str]: + """Get name at an index. + + Args: + index: + Integer index of the element. + Returns: + Name of the element at ``index``, or None if the list has no names. + """ + if self._names is None: + return None + + return self._names.get_value(index) + ################################# #####>>>> Get/set items <<<<##### ################################# def get_value(self, index: Union[str, int]) -> Any: - """ + """Get value at an index. + Args: index: Integer index of the element to obtain. Alternatively, a string
Alternatively, a string From c1a5db15a6a3b778bfcb554db7a431ca98498eee Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sat, 27 Dec 2025 07:39:28 -0800 Subject: [PATCH 13/21] ned --- tests/test_NamedList.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_NamedList.py b/tests/test_NamedList.py index f116ee7..cceede2 100644 --- a/tests/test_NamedList.py +++ b/tests/test_NamedList.py @@ -10,6 +10,7 @@ def test_NamedList_init(): assert x.as_list() == [ 1,2,3,4 ] assert x.get_names().as_list() == ["a", "b", "c", "d"] assert len(x) == 4 + assert x.get_name(0) == "a" y = NamedList(x) assert y.as_list() == [1,2,3,4] @@ -23,6 +24,7 @@ def test_NamedList_init(): x = NamedList([1,2,3,4]) assert x.as_list() == [1,2,3,4] assert x.get_names() is None + assert x.get_name(1) is None def test_Names_iter(): From 0295b0696326429733a92ad9ee628fe88c3c3482 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 27 Dec 2025 15:40:17 +0000 Subject: [PATCH 14/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- setup.py | 1 + src/biocutils/__init__.py | 2 +- src/biocutils/which.py | 2 +- tests/test_BooleanList.py | 4 ++-- tests/test_Factor.py | 8 ++++---- tests/test_FloatList.py | 2 +- tests/test_IntegerList.py | 2 +- tests/test_Names.py | 3 +-- tests/test_StringList.py | 4 ++-- tests/test_biocobject.py | 26 +++++++++++++------------- tests/test_combine_sequences.py | 2 +- tests/test_factorize.py | 2 +- 12 files changed, 29 insertions(+), 29 deletions(-) diff --git a/setup.py b/setup.py index 7f9e0df..4fbfd79 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,7 @@ PyScaffold helps you to put up the scaffold of your new Python project. 
Learn more under: https://pyscaffold.org/ """ + from setuptools import setup if __name__ == "__main__": diff --git a/src/biocutils/__init__.py b/src/biocutils/__init__.py index ed683fd..95ed876 100644 --- a/src/biocutils/__init__.py +++ b/src/biocutils/__init__.py @@ -61,4 +61,4 @@ from .get_height import get_height from .is_high_dimensional import is_high_dimensional -from .bioc_object import BiocObject \ No newline at end of file +from .bioc_object import BiocObject diff --git a/src/biocutils/which.py b/src/biocutils/which.py index d429149..ca7a863 100644 --- a/src/biocutils/which.py +++ b/src/biocutils/which.py @@ -23,7 +23,7 @@ def which( """ if isinstance(x, numpy.ndarray): found = numpy.where(x)[0] - if not dtype is None: + if dtype is not None: found = found.astype(dtype=dtype, copy=False, order="A") return found diff --git a/tests/test_BooleanList.py b/tests/test_BooleanList.py index 593effa..a97fd50 100644 --- a/tests/test_BooleanList.py +++ b/tests/test_BooleanList.py @@ -44,7 +44,7 @@ def test_BooleanList_setitem(): x = BooleanList([False, True, True, False]) x[0] = None assert x.as_list() == [None, True, True, False] - x[0] = 12345 + x[0] = 12345 assert x.as_list() == [True, True, True, False] x[1:3] = [False, False] @@ -54,7 +54,7 @@ def test_BooleanList_setitem(): assert x.as_list() == [None, False, None, False] x.set_names(["A", "B", "C", "D"], in_place=True) - x["C"] = True + x["C"] = True assert x.as_list() == [None, False, True, False] x[["A", "B"]] = [False, True] assert x.as_list() == [False, True, True, False] diff --git a/tests/test_Factor.py b/tests/test_Factor.py index fdcb004..9f1ad76 100644 --- a/tests/test_Factor.py +++ b/tests/test_Factor.py @@ -17,13 +17,13 @@ def test_factor_init(): assert len(f) == 6 assert list(f) == ["A", "B", None, "A", None, "E"] assert list(f.get_codes()) == [0, 1, -1, 0, -1, 4] - + f = Factor([None] * 10, levels=["A", "B", "C", "D", "E"]) assert list(f) == [None] * 10 # Works with NumPy inputs. 
f = Factor(numpy.array([4,3,2,1,0], dtype=numpy.uint8), levels=numpy.array(["A", "B", "C", "D", "E"])) - assert len(f) == 5 + assert len(f) == 5 assert f.get_codes().dtype == numpy.int8 assert isinstance(f.get_levels(), StringList) @@ -98,7 +98,7 @@ def test_Factor_get_value(): def test_Factor_get_slice(): f = Factor([0, 1, 2, -1, 2, 4], levels=["A", "B", "C", "D", "E"]) - sub = f.get_slice([0, 1]) + sub = f.get_slice([0, 1]) assert list(sub) == ["A", "B"] assert sub.get_levels() == f.get_levels() @@ -176,7 +176,7 @@ def test_Factor_setitem(): f[-1] = "D" assert list(f.get_codes()) == [1, 1, 0, 0, 2, 3] - f[2:5] = Factor([4, 3, 1], levels=["A", "B", "C", "D", "E"]) + f[2:5] = Factor([4, 3, 1], levels=["A", "B", "C", "D", "E"]) assert list(f.get_codes()) == [1, 1, 4, 3, 1, 3] assert f.get_levels() == f.get_levels() diff --git a/tests/test_FloatList.py b/tests/test_FloatList.py index 860d170..771f37c 100644 --- a/tests/test_FloatList.py +++ b/tests/test_FloatList.py @@ -44,7 +44,7 @@ def test_FloatList_setitem(): x = FloatList([ 0.5, -2.1, -3.2, -4.5 ]) x[0] = None assert x.as_list() == [None, -2.1, -3.2, -4.5] - x[0] = 12345 + x[0] = 12345 assert x.as_list() == [12345.0, -2.1, -3.2, -4.5] x[1:3] = [10.1, 20.2] diff --git a/tests/test_IntegerList.py b/tests/test_IntegerList.py index d3df82a..61c2970 100644 --- a/tests/test_IntegerList.py +++ b/tests/test_IntegerList.py @@ -44,7 +44,7 @@ def test_IntegerList_setitem(): x = IntegerList([1,2,3,4]) x[0] = None assert x.as_list() == [None, 2, 3, 4] - x[0] = 12345 + x[0] = 12345 assert x.as_list() == [12345, 2, 3, 4] x[1:3] = [10, 20] diff --git a/tests/test_Names.py b/tests/test_Names.py index 10945ae..d9f6526 100644 --- a/tests/test_Names.py +++ b/tests/test_Names.py @@ -185,7 +185,7 @@ def test_Names_generics(): sub = biocutils.subset_sequence(x, [0,3,2,1]) assert isinstance(sub, Names) assert sub.as_list() == ["1", "4", "3", "2"] - + y = ["a", "b", "c", "d"] com = biocutils.combine_sequences(x, y) assert 
isinstance(com, Names) @@ -195,4 +195,3 @@ def test_Names_generics(): ass = biocutils.assign_sequence(x, range(1, 3), y) assert isinstance(ass, Names) assert ass.as_list() == ["1", "b", "c", "4"] - diff --git a/tests/test_StringList.py b/tests/test_StringList.py index 394ceac..f7aa6f2 100644 --- a/tests/test_StringList.py +++ b/tests/test_StringList.py @@ -44,7 +44,7 @@ def test_StringList_setitem(): x = StringList([1,2,3,4]) x[0] = None assert x.as_list() == [None, "2", "3", "4"] - x[0] = 12345 + x[0] = 12345 assert x.as_list() == ["12345", "2", "3", "4"] x[1:3] = [10, 20] @@ -89,7 +89,7 @@ def test_StringList_generics(): sub = biocutils.subset_sequence(x, [0,3,2,1]) assert isinstance(sub, StringList) assert sub.as_list() == ["1", "4", "3", "2"] - + y = ["a", "b", "c", "d"] com = biocutils.combine_sequences(x, y) assert isinstance(com, StringList) diff --git a/tests/test_biocobject.py b/tests/test_biocobject.py index 3645936..e192eb5 100644 --- a/tests/test_biocobject.py +++ b/tests/test_biocobject.py @@ -3,7 +3,7 @@ from biocutils.bioc_object import BiocObject from biocutils.named_list import NamedList - + def test_init_empty(): """Test initialization with default values.""" obj = BiocObject() @@ -14,7 +14,7 @@ def test_init_with_dict(): """Test initialization with a dictionary.""" meta = {"author": "jkanche", "version": 1} obj = BiocObject(metadata=meta) - + assert isinstance(obj.metadata, NamedList) assert len(obj.metadata) == 2 @@ -28,7 +28,7 @@ def test_metadata_property_setter(): obj = BiocObject() new_meta = {"tag": "experiment_1"} obj.metadata = new_meta - + assert len(obj.metadata) == 1 assert isinstance(obj.metadata, NamedList) @@ -36,21 +36,21 @@ def test_set_metadata_copy(): """Test functional style set_metadata (copy-on-write).""" obj = BiocObject(metadata={"id": 1}) original_id = id(obj) - + new_obj = obj.set_metadata({"id": 2}) - + assert id(new_obj) != original_id assert len(new_obj.metadata) == 1 - - assert len(obj.metadata) == 1 + + assert 
len(obj.metadata) == 1 def test_set_metadata_inplace(): """Test imperative style set_metadata (in-place).""" obj = BiocObject(metadata={"id": 1}) original_id = id(obj) - + new_obj = obj.set_metadata({"id": 2}, in_place=True) - + assert id(new_obj) == original_id assert new_obj is obj assert len(obj.metadata) == 1 @@ -59,18 +59,18 @@ def test_inheritance(): """Test that subclasses maintain their type when copying.""" class GenomicContainer(BiocObject): pass - + obj = GenomicContainer(metadata={"genome": "hg38"}) new_obj = obj.set_metadata({"genome": "mm10"}) - + assert isinstance(new_obj, GenomicContainer) assert new_obj is not obj def test_shallow_copy_behavior(): heavy_data = ["large", "data"] - + obj = BiocObject() obj._heavy_data = heavy_data new_obj = obj.set_metadata({"new": "meta"}) assert new_obj is not obj - assert new_obj._heavy_data is obj._heavy_data \ No newline at end of file + assert new_obj._heavy_data is obj._heavy_data diff --git a/tests/test_combine_sequences.py b/tests/test_combine_sequences.py index 3eb8f36..4ecdd21 100644 --- a/tests/test_combine_sequences.py +++ b/tests/test_combine_sequences.py @@ -35,7 +35,7 @@ def test_basic_dense_masked(): x = [1, 2, 3] y = [0.1, 0.2] xd = np.array(x) - yd = np.ma.array(y, mask=[True]*2) + yd = np.ma.array(y, mask=[True]*2) zcomb = combine_sequences(xd, yd) z = x + y diff --git a/tests/test_factorize.py b/tests/test_factorize.py index 4deaed9..4c9d25a 100644 --- a/tests/test_factorize.py +++ b/tests/test_factorize.py @@ -52,7 +52,7 @@ def test_factorize_sorted(): def test_factorize_factor(): f = Factor([4, 3, 2, 1, 0], ["A", "B", "C", "D", "E"]) lev, ind = factorize(f) - assert lev == ["E", "D", "C", "B", "A"] + assert lev == ["E", "D", "C", "B", "A"] assert list(ind) == [0, 1, 2, 3, 4] lev, ind = factorize(f, sort_levels=True) From d4b83eb7aeb8add1b3b10501e74f1484627d4c8e Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sun, 28 Dec 2025 10:43:45 -0800 Subject: [PATCH 15/21] delete method for 
namedlist/names --- src/biocutils/named_list.py | 41 +++++++++++++++++++++++++++ src/biocutils/names.py | 29 +++++++++++++++++++ tests/test_NamedList.py | 55 +++++++++++++++++++++++++++++++++++++ tests/test_Names.py | 42 ++++++++++++++++++++++++++++ 4 files changed, 167 insertions(+) diff --git a/src/biocutils/named_list.py b/src/biocutils/named_list.py index fc6a9ab..c02b361 100644 --- a/src/biocutils/named_list.py +++ b/src/biocutils/named_list.py @@ -435,6 +435,47 @@ def __iadd__(self, other: list): self.extend(other) return self + def safe_delete(self, index: Union[int, str, slice], in_place: bool = False) -> NamedList: + """ + Args: + index: + An integer index or slice containing position(s) to delete. + Alternatively, the name of the value to delete (the first + occurrence of the name is used). + + in_place: + Whether to modify the current object in place. + + Returns: + A ``NamedList`` where the item at ``index`` is removed. This is a + new object if ``in_place = False``, otherwise it is a reference to + the current object. 
+ """ + if in_place: + output = self + else: + output = self._shallow_copy() + output._data = output._data[:] # Shallow copy of the list + if output._names is not None: + output._names = output._names.copy() + + if isinstance(index, str): + index = _name_to_position(self._names, index) + + del output._data[index] + if output._names is not None: + output._names.delete(index) + + return output + + def delete(self, index: Union[int, str, slice]): + """Alias for :py:meth:`~safe_delete` with ``in_place = True``.""" + self.safe_delete(index, in_place=True) + + def __delitem__(self, index: Union[int, str, slice]): + """Alias for :py:meth:`~delete`.""" + self.delete(index) + ################################ #####>>>> Copy methods <<<<##### ################################ diff --git a/src/biocutils/names.py b/src/biocutils/names.py index aa232cd..011da40 100644 --- a/src/biocutils/names.py +++ b/src/biocutils/names.py @@ -328,6 +328,35 @@ def __iadd__(self, other: list): self.extend(other) return self + def safe_delete(self, index: Union[int, slice], in_place: bool = False) -> Names: + """ + Args: + index: + Position(s) of the name(s) to delete. + + in_place: + Whether to perform this deletion in-place. + + Returns: + A ``Names`` object with the deleted name(s). This is a new object + if ``in_place = False``, otherwise it is a reference to the current + object. 
+ """ + output = self._define_output(in_place) + if in_place: + output._wipe_reverse_index() + + del output._names[index] + return output + + def delete(self, index: Union[int, slice]): + """Alias for :py:attr:`~safe_delete` with ``in_place = True``.""" + self.safe_delete(index, in_place=True) + + def __delitem__(self, index: Union[int, slice]): + """Alias for :py:attr:`~delete`.""" + self.delete(index) + ################################ #####>>>> Copy methods <<<<##### ################################ diff --git a/tests/test_NamedList.py b/tests/test_NamedList.py index cceede2..c0079ad 100644 --- a/tests/test_NamedList.py +++ b/tests/test_NamedList.py @@ -257,3 +257,58 @@ def test_NamedList_generics(): y = biocutils.assign_sequence(x, [1, 3], NamedList([ 20, 40 ], names=["b", "d" ])) assert y.as_list() == [ 1, 20, 3, 40 ] assert y.get_names().as_list() == [ "A", "B", "C", "D" ] # doesn't set the names, as per policy. + +def test_NamedList_safe_delete(): + x = NamedList([1, 2, 3, 4], names=["A", "B", "C", "D"]) + + y = x.safe_delete(1) + assert y.as_list() == [1, 3, 4] + assert y.get_names().as_list() == ["A", "C", "D"] + assert x.as_list() == [1, 2, 3, 4] + + y = x.safe_delete("C") + assert y.as_list() == [1, 2, 4] + assert y.get_names().as_list() == ["A", "B", "D"] + + y = x.safe_delete(slice(1, 3)) + assert y.as_list() == [1, 4] + assert y.get_names().as_list() == ["A", "D"] + + y = x.safe_delete(-1) + assert y.as_list() == [1, 2, 3] + assert y.get_names().as_list() == ["A", "B", "C"] + + +def test_NamedList_delete(): + x = NamedList([1, 2, 3, 4], names=["A", "B", "C", "D"]) + + x.delete(0) + assert x.as_list() == [2, 3, 4] + assert x.get_names().as_list() == ["B", "C", "D"] + + x.delete("D") + assert x.as_list() == [2, 3] + assert x.get_names().as_list() == ["B", "C"] + + +def test_NamedList_delitem(): + x = NamedList([1, 2, 3, 4], names=["A", "B", "C", "D"]) + + del x[1] + assert x.as_list() == [1, 3, 4] + assert x.get_names().as_list() == ["A", "C", "D"] + + 
del x["A"] + assert x.as_list() == [3, 4] + assert x.get_names().as_list() == ["C", "D"] + + x = NamedList([1, 2, 3, 4], names=["A", "B", "C", "D"]) + del x[0:2] + assert x.as_list() == [3, 4] + assert x.get_names().as_list() == ["C", "D"] + + with pytest.raises(KeyError): + del x["Missing"] + + with pytest.raises(IndexError): + del x[10] \ No newline at end of file diff --git a/tests/test_Names.py b/tests/test_Names.py index d9f6526..410b78a 100644 --- a/tests/test_Names.py +++ b/tests/test_Names.py @@ -195,3 +195,45 @@ def test_Names_generics(): ass = biocutils.assign_sequence(x, range(1, 3), y) assert isinstance(ass, Names) assert ass.as_list() == ["1", "b", "c", "4"] + +def test_Names_safe_delete(): + x = Names(["A", "B", "C", "D"]) + + y = x.safe_delete(1) + assert y.as_list() == ["A", "C", "D"] + assert y.map("B") == -1 + assert y.map("C") == 1 + assert x.as_list() == ["A", "B", "C", "D"] + + y = x.safe_delete(slice(0, 2)) + assert y.as_list() == ["C", "D"] + assert y.map("A") == -1 + assert y.map("C") == 0 + + +def test_Names_delete(): + x = Names(["A", "B", "C", "D"]) + + x.delete(2) + assert x.as_list() == ["A", "B", "D"] + assert x.map("C") == -1 + assert x.map("D") == 2 + + x.delete(0) + assert x.as_list() == ["B", "D"] + assert x.map("A") == -1 + assert x.map("B") == 0 + + +def test_Names_delitem(): + x = Names(["1", "2", "3", "4"]) + + del x[1] + assert x.as_list() == ["1", "3", "4"] + assert x.map("2") == -1 + assert x.map("3") == 1 + + del x[0:2] + assert x.as_list() == ["4"] + assert x.map("1") == -1 + assert x.map("4") == 0 \ No newline at end of file From 4ac96b5c823bda6d13c0a70e0a0542ae70ba9867 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 28 Dec 2025 18:43:56 +0000 Subject: [PATCH 16/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_NamedList.py | 4 ++-- tests/test_Names.py | 2 +- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/tests/test_NamedList.py b/tests/test_NamedList.py index c0079ad..54dadf7 100644 --- a/tests/test_NamedList.py +++ b/tests/test_NamedList.py @@ -309,6 +309,6 @@ def test_NamedList_delitem(): with pytest.raises(KeyError): del x["Missing"] - + with pytest.raises(IndexError): - del x[10] \ No newline at end of file + del x[10] diff --git a/tests/test_Names.py b/tests/test_Names.py index 410b78a..b1cfc92 100644 --- a/tests/test_Names.py +++ b/tests/test_Names.py @@ -236,4 +236,4 @@ def test_Names_delitem(): del x[0:2] assert x.as_list() == ["4"] assert x.map("1") == -1 - assert x.map("4") == 0 \ No newline at end of file + assert x.map("4") == 0 From 571abc576cc0fad6e2e88eb88d9caf990f57f048 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sun, 28 Dec 2025 10:58:06 -0800 Subject: [PATCH 17/21] more pythonish methods --- src/biocutils/factor.py | 54 +++++++++++++++++++++++++++++++++++++ src/biocutils/named_list.py | 50 +++++++++++++++++++++++++++++++++- src/biocutils/names.py | 11 ++++++++ tests/test_Factor.py | 51 +++++++++++++++++++++++++++++++++++ tests/test_NamedList.py | 20 +++++++++++++- tests/test_Names.py | 12 ++++++++- 6 files changed, 195 insertions(+), 3 deletions(-) diff --git a/src/biocutils/factor.py b/src/biocutils/factor.py index 2bdad7e..1c9c496 100644 --- a/src/biocutils/factor.py +++ b/src/biocutils/factor.py @@ -764,6 +764,60 @@ def from_sequence( levels, indices = factorize(x, levels=levels, sort_levels=sort_levels, **kwargs) return Factor(indices, levels=levels, ordered=ordered, names=names) + ################################ + #####>>>> List methods <<<<##### + ################################ + + def as_list(self) -> list: + """ + Returns: + List of strings corresponding to the factor elements. + Missing values are represented as None. 
+ """ + return [self._levels[c] if c >= 0 else None for c in self._codes] + + def safe_delete(self, index: Union[int, str, slice], in_place: bool = False) -> Factor: + """ + Args: + index: + Integer index or slice containing position(s) to delete. + Alternatively, the name of the value to delete (the first + occurrence of the name is used). + + in_place: + Whether to modify the current object in place. + + Returns: + A ``Factor`` where the item at ``index`` is removed. This is a + new object if ``in_place = False``, otherwise it is a reference to + the current object. + """ + if in_place: + output = self + else: + output = copy(self) + output._codes = copy(self._codes) + if output._names is not None: + output._names = output._names.copy() + + if isinstance(index, str): + index = _name_to_position(output._names, index) + + output._codes = numpy.delete(output._codes, index) + + if output._names is not None: + output._names.delete(index) + + return output + + def delete(self, index: Union[int, str, slice]): + """Alias for :py:meth:`~safe_delete` with ``in_place = True``.""" + self.safe_delete(index, in_place=True) + + def __delitem__(self, index: Union[int, str, slice]): + """Alias for :py:meth:`~delete`.""" + self.delete(index) + @subset_sequence.register def _subset_sequence_Factor(x: Factor, indices: Sequence[int]) -> Factor: diff --git a/src/biocutils/named_list.py b/src/biocutils/named_list.py index c02b361..cd406b4 100644 --- a/src/biocutils/named_list.py +++ b/src/biocutils/named_list.py @@ -1,7 +1,7 @@ from __future__ import annotations from copy import deepcopy -from typing import Any, Dict, Iterable, Optional, Sequence, Union +from typing import Any, Dict, Iterable, Optional, Sequence, Tuple, Union from .assign_sequence import assign_sequence from .combine_sequences import combine_sequences @@ -476,6 +476,54 @@ def __delitem__(self, index: Union[int, str, slice]): """Alias for :py:meth:`~delete`.""" self.delete(index) + ##################################### 
+ #####>>>> dict like methods <<<<##### + ##################################### + + def keys(self) -> Iterable[str]: + """ + Returns: + Iterator over the names of the list elements. + """ + if self._names is None: + return iter([]) + return iter(self._names) + + def values(self) -> Iterable[Any]: + """ + Returns: + Iterator over the values of the list elements. + """ + return iter(self._data) + + def items(self) -> Iterable[Tuple[str, Any]]: + """ + Returns: + Iterator over (name, value) pairs. + If names are missing, keys are returned as stringified indices. + """ + if self._names is not None: + return zip(self._names, self._data) + else: + return zip((str(i) for i in range(len(self))), self._data) + + def get(self, key: Union[str, int], default: Any = None) -> Any: + """ + Args: + key: + Name or index of the element. + + default: + Value to return if ``key`` is not found. + + Returns: + Value at ``key`` or ``default``. + """ + try: + return self.get_value(key) + except (KeyError, IndexError): + return default + ################################ #####>>>> Copy methods <<<<##### ################################ diff --git a/src/biocutils/names.py b/src/biocutils/names.py index 011da40..d2c2e25 100644 --- a/src/biocutils/names.py +++ b/src/biocutils/names.py @@ -121,6 +121,17 @@ def map(self, name: str) -> int: else: return -1 + def __contains__(self, name: str) -> bool: + """ + Args: + name: + Name to check. + + Returns: + True if ``name`` exists, otherwise False. 
+ """ + return self.map(name) >= 0 + ################################# #####>>>> Get/set items <<<<##### ################################# diff --git a/tests/test_Factor.py b/tests/test_Factor.py index 9f1ad76..1b03d0f 100644 --- a/tests/test_Factor.py +++ b/tests/test_Factor.py @@ -339,3 +339,54 @@ def test_Factor_init_from_list(): assert isinstance(f1, Factor) assert len(f1) == 5 assert len(f1.get_levels()) == 3 + +def test_Factor_as_list(): + f = Factor([0, 1, -1, 0], levels=["A", "B"]) + assert f.as_list() == ["A", "B", None, "A"] + + empty = Factor([], levels=[]) + assert empty.as_list() == [] + + +def test_Factor_safe_delete(): + f = Factor([0, 1, 2, 0], levels=["A", "B", "C"], names=["x", "y", "z", "w"]) + + y = f.safe_delete(1) + assert y.as_list() == ["A", "C", "A"] + assert y.get_names().as_list() == ["x", "z", "w"] + assert f.as_list() == ["A", "B", "C", "A"] + + y = f.safe_delete("y") + assert y.as_list() == ["A", "C", "A"] + assert y.get_names().as_list() == ["x", "z", "w"] + + y = f.safe_delete(slice(1, 3)) + assert y.as_list() == ["A", "A"] + assert y.get_names().as_list() == ["x", "w"] + + +def test_Factor_delete(): + f = Factor([0, 1, 2], levels=["A", "B", "C"], names=["x", "y", "z"]) + + f.delete(1) + assert f.as_list() == ["A", "C"] + assert f.get_names().as_list() == ["x", "z"] + + f.delete("z") + assert f.as_list() == ["A"] + assert f.get_names().as_list() == ["x"] + + +def test_Factor_delitem(): + f = Factor([0, 1, 2, 0], levels=["A", "B", "C"], names=["x", "y", "z", "w"]) + + del f["y"] + assert f.as_list() == ["A", "C", "A"] + assert f.get_names().as_list() == ["x", "z", "w"] + + del f[0] + assert f.as_list() == ["C", "A"] + assert f.get_names().as_list() == ["z", "w"] + + del f[:] + assert len(f) == 0 \ No newline at end of file diff --git a/tests/test_NamedList.py b/tests/test_NamedList.py index c0079ad..bee98e3 100644 --- a/tests/test_NamedList.py +++ b/tests/test_NamedList.py @@ -311,4 +311,22 @@ def test_NamedList_delitem(): del 
x["Missing"] with pytest.raises(IndexError): - del x[10] \ No newline at end of file + del x[10] + +def test_NamedList_dict_methods(): + x = NamedList([1, 2, 3], names=["A", "B", "C"]) + + assert list(x.keys()) == ["A", "B", "C"] + assert list(x.values()) == [1, 2, 3] + assert list(x.items()) == [("A", 1), ("B", 2), ("C", 3)] + + assert x.get("A") == 1 + assert x.get("C") == 3 + assert x.get("Missing") is None + assert x.get("Missing", 100) == 100 + assert x.get(1) == 2 # Integer index access via get + + y = NamedList([10, 20]) + assert list(y.keys()) == [] + assert list(y.values()) == [10, 20] + assert list(y.items()) == [("0", 10), ("1", 20)] \ No newline at end of file diff --git a/tests/test_Names.py b/tests/test_Names.py index 410b78a..1a3231f 100644 --- a/tests/test_Names.py +++ b/tests/test_Names.py @@ -236,4 +236,14 @@ def test_Names_delitem(): del x[0:2] assert x.as_list() == ["4"] assert x.map("1") == -1 - assert x.map("4") == 0 \ No newline at end of file + assert x.map("4") == 0 + +def test_Names_contains(): + x = Names(["A", "B", "C"]) + assert "A" in x + assert "B" in x + assert "Z" not in x + + # Works with duplicates + y = Names(["A", "A", "B"]) + assert "A" in y \ No newline at end of file From fa9426175198554ea322f35ec39eb43e459720de Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sun, 28 Dec 2025 11:01:39 -0800 Subject: [PATCH 18/21] add is_unique --- src/biocutils/names.py | 9 +++++++++ tests/test_Names.py | 12 +++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/biocutils/names.py b/src/biocutils/names.py index d2c2e25..128f31e 100644 --- a/src/biocutils/names.py +++ b/src/biocutils/names.py @@ -398,6 +398,15 @@ def __deepcopy__(self, memo=None, _nil=[]) -> Names: A deep copy of this ``Names`` object with the same contents. """ return type(self)(deepcopy(self._names, memo, _nil), _validate=False) + + @property + def is_unique(self) -> bool: + """ + Returns: + True if all names are unique, otherwise False. 
+ """ + self._populate_reverse_index() + return len(self._reverse) == len(self._names) @subset_sequence.register diff --git a/tests/test_Names.py b/tests/test_Names.py index 1a3231f..6d541ac 100644 --- a/tests/test_Names.py +++ b/tests/test_Names.py @@ -246,4 +246,14 @@ def test_Names_contains(): # Works with duplicates y = Names(["A", "A", "B"]) - assert "A" in y \ No newline at end of file + assert "A" in y + +def test_Names_is_unique(): + x = Names(["A", "B", "C"]) + assert x.is_unique + + y = Names(["A", "B", "A"]) + assert not y.is_unique + + empty = Names([]) + assert empty.is_unique \ No newline at end of file From 0d7594d241c97ae9177e0e468732f01b455c7ca2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 28 Dec 2025 19:02:20 +0000 Subject: [PATCH 19/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/biocutils/names.py | 2 +- tests/test_Factor.py | 12 ++++++------ tests/test_NamedList.py | 2 +- tests/test_Names.py | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/biocutils/names.py b/src/biocutils/names.py index 128f31e..1047f79 100644 --- a/src/biocutils/names.py +++ b/src/biocutils/names.py @@ -398,7 +398,7 @@ def __deepcopy__(self, memo=None, _nil=[]) -> Names: A deep copy of this ``Names`` object with the same contents. 
""" return type(self)(deepcopy(self._names, memo, _nil), _validate=False) - + @property def is_unique(self) -> bool: """ diff --git a/tests/test_Factor.py b/tests/test_Factor.py index 1b03d0f..c78bb9b 100644 --- a/tests/test_Factor.py +++ b/tests/test_Factor.py @@ -343,7 +343,7 @@ def test_Factor_init_from_list(): def test_Factor_as_list(): f = Factor([0, 1, -1, 0], levels=["A", "B"]) assert f.as_list() == ["A", "B", None, "A"] - + empty = Factor([], levels=[]) assert empty.as_list() == [] @@ -367,7 +367,7 @@ def test_Factor_safe_delete(): def test_Factor_delete(): f = Factor([0, 1, 2], levels=["A", "B", "C"], names=["x", "y", "z"]) - + f.delete(1) assert f.as_list() == ["A", "C"] assert f.get_names().as_list() == ["x", "z"] @@ -379,14 +379,14 @@ def test_Factor_delete(): def test_Factor_delitem(): f = Factor([0, 1, 2, 0], levels=["A", "B", "C"], names=["x", "y", "z", "w"]) - + del f["y"] assert f.as_list() == ["A", "C", "A"] assert f.get_names().as_list() == ["x", "z", "w"] - + del f[0] assert f.as_list() == ["C", "A"] assert f.get_names().as_list() == ["z", "w"] - + del f[:] - assert len(f) == 0 \ No newline at end of file + assert len(f) == 0 diff --git a/tests/test_NamedList.py b/tests/test_NamedList.py index 66253c3..dddc167 100644 --- a/tests/test_NamedList.py +++ b/tests/test_NamedList.py @@ -319,7 +319,7 @@ def test_NamedList_dict_methods(): assert list(x.keys()) == ["A", "B", "C"] assert list(x.values()) == [1, 2, 3] assert list(x.items()) == [("A", 1), ("B", 2), ("C", 3)] - + assert x.get("A") == 1 assert x.get("C") == 3 assert x.get("Missing") is None diff --git a/tests/test_Names.py b/tests/test_Names.py index c9ae475..43ce660 100644 --- a/tests/test_Names.py +++ b/tests/test_Names.py @@ -243,7 +243,7 @@ def test_Names_contains(): assert "A" in x assert "B" in x assert "Z" not in x - + # Works with duplicates y = Names(["A", "A", "B"]) assert "A" in y @@ -251,9 +251,9 @@ def test_Names_contains(): def test_Names_is_unique(): x = Names(["A", "B", "C"]) 
assert x.is_unique - + y = Names(["A", "B", "A"]) assert not y.is_unique - + empty = Names([]) assert empty.is_unique From 25fb68a0714f10ff177c803c5f8b59061fbf75d3 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sun, 28 Dec 2025 11:05:17 -0800 Subject: [PATCH 20/21] add lint errors --- src/biocutils/factor.py | 4 ++-- src/biocutils/float_list.py | 2 +- src/biocutils/integer_list.py | 2 +- src/biocutils/show_as_cell.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/biocutils/factor.py b/src/biocutils/factor.py index 1c9c496..5aa5993 100644 --- a/src/biocutils/factor.py +++ b/src/biocutils/factor.py @@ -424,8 +424,8 @@ def set_value(self, index: Union[str, int], value: Union[str, None], in_place: b output._codes[index] = -1 return output - for i, l in enumerate(output._levels): - if l == value: + for i, lev in enumerate(output._levels): + if lev == value: output._codes[index] = i return output diff --git a/src/biocutils/float_list.py b/src/biocutils/float_list.py index 7d3c0e3..7ed5c47 100644 --- a/src/biocutils/float_list.py +++ b/src/biocutils/float_list.py @@ -12,7 +12,7 @@ def _coerce_to_float(x: Any): return None try: return float(x) - except: + except Exception as _: return None diff --git a/src/biocutils/integer_list.py b/src/biocutils/integer_list.py index ff41172..a4825b0 100644 --- a/src/biocutils/integer_list.py +++ b/src/biocutils/integer_list.py @@ -12,7 +12,7 @@ def _coerce_to_int(x: Any): return None try: return int(x) - except: + except Exception as _: return None diff --git a/src/biocutils/show_as_cell.py b/src/biocutils/show_as_cell.py index b54992c..072c17f 100644 --- a/src/biocutils/show_as_cell.py +++ b/src/biocutils/show_as_cell.py @@ -30,6 +30,6 @@ def show_as_cell(x: Any, indices: Sequence[int]) -> List[str]: if nl >= 0: candidate = candidate[:nl] + "..." 
output.append(candidate) - except: + except Exception as _: output.append("####") return output From 84913ae85fded3acb06c71a6872e6e7024a4514e Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sun, 28 Dec 2025 11:14:42 -0800 Subject: [PATCH 21/21] Update changelog --- CHANGELOG.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a52ad48..52908da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,16 @@ # Changelog -## Version 0.3.0 +## Version 0.3.0 - 0.3.1 - Provide a base `BiocObject` class similar to the `Annotated` class in Bioconductor. The class provides `metadata` slot, accessors and validation functions. +- Renaming code files to follow PEP guidelines +- Update GitHub Actions and workflow to the new biocsetup versions +- Changes to improve `NamedList`, `Names` classes + - get name at index + - delete method for `NamedList`/`Names` + - add `is_unique` + - fix lint errors +- linting documentation, type hints, etc. ## Version 0.2.3