diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml new file mode 100644 index 0000000..405fee0 --- /dev/null +++ b/.github/workflows/publish-pypi.yml @@ -0,0 +1,52 @@ +name: Publish to PyPI + +on: + push: + tags: "*" + +jobs: + build: + runs-on: ubuntu-latest + permissions: + id-token: write + repository-projects: write + contents: write + pages: write + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: 3.12 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox + + - name: Test with tox + run: | + tox + + - name: Build Project and Publish + run: | + python -m tox -e clean,build + + # This uses the trusted publisher workflow so no token is required. + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + - name: Build docs + run: | + tox -e docs + + - run: touch ./docs/_build/html/.nojekyll + + - name: GH Pages Deployment + uses: JamesIves/github-pages-deploy-action@v4 + with: + branch: gh-pages # The branch the action should deploy to. 
+ folder: ./docs/_build/html + clean: true # Automatically remove deleted files from the deploy branch diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml deleted file mode 100644 index 030cd10..0000000 --- a/.github/workflows/pypi-publish.yml +++ /dev/null @@ -1,51 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Publish to PyPI - -on: - push: - tags: "*" - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: 3.11 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install flake8 pytest tox - # - name: Lint with flake8 - # run: | - # # stop the build if there are Python syntax errors or undefined names - # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - # # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with tox - run: | - tox - - name: Build docs - run: | - tox -e docs - - run: touch ./docs/_build/html/.nojekyll - - name: GH Pages Deployment - uses: JamesIves/github-pages-deploy-action@4.1.3 - with: - branch: gh-pages # The branch the action should deploy to. 
- folder: ./docs/_build/html - clean: true # Automatically remove deleted files from the deploy branch - - name: Build Project and Publish - run: | - python -m tox -e clean,build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_PASSWORD }} diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml deleted file mode 100644 index 22f6c4a..0000000 --- a/.github/workflows/pypi-test.yml +++ /dev/null @@ -1,40 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Test the library - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ] - - name: Python ${{ matrix.python-version }} - steps: - - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install flake8 pytest tox - # - name: Lint with flake8 - # run: | - # # stop the build if there are Python syntax errors or undefined names - # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - # # flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with tox - run: | - tox diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml new file mode 100644 index 0000000..e0f247d --- /dev/null +++ b/.github/workflows/run-tests.yml @@ -0,0 +1,73 @@ +name: Test the library + +on: + push: + branches: + - master # for legacy repos + - main + pull_request: + branches: + - master # for legacy repos + - main + workflow_dispatch: # Allow manually triggering the workflow + schedule: + # Run roughly every 15 days at 00:00 UTC + # (useful to check if updates on dependencies break the package) + - cron: "0 0 1,16 * *" + +permissions: + contents: read + +concurrency: + group: >- + ${{ github.workflow }}-${{ github.ref_type }}- + ${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +jobs: + test: + strategy: + matrix: + python: ["3.10", "3.11", "3.12", "3.13", "3.14"] + platform: + - ubuntu-latest + - macos-latest + - windows-latest + runs-on: ${{ matrix.platform }} + name: Python ${{ matrix.python }}, ${{ matrix.platform }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + id: setup-python + with: + python-version: ${{ matrix.python }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox coverage + + - name: Run tests + run: >- + pipx run --python '${{ steps.setup-python.outputs.python-path }}' + tox + -- -rFEx --durations 10 --color yes --cov --cov-branch --cov-report=xml # pytest args + + - name: Check for codecov token availability + id: codecov-check + shell: bash + run: | + if [ ${{ secrets.CODECOV_TOKEN }} != '' ]; then + echo "codecov=true" >> $GITHUB_OUTPUT; + else + echo "codecov=false" >> $GITHUB_OUTPUT; + fi + + - name: Upload coverage reports to Codecov with GitHub Action + uses: codecov/codecov-action@v5 + if: ${{ steps.codecov-check.outputs.codecov == 'true' }} + env: + CODECOV_TOKEN: ${{ 
secrets.CODECOV_TOKEN }} + slug: ${{ github.repository }} + flags: ${{ matrix.platform }} - py${{ matrix.python }} diff --git a/CHANGELOG.md b/CHANGELOG.md index a52ad48..52908da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,16 @@ # Changelog -## Version 0.3.0 +## Version 0.3.0 - 0.3.1 - Provide a base `BiocObject` class similar to the `Annotated` class in Bioconductor. The class provides `metadata` slot, accessors and validation functions. +- Rename code files to follow PEP 8 naming guidelines +- Update GitHub Actions workflows to the new biocsetup versions +- Changes to improve `NamedList`, `Names` classes + - get name at index + - delete method for `NamedList`/`Names` + - add `is_unique` + - add lint errors +- Linting documentation, type hints, etc. ## Version 0.2.3 diff --git a/setup.py b/setup.py index 7f9e0df..4fbfd79 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,7 @@ PyScaffold helps you to put up the scaffold of your new Python project. Learn more under: https://pyscaffold.org/ """ + from setuptools import setup if __name__ == "__main__": diff --git a/src/biocutils/__init__.py b/src/biocutils/__init__.py index 6f2326f..95ed876 100644 --- a/src/biocutils/__init__.py +++ b/src/biocutils/__init__.py @@ -15,13 +15,13 @@ finally: del version, PackageNotFoundError -from .Factor import Factor -from .StringList import StringList -from .IntegerList import IntegerList -from .FloatList import FloatList -from .BooleanList import BooleanList -from .Names import Names -from .NamedList import NamedList +from .factor import Factor +from .string_list import StringList +from .integer_list import IntegerList +from .float_list import FloatList +from .boolean_list import BooleanList +from .names import Names +from .named_list import NamedList from .factorize import factorize from .intersect import intersect @@ -60,3 +60,5 @@ from .get_height import get_height from .is_high_dimensional import is_high_dimensional + +from .bioc_object import BiocObject diff --git 
a/src/biocutils/_utils_combine.py b/src/biocutils/_utils_combine.py index 328bf6a..2221840 100644 --- a/src/biocutils/_utils_combine.py +++ b/src/biocutils/_utils_combine.py @@ -26,6 +26,8 @@ def _check_array_dimensions(x, active: int) -> bool: + ")" ) + return True + def _coerce_sparse_matrix(first, combined, module): if isinstance(first, module.csr_matrix): diff --git a/src/biocutils/assign.py b/src/biocutils/assign.py index a590033..875033d 100644 --- a/src/biocutils/assign.py +++ b/src/biocutils/assign.py @@ -14,7 +14,8 @@ def assign(x: Any, indices: Sequence[int], replacement: Any) -> Any: :py:func:`~biocutils.assign_sequence.assign_sequence` instead. Args: - x: Object to be assignted. + x: + Object to be assigned. Returns: The object after assignment, typically the same type as ``x``. diff --git a/src/biocutils/assign_rows.py b/src/biocutils/assign_rows.py index 4ddcf13..ada797b 100644 --- a/src/biocutils/assign_rows.py +++ b/src/biocutils/assign_rows.py @@ -31,15 +31,15 @@ def assign_rows(x: Any, indices: Sequence[int], replacement: Any) -> Any: tmp = [slice(None)] * len(x.shape) tmp[0] = indices output[(*tmp,)] = replacement + return output @assign_rows.register -def _assign_rows_numpy( - x: numpy.ndarray, indices: Sequence[int], replacement: Any -) -> numpy.ndarray: +def _assign_rows_numpy(x: numpy.ndarray, indices: Sequence[int], replacement: Any) -> numpy.ndarray: tmp = [slice(None)] * len(x.shape) tmp[0] = indices output = numpy.copy(x) output[(*tmp,)] = replacement + return output diff --git a/src/biocutils/assign_sequence.py b/src/biocutils/assign_sequence.py index 091dc0c..d6b23d8 100644 --- a/src/biocutils/assign_sequence.py +++ b/src/biocutils/assign_sequence.py @@ -41,18 +41,14 @@ def _assign_sequence_list(x: list, indices: Sequence[int], replacement: Any) -> @assign_sequence.register -def _assign_sequence_numpy( - x: numpy.ndarray, indices: Sequence[int], replacement: Any -) -> numpy.ndarray: +def _assign_sequence_numpy(x: numpy.ndarray, 
indices: Sequence[int], replacement: Any) -> numpy.ndarray: output = numpy.copy(x) output[indices] = replacement return output @assign_sequence.register -def _assign_sequence_range( - x: range, indices: Sequence[int], replacement: Any -) -> Union[range, list]: +def _assign_sequence_range(x: range, indices: Sequence[int], replacement: Any) -> Union[range, list]: if ( isinstance(replacement, range) and isinstance(indices, range) diff --git a/src/biocutils/bioc_object.py b/src/biocutils/bioc_object.py index 1302663..d77b3c8 100644 --- a/src/biocutils/bioc_object.py +++ b/src/biocutils/bioc_object.py @@ -4,12 +4,7 @@ from typing import Any, Dict, Optional, Union from warnings import warn -try: - from typing import Self -except ImportError: - Self = "BiocObject" - -from .NamedList import NamedList +from .named_list import NamedList __author__ = "Jayaram Kancherla" __copyright__ = "jkanche" @@ -40,22 +35,22 @@ class BiocObject: Provides a standardized `metadata` slot and copy-on-write semantics. """ - def __init__(self, metadata: Optional[Union[Dict[str, Any], NamedList]] = None, validate: bool = True) -> None: + def __init__(self, metadata: Optional[Union[Dict[str, Any], NamedList]] = None, _validate: bool = True) -> None: """Initialize the BiocObject. Args: metadata: Additional metadata. Defaults to an empty NamedList. - validate: + _validate: Whether to validate the input. Defaults to True. 
""" - if validate and metadata is not None: + if _validate and metadata is not None: _validate_metadata(metadata) self._metadata = sanitize_metadata(metadata) - def _define_output(self, in_place: bool = False) -> Self: + def _define_output(self, in_place: bool = False) -> BiocObject: """Internal utility to handle in-place vs copy-on-modify.""" if in_place: return self @@ -93,7 +88,7 @@ def get_metadata(self) -> NamedList: """Alias for :py:attr:`~metadata` getter.""" return self.metadata - def set_metadata(self, metadata: Optional[Union[Dict[str, Any], NamedList]], in_place: bool = False) -> Self: + def set_metadata(self, metadata: Optional[Union[Dict[str, Any], NamedList]], in_place: bool = False) -> BiocObject: """Set new metadata. Args: diff --git a/src/biocutils/BooleanList.py b/src/biocutils/boolean_list.py similarity index 61% rename from src/biocutils/BooleanList.py rename to src/biocutils/boolean_list.py index 1e46826..447a8f9 100644 --- a/src/biocutils/BooleanList.py +++ b/src/biocutils/boolean_list.py @@ -1,7 +1,9 @@ +from __future__ import annotations + from typing import Any, Iterable, Optional, Sequence, Union -from .NamedList import NamedList -from .Names import Names +from .named_list import NamedList +from .names import Names from .normalize_subscript import SubscriptTypes @@ -10,10 +12,27 @@ def _coerce_to_bool(x: Any): class _SubscriptCoercer: - def __init__(self, data): + """Coercer for subscript operations on BooleanList.""" + + def __init__(self, data: Sequence) -> None: + """Initialize the coercer. + + Args: + data: + Sequence of values to coerce. + """ self._data = data - def __getitem__(self, index): + def __getitem__(self, index: int) -> Optional[bool]: + """Get an item and coerce it to boolean. + + Args: + index: + Index of the item. + + Returns: + Coerced boolean value. 
+ """ return _coerce_to_bool(self._data[index]) @@ -27,7 +46,7 @@ class BooleanList(NamedList): def __init__( self, - data: Optional[Iterable] = None, + data: Optional[Sequence] = None, names: Optional[Names] = None, _validate: bool = True, ): @@ -45,41 +64,34 @@ def __init__( _validate: Internal use only. """ - if _validate: - if data is not None: - if isinstance(data, BooleanList): + if data is not None: + if isinstance(data, BooleanList): + data = data._data + else: + if isinstance(data, NamedList): data = data._data - else: - if isinstance(data, NamedList): - data = data._data - original = data - data = list(_coerce_to_bool(item) for item in original) + + original = data + data = list(_coerce_to_bool(item) for item in original) + super().__init__(data, names, _validate=_validate) - def set_value( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "BooleanList": + def set_value(self, index: Union[int, str], value: Any, in_place: bool = False) -> BooleanList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_value` after coercing ``value`` to a boolean.""" return super().set_value(index, _coerce_to_bool(value), in_place=in_place) - def set_slice( - self, index: SubscriptTypes, value: Sequence, in_place: bool = False - ) -> "BooleanList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> BooleanList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_slice` after coercing ``value`` to booleans.""" return super().set_slice(index, _SubscriptCoercer(value), in_place=in_place) - def safe_insert( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "BooleanList": + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> BooleanList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_insert` after coercing ``value`` to a boolean.""" return super().safe_insert(index, _coerce_to_bool(value), in_place=in_place) - def safe_append(self, value: Any, 
in_place: bool = False) -> "BooleanList": + def safe_append(self, value: Any, in_place: bool = False) -> BooleanList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_append` after coercing ``value`` to a boolean.""" return super().safe_append(_coerce_to_bool(value), in_place=in_place) - def safe_extend(self, other: Iterable, in_place: bool = True) -> "BooleanList": + def safe_extend(self, other: Iterable, in_place: bool = False) -> BooleanList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_extend` after coercing elements of ``other`` to booleans.""" - return super().safe_extend( - (_coerce_to_bool(y) for y in other), in_place=in_place - ) + return super().safe_extend((_coerce_to_bool(y) for y in other), in_place=in_place) diff --git a/src/biocutils/combine.py b/src/biocutils/combine.py index 5e7a058..84bce6a 100644 --- a/src/biocutils/combine.py +++ b/src/biocutils/combine.py @@ -5,7 +5,7 @@ from .is_high_dimensional import is_high_dimensional -def combine(*x: Any): +def combine(*x: Any) -> Any: """ Generic combine that checks if the objects are n-dimensional for n > 1 (i.e. has a ``shape`` property of length greater than 1); if so, it calls @@ -14,7 +14,8 @@ def combine(*x: Any): :py:func:`~biocutils.combine_sequences.combine_sequences` instead. Args: - x: Objects to combine. + x: + Objects to combine. Returns: A combined object, typically the same type as the first element in ``x``. 
@@ -28,9 +29,8 @@ def combine(*x: Any): has_1d = True if has_nd and has_1d: - raise ValueError( - "cannot mix 1-dimensional and higher-dimensional objects in `combine`" - ) + raise ValueError("cannot mix 1-dimensional and higher-dimensional objects in `combine`") + if has_nd: return combine_rows(*x) else: diff --git a/src/biocutils/combine_columns.py b/src/biocutils/combine_columns.py index d1ad72c..bd71ecb 100644 --- a/src/biocutils/combine_columns.py +++ b/src/biocutils/combine_columns.py @@ -19,7 +19,7 @@ @singledispatch -def combine_columns(*x: Any): +def combine_columns(*x: Any) -> Any: """Combine n-dimensional objects along the second dimension. If all elements are :py:class:`~numpy.ndarray`, @@ -40,11 +40,7 @@ def combine_columns(*x: Any): Returns: Combined object, typically the same type as the first entry of ``x`` """ - raise NotImplementedError( - "no `combine_columns` method implemented for '" - + type(x[0]).__name__ - + "' objects" - ) + raise NotImplementedError("no `combine_columns` method implemented for '" + type(x[0]).__name__ + "' objects") @combine_columns.register @@ -57,7 +53,7 @@ def _combine_columns_dense_arrays(*x: numpy.ndarray): return numpy.concatenate(x, axis=1) -if is_package_installed("scipy") is True: +if is_package_installed("scipy"): import scipy.sparse as sp def _combine_columns_sparse_matrices(*x): @@ -85,8 +81,13 @@ def _combine_columns_sparse_arrays(*x): x = [convert_to_dense(y) for y in x] return numpy.concatenate(x, axis=1) + try: + combine_columns.register(sp.sparray, _combine_columns_sparse_arrays) + except Exception: + pass + -if is_package_installed("pandas") is True: +if is_package_installed("pandas"): from pandas import DataFrame, concat @combine_columns.register(DataFrame) diff --git a/src/biocutils/combine_rows.py b/src/biocutils/combine_rows.py index 1783dcf..e8e8573 100644 --- a/src/biocutils/combine_rows.py +++ b/src/biocutils/combine_rows.py @@ -19,7 +19,7 @@ @singledispatch -def combine_rows(*x: Any): +def 
combine_rows(*x: Any) -> Any: """Combine n-dimensional objects along their first dimension. If all elements are :py:class:`~numpy.ndarray`, we combine them using @@ -40,9 +40,7 @@ def combine_rows(*x: Any): Returns: Combined object, typically the same type as the first entry of ``x``. """ - raise NotImplementedError( - "no `combine_rows` method implemented for '" + type(x[0]).__name__ + "' objects" - ) + raise NotImplementedError("no `combine_rows` method implemented for '" + type(x[0]).__name__ + "' objects") @combine_rows.register(numpy.ndarray) @@ -69,7 +67,7 @@ def _combine_rows_sparse_matrices(*x): return numpy.concatenate(x) try: - combine_rows.register(sp.sparray, _combine_rows_sparse_arrays) + combine_rows.register(sp.spmatrix, _combine_rows_sparse_matrices) except Exception: pass @@ -77,14 +75,14 @@ def _combine_rows_sparse_arrays(*x): _check_array_dimensions(x, 0) if is_list_of_type(x, sp.sparray): combined = sp.vstack(x) - return _coerce_sparse_array(first, combined, sp) + return _coerce_sparse_array(x[0], combined, sp) warn("not all elements are SciPy sparse arrays") x = [convert_to_dense(y) for y in x] return numpy.concatenate(x) try: - combine_rows.register(sp.spmatrix, _combine_rows_sparse_matrices) + combine_rows.register(sp.sparray, _combine_rows_sparse_arrays) except Exception: pass diff --git a/src/biocutils/combine_sequences.py b/src/biocutils/combine_sequences.py index 8613385..2b34be3 100644 --- a/src/biocutils/combine_sequences.py +++ b/src/biocutils/combine_sequences.py @@ -13,7 +13,7 @@ @singledispatch -def combine_sequences(*x: Any): +def combine_sequences(*x: Any) -> Any: """Combine vector-like objects (1-dimensional arrays). If all elements are :py:class:`~numpy.ndarray`, @@ -34,11 +34,7 @@ def combine_sequences(*x: Any): Returns: A combined object, ideally of the same type as the first element in ``x``. 
""" - raise NotImplementedError( - "no `combine_sequences` method implemented for '" - + type(x[0]).__name__ - + "' objects" - ) + raise NotImplementedError("no `combine_sequences` method implemented for '" + type(x[0]).__name__ + "' objects") @combine_sequences.register(list) @@ -51,6 +47,7 @@ def _combine_sequences_dense_arrays(*x: numpy.ndarray): for y in x: if numpy.ma.is_masked(y): return numpy.ma.concatenate(x, axis=None) + return numpy.concatenate(x, axis=None) @@ -85,6 +82,7 @@ def _combine_sequences_ranges(*x: range): if not failed: return range(start, stop, step) + return list(chain(*x)) @@ -101,4 +99,5 @@ def _combine_sequences_pandas_series(*x): else: elems.append(elem) x = elems + return concat(x) diff --git a/src/biocutils/convert_to_dense.py b/src/biocutils/convert_to_dense.py index 1281e4c..12aa11f 100644 --- a/src/biocutils/convert_to_dense.py +++ b/src/biocutils/convert_to_dense.py @@ -15,7 +15,8 @@ def convert_to_dense(x: Any) -> numpy.ndarray: ``numpy.concatenate`` doesn't understand. Args: - x: Some array-like object to be stored as a NumPy array. + x: + Some array-like object to be stored as a NumPy array. Returns: A NumPy array. diff --git a/src/biocutils/extract_column_names.py b/src/biocutils/extract_column_names.py index 5b63683..d3a4e02 100644 --- a/src/biocutils/extract_column_names.py +++ b/src/biocutils/extract_column_names.py @@ -11,11 +11,12 @@ @singledispatch -def extract_column_names(x: Any) -> numpy.ndarray: +def extract_column_names(x: Any) -> Any: """Access column names from 2-dimensional representations. Args: - x: Any object. + x: + Any object with column names. Returns: Array of strings containing column names. 
@@ -27,5 +28,5 @@ def extract_column_names(x: Any) -> numpy.ndarray: from pandas import DataFrame @extract_column_names.register(DataFrame) - def _colnames_dataframe(x: DataFrame) -> list: + def _colnames_dataframe(x): return numpy.array(x.columns, dtype=str) diff --git a/src/biocutils/extract_row_names.py b/src/biocutils/extract_row_names.py index 81b81fb..8ebacee 100644 --- a/src/biocutils/extract_row_names.py +++ b/src/biocutils/extract_row_names.py @@ -11,11 +11,11 @@ @singledispatch -def extract_row_names(x: Any) -> numpy.ndarray: +def extract_row_names(x: Any) -> Any: """Access row names from 2-dimensional representations. Args: - x: Any object. + x: Any object with row names. Returns: Array of strings containing row names. @@ -27,5 +27,5 @@ def extract_row_names(x: Any) -> numpy.ndarray: from pandas import DataFrame @extract_row_names.register(DataFrame) - def _rownames_dataframe(x: DataFrame) -> list: + def _rownames_dataframe(x): return numpy.array(x.index, dtype=str) diff --git a/src/biocutils/Factor.py b/src/biocutils/factor.py similarity index 85% rename from src/biocutils/Factor.py rename to src/biocutils/factor.py index 374b34e..5aa5993 100644 --- a/src/biocutils/Factor.py +++ b/src/biocutils/factor.py @@ -1,8 +1,10 @@ +from __future__ import annotations + +import warnings from copy import copy, deepcopy from typing import Optional, Sequence, Union import numpy -import warnings from .assign_sequence import assign_sequence from .combine_sequences import combine_sequences @@ -10,22 +12,20 @@ from .is_list_of_type import is_list_of_type from .is_missing_scalar import is_missing_scalar from .match import match -from .Names import Names, _combine_names, _name_to_position, _sanitize_names +from .names import Names, _combine_names, _name_to_position, _sanitize_names from .normalize_subscript import ( NormalizedSubscript, SubscriptTypes, normalize_subscript, ) from .print_truncated import print_truncated_list -from .StringList import StringList +from 
.string_list import StringList from .subset_sequence import subset_sequence def _sanitize_codes(codes: Sequence[int], num_levels: int) -> numpy.ndarray: if not isinstance(codes, numpy.ndarray): - replacement = numpy.ndarray( - len(codes), dtype=numpy.min_scalar_type(-num_levels) - ) # get a signed type. + replacement = numpy.ndarray(len(codes), dtype=numpy.min_scalar_type(-num_levels)) # get a signed type. for i, x in enumerate(codes): if is_missing_scalar(x) or x < 0: replacement[i] = -1 @@ -35,16 +35,12 @@ def _sanitize_codes(codes: Sequence[int], num_levels: int) -> numpy.ndarray: else: if len(codes.shape) != 1: raise ValueError("'codes' should be a 1-dimensional array") - if not numpy.issubdtype( - codes.dtype, numpy.signedinteger - ): # force it to be signed. + if not numpy.issubdtype(codes.dtype, numpy.signedinteger): # force it to be signed. codes = codes.astype(numpy.min_scalar_type(-num_levels)) for x in codes: if x < -1 or x >= num_levels: - raise ValueError( - "all entries of 'codes' should refer to an entry of 'levels'" - ) + raise ValueError("all entries of 'codes' should refer to an entry of 'levels'") return codes @@ -67,7 +63,7 @@ def _sanitize_levels(levels: Sequence[str], check: bool = True) -> StringList: class FactorIterator: """Iterator for a :py:class:`~Factor` object.""" - def __init__(self, parent: "Factor"): + def __init__(self, parent: Factor): """ Args: parent: The parent :py:class:`~Factor` object. @@ -75,7 +71,7 @@ def __init__(self, parent: "Factor"): self._parent = parent self._position = 0 - def __iter__(self) -> "FactorIterator": + def __iter__(self) -> FactorIterator: """ Returns: The iterator. 
@@ -106,10 +102,10 @@ class Factor: def __init__( self, - codes: Sequence[int], - levels: Sequence[str], + codes: Union[numpy.ndarray, Sequence[int]], + levels: Union[StringList, Sequence[str]], ordered: bool = False, - names: Optional[Names] = None, + names: Optional[Union[Names, Sequence[str]]] = None, _validate: bool = True, ): """Initialize a Factor object. @@ -149,7 +145,7 @@ def __init__( #####>>>> Simple getters <<<<##### ################################## - def _define_output(self, in_place: bool) -> "Factor": + def _define_output(self, in_place: bool) -> Factor: if in_place: return self else: @@ -171,7 +167,7 @@ def codes(self) -> numpy.ndarray: """Alias for :py:meth:`~get_codes`.""" return self.get_codes() - def set_codes(self, codes: Sequence[int], in_place: bool = False) -> "Factor": + def set_codes(self, codes: Sequence[int], in_place: bool = False) -> Factor: """ Args: codes: @@ -187,9 +183,7 @@ def set_codes(self, codes: Sequence[int], in_place: bool = False) -> "Factor": """ output = self._define_output(in_place) if len(codes) != len(self): - raise ValueError( - "length of 'codes' should be equal to that of the current object" - ) + raise ValueError("length of 'codes' should be equal to that of the current object") output._codes = _sanitize_codes(codes, len(self._levels)) return output @@ -220,7 +214,7 @@ def ordered(self) -> bool: """Alias for :py:meth:`~get_ordered`.""" return self.get_ordered() - def set_ordered(self, ordered: bool, in_place: bool = False) -> "Factor": + def set_ordered(self, ordered: bool, in_place: bool = False) -> Factor: """ Args: ordered: @@ -293,12 +287,7 @@ def __repr__(self) -> str: Returns: A stringified representation of this object. 
""" - tmp = ( - "Factor(codes=" - + print_truncated_list(self._codes) - + ", levels=" - + print_truncated_list(self._levels) - ) + tmp = "Factor(codes=" + print_truncated_list(self._codes) + ", levels=" + print_truncated_list(self._levels) if self._ordered: tmp += ", ordered=True" if self._names: @@ -311,42 +300,24 @@ def __str__(self) -> str: Returns: A pretty-printed representation of this object. """ - message = ( - "Factor of length " - + str(len(self._codes)) - + " with " - + str(len(self._levels)) - + " level" - ) + message = "Factor of length " + str(len(self._codes)) + " with " + str(len(self._levels)) + " level" if len(self._levels) != 0: message += "s" message += "\n" message += ( "values: " - + print_truncated_list( - self._codes, transform=lambda i: self._levels[i], include_brackets=False - ) + + print_truncated_list(self._codes, transform=lambda i: self._levels[i], include_brackets=False) + "\n" ) if self._names is not None: message += ( - "names: " - + print_truncated_list( - self._names, transform=lambda x: x, include_brackets=False - ) - + "\n" - ) - message += ( - "levels: " - + print_truncated_list( - self._levels, transform=lambda x: x, include_brackets=False + "names: " + print_truncated_list(self._names, transform=lambda x: x, include_brackets=False) + "\n" ) - + "\n" - ) + message += "levels: " + print_truncated_list(self._levels, transform=lambda x: x, include_brackets=False) + "\n" message += "ordered: " + str(self._ordered) return message - def __eq__(self, other: "Factor"): + def __eq__(self, other: Factor): """ Args: other: Another ``Factor``. 
@@ -357,7 +328,12 @@ def __eq__(self, other: "Factor"): """ if not isinstance(other, Factor): return False - if len(self) != len(other) or self._levels != other._levels or self._names != other._names or self._ordered != other._ordered: + if ( + len(self) != len(other) + or self._levels != other._levels + or self._names != other._names + or self._ordered != other._ordered + ): return False return (self._codes == other._codes).all() @@ -384,7 +360,7 @@ def get_value(self, index: Union[str, int]) -> Union[str, None]: return None return self._levels[i] - def get_slice(self, index: SubscriptTypes) -> "Factor": + def get_slice(self, index: SubscriptTypes) -> Factor: """ Args: index: @@ -404,7 +380,7 @@ def get_slice(self, index: SubscriptTypes) -> "Factor": output._names = subset_sequence(self._names, index) return output - def __getitem__(self, index: SubscriptTypes) -> Union[str, "Factor"]: + def __getitem__(self, index: SubscriptTypes) -> Union[str, Factor]: """ If ``index`` is a scalar, this is an alias for :py:meth:`~get_value`. @@ -416,9 +392,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union[str, "Factor"]: else: return self.get_slice(NormalizedSubscript(index)) - def set_value( - self, index: Union[str, int], value: Union[str, None], in_place: bool = False - ) -> "Factor": + def set_value(self, index: Union[str, int], value: Union[str, None], in_place: bool = False) -> Factor: """ Args: index: @@ -450,14 +424,14 @@ def set_value( output._codes[index] = -1 return output - for i, l in enumerate(output._levels): - if l == value: + for i, lev in enumerate(output._levels): + if lev == value: output._codes[index] = i return output raise IndexError("failed to find level '" + str(value) + "'") - def set_slice(self, index: SubscriptTypes, value: "Factor", in_place: bool = False): + def set_slice(self, index: SubscriptTypes, value: Factor, in_place: bool = False): """ Replace items in the ``Factor`` list. 
The ``index`` elements in the current object are replaced with the corresponding values in ``value``. @@ -508,7 +482,7 @@ def set_slice(self, index: SubscriptTypes, value: "Factor", in_place: bool = Fal return output - def __setitem__(self, index: SubscriptTypes, value: Union[str, "Factor"]): + def __setitem__(self, index: SubscriptTypes, value: Union[str, Factor]): """ If ``index`` is a scalar, this is an alias for :py:meth:`~set_value`. @@ -524,7 +498,7 @@ def __setitem__(self, index: SubscriptTypes, value: Union[str, "Factor"]): #####>>>> Level setting <<<<##### ################################# - def drop_unused_levels(self, in_place: bool = False) -> "Factor": + def drop_unused_levels(self, in_place: bool = False) -> Factor: """Drop unused levels. Args: @@ -567,7 +541,7 @@ def replace_levels( self, levels: Sequence[str], in_place: bool = False, - ) -> "Factor": + ) -> Factor: """Replace the existing levels with a new list. The codes of the returned ``Factor`` are unchanged by this method and will index into the replacement ``levels``, so each element of the ``Factor`` may refer @@ -614,12 +588,7 @@ def replace_levels( output._levels = new_levels return output - def set_levels( - self, - levels: Union[str, Sequence[str]], - remap: bool = True, - in_place: bool = False - ) -> "Factor": + def set_levels(self, levels: Union[str, Sequence[str]], remap: bool = True, in_place: bool = False) -> Factor: """ Alias for :py:meth:`~remap_levels` if ``remap = True``, otherwise an alias for :py:meth:`~replace_levels`. The first alias is deprecated and @@ -631,9 +600,7 @@ def set_levels( else: return self.replace_levels(levels, in_place=in_place) - def remap_levels( - self, levels: Union[str, Sequence[str]], in_place: bool = False - ) -> "Factor": + def remap_levels(self, levels: Union[str, Sequence[str]], in_place: bool = False) -> Factor: """Remap codes to a replacement list of levels. 
Each entry of the remapped ``Factor`` will refer to the same string across the old and new levels, provided that string is present in both sets of levels. @@ -679,9 +646,7 @@ def remap_levels( lmapping[x] = len(new_levels) new_levels.append(x) if levels not in lmapping: - raise ValueError( - "string 'levels' should already be present among object levels" - ) + raise ValueError("string 'levels' should already be present among object levels") else: new_levels = levels if not isinstance(new_levels, StringList): @@ -712,7 +677,7 @@ def remap_levels( #####>>>> Copying <<<<##### ########################### - def __copy__(self) -> "Factor": + def __copy__(self) -> Factor: """ Returns: A shallow copy of the ``Factor`` object. @@ -725,7 +690,7 @@ def __copy__(self) -> "Factor": _validate=False, ) - def __deepcopy__(self, memo) -> "Factor": + def __deepcopy__(self, memo) -> Factor: """ Returns: A deep copy of the ``Factor`` object. @@ -762,8 +727,8 @@ def from_sequence( sort_levels: bool = True, ordered: bool = False, names: Optional[Sequence[str]] = None, - **kwargs - ) -> "Factor": + **kwargs, + ) -> Factor: """Convert a sequence of hashable values into a factor. Args: @@ -799,6 +764,60 @@ def from_sequence( levels, indices = factorize(x, levels=levels, sort_levels=sort_levels, **kwargs) return Factor(indices, levels=levels, ordered=ordered, names=names) + ################################ + #####>>>> List methods <<<<##### + ################################ + + def as_list(self) -> list: + """ + Returns: + List of strings corresponding to the factor elements. + Missing values are represented as None. + """ + return [self._levels[c] if c >= 0 else None for c in self._codes] + + def safe_delete(self, index: Union[int, str, slice], in_place: bool = False) -> Factor: + """ + Args: + index: + Integer index or slice containing position(s) to delete. + Alternatively, the name of the value to delete (the first + occurrence of the name is used). 
+ + in_place: + Whether to modify the current object in place. + + Returns: + A ``Factor`` where the item at ``index`` is removed. This is a + new object if ``in_place = False``, otherwise it is a reference to + the current object. + """ + if in_place: + output = self + else: + output = copy(self) + output._codes = copy(self._codes) + if output._names is not None: + output._names = output._names.copy() + + if isinstance(index, str): + index = _name_to_position(output._names, index) + + output._codes = numpy.delete(output._codes, index) + + if output._names is not None: + output._names.delete(index) + + return output + + def delete(self, index: Union[int, str, slice]): + """Alias for :py:meth:`~safe_delete` with ``in_place = True``.""" + self.safe_delete(index, in_place=True) + + def __delitem__(self, index: Union[int, str, slice]): + """Alias for :py:meth:`~delete`.""" + self.delete(index) + @subset_sequence.register def _subset_sequence_Factor(x: Factor, indices: Sequence[int]) -> Factor: @@ -841,9 +860,7 @@ def _combine_factors(*x: Factor): new_levels.append(y) mapping.append(all_levels_map[y]) - curout = numpy.ndarray( - len(f), dtype=numpy.min_scalar_type(-len(new_levels)) - ) + curout = numpy.ndarray(len(f), dtype=numpy.min_scalar_type(-len(new_levels))) for i, j in enumerate(f._codes): if j < 0: curout[i] = j diff --git a/src/biocutils/FloatList.py b/src/biocutils/float_list.py similarity index 72% rename from src/biocutils/FloatList.py rename to src/biocutils/float_list.py index 3b587bd..7ed5c47 100644 --- a/src/biocutils/FloatList.py +++ b/src/biocutils/float_list.py @@ -1,7 +1,9 @@ +from __future__ import annotations + from typing import Any, Iterable, Optional, Sequence, Union -from .NamedList import NamedList -from .Names import Names +from .named_list import NamedList +from .names import Names from .normalize_subscript import SubscriptTypes @@ -10,15 +12,30 @@ def _coerce_to_float(x: Any): return None try: return float(x) - except: + except Exception as 
_: return None class _SubscriptCoercer: - def __init__(self, data): + """Coercer for subscript operations on FloatList.""" + + def __init__(self, data: Sequence) -> None: + """Initialize the coercer. + + Args: + data: Sequence of values to coerce. + """ self._data = data - def __getitem__(self, index): + def __getitem__(self, index: int) -> Optional[float]: + """Get an item and coerce it to float. + + Args: + index: Index of the item. + + Returns: + Coerced float value. + """ return _coerce_to_float(self._data[index]) @@ -32,7 +49,7 @@ class FloatList(NamedList): def __init__( self, - data: Optional[Iterable] = None, + data: Optional[Sequence] = None, names: Optional[Names] = None, _validate: bool = True, ): @@ -59,32 +76,25 @@ def __init__( data = data._data original = data data = list(_coerce_to_float(item) for item in original) + super().__init__(data, names, _validate=_validate) - def set_value( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "FloatList": + def set_value(self, index: Union[int, str], value: Any, in_place: bool = False) -> FloatList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_value` after coercing ``value`` to a float.""" return super().set_value(index, _coerce_to_float(value), in_place=in_place) - def set_slice( - self, index: SubscriptTypes, value: Sequence, in_place: bool = False - ) -> "FloatList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> FloatList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_slice` after coercing ``value`` to floats.""" return super().set_slice(index, _SubscriptCoercer(value), in_place=in_place) - def safe_insert( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "FloatList": + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> FloatList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_insert` after coercing ``value`` to a float.""" return super().safe_insert(index, 
_coerce_to_float(value), in_place=in_place) - def safe_append(self, value: Any, in_place: bool = False) -> "FloatList": + def safe_append(self, value: Any, in_place: bool = False) -> FloatList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_append` after coercing ``value`` to a float.""" return super().safe_append(_coerce_to_float(value), in_place=in_place) - def safe_extend(self, other: Iterable, in_place: bool = True) -> "FloatList": + def safe_extend(self, other: Iterable, in_place: bool = True) -> FloatList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_extend` after coercing elements of ``other`` to floats.""" - return super().safe_extend( - (_coerce_to_float(y) for y in other), in_place=in_place - ) + return super().safe_extend((_coerce_to_float(y) for y in other), in_place=in_place) diff --git a/src/biocutils/IntegerList.py b/src/biocutils/integer_list.py similarity index 71% rename from src/biocutils/IntegerList.py rename to src/biocutils/integer_list.py index c06e3e6..a4825b0 100644 --- a/src/biocutils/IntegerList.py +++ b/src/biocutils/integer_list.py @@ -1,7 +1,9 @@ +from __future__ import annotations + from typing import Any, Iterable, Optional, Sequence, Union -from .NamedList import NamedList -from .Names import Names +from .named_list import NamedList +from .names import Names from .normalize_subscript import SubscriptTypes @@ -10,15 +12,32 @@ def _coerce_to_int(x: Any): return None try: return int(x) - except: + except Exception as _: return None class _SubscriptCoercer: - def __init__(self, data): + """Coercer for subscript operations on IntegerList.""" + + def __init__(self, data: Sequence) -> None: + """Initialize the coercer. + + Args: + data: + Sequence of values to coerce. + """ self._data = data - def __getitem__(self, index): + def __getitem__(self, index: int) -> Optional[int]: + """Get an item and coerce it to integer. + + Args: + index: + Index of the item. + + Returns: + Coerced integer value. 
+ """ return _coerce_to_int(self._data[index]) @@ -32,7 +51,7 @@ class IntegerList(NamedList): def __init__( self, - data: Optional[Iterable] = None, + data: Optional[Sequence] = None, names: Optional[Names] = None, _validate: bool = True, ): @@ -61,30 +80,22 @@ def __init__( data = list(_coerce_to_int(item) for item in original) super().__init__(data, names, _validate=_validate) - def set_value( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "IntegerList": + def set_value(self, index: Union[int, str], value: Any, in_place: bool = False) -> IntegerList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_value` after coercing ``value`` to a integer.""" return super().set_value(index, _coerce_to_int(value), in_place=in_place) - def set_slice( - self, index: SubscriptTypes, value: Sequence, in_place: bool = False - ) -> "IntegerList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> IntegerList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_slice` after coercing ``value`` to integers.""" return super().set_slice(index, _SubscriptCoercer(value), in_place=in_place) - def safe_insert( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "IntegerList": + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> IntegerList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_insert` after coercing ``value`` to a integer.""" return super().safe_insert(index, _coerce_to_int(value), in_place=in_place) - def safe_append(self, value: Any, in_place: bool = False) -> "IntegerList": + def safe_append(self, value: Any, in_place: bool = False) -> IntegerList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_append` after coercing ``value`` to a integer.""" return super().safe_append(_coerce_to_int(value), in_place=in_place) - def safe_extend(self, other: Iterable, in_place: bool = True) -> "IntegerList": + def safe_extend(self, other: Iterable, 
in_place: bool = True) -> IntegerList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_extend` after coercing elements of ``other`` to integers.""" - return super().safe_extend( - (_coerce_to_int(y) for y in other), in_place=in_place - ) + return super().safe_extend((_coerce_to_int(y) for y in other), in_place=in_place) diff --git a/src/biocutils/is_high_dimensional.py b/src/biocutils/is_high_dimensional.py index 9d9d5d2..c380c9f 100644 --- a/src/biocutils/is_high_dimensional.py +++ b/src/biocutils/is_high_dimensional.py @@ -1,9 +1,11 @@ from functools import singledispatch +from typing import Any @singledispatch -def is_high_dimensional(x): - """ +def is_high_dimensional(x: Any) -> bool: + """Check if an object is high-dimensional. + Whether an object is high-dimensional, i.e., has a ``shape`` attribute that is of length greater than 1. diff --git a/src/biocutils/is_list_of_type.py b/src/biocutils/is_list_of_type.py index f489b96..5f0ff54 100644 --- a/src/biocutils/is_list_of_type.py +++ b/src/biocutils/is_list_of_type.py @@ -12,11 +12,14 @@ def is_list_of_type(x: Union[list, tuple], target_type: Callable, ignore_none: b """Checks if ``x`` is a list, and whether all elements of the list are of the same type. Args: - x: A list or tuple of values. + x: + A list or tuple of values. - target_type: Type to check for, e.g. ``str``, ``int``. + target_type: + Type to check for, e.g. ``str``, ``int``. - ignore_none: Whether to ignore Nones when comparing to ``target_type``. + ignore_none: + Whether to ignore Nones when comparing to ``target_type``. 
Returns: True if ``x`` is a list or tuple and all elements are of the target @@ -24,7 +27,7 @@ def is_list_of_type(x: Union[list, tuple], target_type: Callable, ignore_none: b """ if not isinstance(x, (list, tuple, np.ndarray, ma.MaskedArray)): return False - + if isinstance(x, ma.MaskedArray): if not ignore_none: return all(x.mask) and all(isinstance(item, target_type) for item in x.data) diff --git a/src/biocutils/is_missing_scalar.py b/src/biocutils/is_missing_scalar.py index 8392fdf..5dd4cd0 100644 --- a/src/biocutils/is_missing_scalar.py +++ b/src/biocutils/is_missing_scalar.py @@ -1,8 +1,11 @@ +from typing import Any + import numpy -def is_missing_scalar(x) -> bool: - """ +def is_missing_scalar(x: Any) -> bool: + """Check if a scalar value is missing. + Args: x: Any scalar value. diff --git a/src/biocutils/map_to_index.py b/src/biocutils/map_to_index.py index bfb31f1..fc50b6d 100644 --- a/src/biocutils/map_to_index.py +++ b/src/biocutils/map_to_index.py @@ -6,8 +6,7 @@ def map_to_index(x: Sequence, duplicate_method: DUPLICATE_METHOD = "first") -> dict: - """ - Create a dictionary to map values of a sequence to positional indices. + """Create a dictionary to map values of a sequence to positional indices. Args: x: @@ -19,7 +18,7 @@ def map_to_index(x: Sequence, duplicate_method: DUPLICATE_METHOD = "first") -> d value in ``x``. Returns: - dict: Dictionary that maps values of ``x`` to their position inside ``x``. + Dictionary that maps values of ``x`` to their position inside ``x``. 
""" first_tie = duplicate_method == "first" diff --git a/src/biocutils/match.py b/src/biocutils/match.py index 2881bec..96e74de 100644 --- a/src/biocutils/match.py +++ b/src/biocutils/match.py @@ -1,4 +1,5 @@ -from typing import Sequence, Union, Optional +from typing import Optional, Sequence, Union + import numpy from .map_to_index import DUPLICATE_METHOD, map_to_index @@ -8,7 +9,7 @@ def match( x: Sequence, targets: Union[dict, Sequence], duplicate_method: DUPLICATE_METHOD = "first", - dtype: Optional[numpy.ndarray] = None, + dtype: Optional[numpy.dtype] = None, fail_missing: Optional[bool] = None, ) -> numpy.ndarray: """Find a matching value of each element of ``x`` in ``target``. @@ -45,7 +46,7 @@ def match( targets = map_to_index(targets, duplicate_method=duplicate_method) if dtype is None: - dtype = numpy.min_scalar_type(-len(targets)) # get a signed type + dtype = numpy.min_scalar_type(-len(targets)) # get a signed type indices = numpy.zeros(len(x), dtype=dtype) if fail_missing is None: @@ -60,7 +61,7 @@ def match( indices[i] = -1 else: for i, y in enumerate(x): - if not y in targets: + if y not in targets: raise ValueError("cannot find '" + str(y) + "' in 'targets'") indices[i] = targets[y] diff --git a/src/biocutils/NamedList.py b/src/biocutils/named_list.py similarity index 76% rename from src/biocutils/NamedList.py rename to src/biocutils/named_list.py index af4670a..cd406b4 100644 --- a/src/biocutils/NamedList.py +++ b/src/biocutils/named_list.py @@ -1,9 +1,11 @@ +from __future__ import annotations + from copy import deepcopy -from typing import Any, Dict, Iterable, Optional, Sequence, Union +from typing import Any, Dict, Iterable, Optional, Sequence, Tuple, Union from .assign_sequence import assign_sequence from .combine_sequences import combine_sequences -from .Names import Names, _name_to_position, _sanitize_names +from .names import Names, _name_to_position, _sanitize_names from .normalize_subscript import ( NormalizedSubscript, SubscriptTypes, @@ 
-21,7 +23,7 @@ class NamedList: def __init__( self, - data: Optional[Iterable] = None, + data: Optional[Sequence] = None, names: Optional[Names] = None, _validate: bool = True, ): @@ -39,6 +41,9 @@ def __init__( _validate: Internal use only. """ + if isinstance(data, dict): + raise TypeError("'data' is a dictionary, use 'NamedList.from_dict' instead.") + if _validate: if data is None: data = [] @@ -46,7 +51,9 @@ def __init__( data = data._data elif not isinstance(data, list): data = list(data) + names = _sanitize_names(names, len(data)) + self._data = data self._names = names @@ -86,18 +93,11 @@ def __str__(self) -> str: names if any exist. """ if self._names is not None: - return ( - "[" - + ", ".join( - repr(self._names[i]) + "=" + repr(x) - for i, x in enumerate(self._data) - ) - + "]" - ) + return "[" + ", ".join(repr(self._names[i]) + "=" + repr(x) for i, x in enumerate(self._data)) + "]" else: return repr(self._data) - def __eq__(self, other: "NamedList") -> bool: + def __eq__(self, other: NamedList) -> bool: """ Args: other: Another ``NamedList``. @@ -114,7 +114,7 @@ def __eq__(self, other: "NamedList") -> bool: #####>>>> Get/set names <<<<##### ################################# - def get_names(self) -> Names: + def get_names(self) -> Optional[Names]: """ Returns: Names for the list elements. 
@@ -125,14 +125,14 @@ def get_names(self) -> Names: return self._names @property - def names(self) -> Names: + def names(self) -> Optional[Names]: """Alias for :py:meth:`~get_names`.""" return self.get_names() def _shallow_copy(self): return type(self)(self._data, self._names, _validate=False) - def set_names(self, names: Optional[Names], in_place: bool = False) -> "NamedList": + def set_names(self, names: Optional[Names], in_place: bool = False) -> NamedList: """ Args: names: @@ -153,12 +153,27 @@ def set_names(self, names: Optional[Names], in_place: bool = False) -> "NamedLis output._names = _sanitize_names(names, len(self)) return output + def get_name(self, index: int) -> Optional[str]: + """Get name at an index. + + Args: + index: + Integer index of the element. + Returns: + Names for the list elements. + """ + if self._names is None: + return None + + return self._names.get_value(index) + ################################# #####>>>> Get/set items <<<<##### ################################# def get_value(self, index: Union[str, int]) -> Any: - """ + """Get value at an index. + Args: index: Integer index of the element to obtain. Alternatively, a string @@ -172,7 +187,7 @@ def get_value(self, index: Union[str, int]) -> Any: index = _name_to_position(self._names, index) return self._data[index] - def get_slice(self, index: SubscriptTypes) -> "NamedList": + def get_slice(self, index: SubscriptTypes) -> NamedList: """ Args: index: @@ -192,7 +207,7 @@ def get_slice(self, index: SubscriptTypes) -> "NamedList": outnames = subset_sequence(self._names, index) return type(self)(outdata, outnames, _validate=False) - def __getitem__(self, index: SubscriptTypes) -> Union["NamedList", Any]: + def __getitem__(self, index: SubscriptTypes) -> Union[NamedList, Any]: """ If ``index`` is a scalar, this is an alias for :py:meth:`~get_value`. 
@@ -204,9 +219,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union["NamedList", Any]: else: return self.get_slice(NormalizedSubscript(index)) - def set_value( - self, index: Union[str, int], value: Any, in_place: bool = False - ) -> "NamedList": + def set_value(self, index: Union[str, int], value: Any, in_place: bool = False) -> NamedList: """ Args: index: @@ -253,9 +266,7 @@ def set_value( return output - def set_slice( - self, index: SubscriptTypes, value: Sequence, in_place: bool = False - ) -> "NamedList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> NamedList: """ Args: index: @@ -318,15 +329,13 @@ def __setitem__(self, index: SubscriptTypes, value: Any): #####>>>> List methods <<<<##### ################################ - def _define_output(self, in_place: bool) -> "NamedList": + def _define_output(self, in_place: bool) -> NamedList: if in_place: return self else: return self.copy() - def safe_insert( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "NamedList": + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> NamedList: """ Args: index: @@ -358,7 +367,7 @@ def insert(self, index: Union[int, str], value: Any): """Alias for :py:meth:`~safe_insert` with ``in_place = True``.""" self.safe_insert(index, value, in_place=True) - def safe_append(self, value: Any, in_place: bool = False) -> "NamedList": + def safe_append(self, value: Any, in_place: bool = False) -> NamedList: """ Args: value: @@ -383,7 +392,7 @@ def append(self, value: Any): """Alias for :py:meth:`~safe_append` with ``in_place = True``.""" self.safe_append(value, in_place=True) - def safe_extend(self, other: Iterable, in_place: bool = False) -> "NamedList": + def safe_extend(self, other: Iterable, in_place: bool = False) -> NamedList: """ Args: other: @@ -416,7 +425,7 @@ def extend(self, other: Iterable): """Alias for :py:meth:`~safe_extend` with ``in_place = True``.""" 
self.safe_extend(other, in_place=True) - def __add__(self, other: list) -> "NamedList": + def __add__(self, other: list) -> NamedList: """Alias for :py:meth:`~safe_extend`.""" return self.safe_extend(other) @@ -426,11 +435,100 @@ def __iadd__(self, other: list): self.extend(other) return self + def safe_delete(self, index: Union[int, str, slice], in_place: bool = False) -> NamedList: + """ + Args: + index: + An integer index or slice containing position(s) to delete. + Alternatively, the name of the value to delete (the first + occurrence of the name is used). + + in_place: + Whether to modify the current object in place. + + Returns: + A ``NamedList`` where the item at ``index`` is removed. This is a + new object if ``in_place = False``, otherwise it is a reference to + the current object. + """ + if in_place: + output = self + else: + output = self._shallow_copy() + output._data = output._data[:] # Shallow copy of the list + if output._names is not None: + output._names = output._names.copy() + + if isinstance(index, str): + index = _name_to_position(self._names, index) + + del output._data[index] + if output._names is not None: + output._names.delete(index) + + return output + + def delete(self, index: Union[int, str, slice]): + """Alias for :py:meth:`~safe_delete` with ``in_place = True``.""" + self.safe_delete(index, in_place=True) + + def __delitem__(self, index: Union[int, str, slice]): + """Alias for :py:meth:`~delete`.""" + self.delete(index) + + ##################################### + #####>>>> dict like methods <<<<##### + ##################################### + + def keys(self) -> Iterable[str]: + """ + Returns: + Iterator over the names of the list elements. + """ + if self._names is None: + return iter([]) + return iter(self._names) + + def values(self) -> Iterable[Any]: + """ + Returns: + Iterator over the values of the list elements. 
+ """ + return iter(self._data) + + def items(self) -> Iterable[Tuple[str, Any]]: + """ + Returns: + Iterator over (name, value) pairs. + If names are missing, keys are returned as stringified indices. + """ + if self._names is not None: + return zip(self._names, self._data) + else: + return zip((str(i) for i in range(len(self))), self._data) + + def get(self, key: Union[str, int], default: Any = None) -> Any: + """ + Args: + key: + Name or index of the element. + + default: + Value to return if ``key`` is not found. + + Returns: + Value at ``key`` or ``default``. + """ + try: + return self.get_value(key) + except (KeyError, IndexError): + return default + ################################ #####>>>> Copy methods <<<<##### ################################ - def copy(self) -> "NamedList": + def copy(self) -> NamedList: """ Returns: A shallow copy of a ``NamedList`` with the same contents. This @@ -443,11 +541,11 @@ def copy(self) -> "NamedList": newnames = newnames.copy() return type(self)(self._data.copy(), names=newnames, _validate=False) - def __copy__(self) -> "NamedList": + def __copy__(self) -> NamedList: """Alias for :py:meth:`~copy`.""" return self.copy() - def __deepcopy__(self, memo=None, _nil=[]) -> "NamedList": + def __deepcopy__(self, memo=None, _nil=[]) -> NamedList: """ Args: memo: @@ -492,28 +590,28 @@ def as_dict(self) -> Dict[str, Any]: output[n] = self[i] return output - @staticmethod - def from_list(x: list) -> "NamedList": + @classmethod + def from_list(cls, x: list) -> NamedList: """ Args: x: List of data elements. Returns: - A ``NamedList`` instance with the contents of ``x`` and no names. + A instance with the contents of ``x`` and no names. """ - return NamedList(x) + return cls(x) - @staticmethod - def from_dict(x: dict) -> "NamedList": + @classmethod + def from_dict(cls, x: dict) -> NamedList: """ Args: x: Dictionary where keys are strings (or can be coerced to them). 
Returns: - A ``NamedList`` instance where the list elements are the values of + A instance where the list elements are the values of ``x`` and the names are the stringified keys. """ - return NamedList(list(x.values()), names=Names(str(y) for y in x.keys())) + return cls(list(x.values()), names=Names(str(y) for y in x.keys())) @subset_sequence.register @@ -530,9 +628,7 @@ def _combine_sequences_NamedList(*x: NamedList) -> NamedList: @assign_sequence.register -def _assign_sequence_NamedList( - x: NamedList, indices: Sequence[int], other: Sequence -) -> NamedList: +def _assign_sequence_NamedList(x: NamedList, indices: Sequence[int], other: Sequence) -> NamedList: if isinstance(other, NamedList): # Do NOT set the names if 'other' is a NamedList. Names don't change # during assignment/setting operations, as a matter of policy. This is @@ -541,6 +637,4 @@ def _assign_sequence_NamedList( # of names, and it would be weird for the same sequence of names to # suddently become an invalid indexing vector after an assignment. other = other._data - return type(x)( - assign_sequence(x._data, NormalizedSubscript(indices), other), names=x._names - ) + return type(x)(assign_sequence(x._data, NormalizedSubscript(indices), other), names=x._names) diff --git a/src/biocutils/Names.py b/src/biocutils/names.py similarity index 83% rename from src/biocutils/Names.py rename to src/biocutils/names.py index e90f45f..1047f79 100644 --- a/src/biocutils/Names.py +++ b/src/biocutils/names.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from copy import deepcopy from typing import Any, Callable, Iterable, List, Optional, Sequence, Union @@ -82,7 +84,7 @@ def __str__(self) -> str: """ return str(self._names) - def __eq__(self, other: "Names") -> bool: + def __eq__(self, other: Names) -> bool: """ Args: other: Another ``Names`` object. @@ -119,6 +121,17 @@ def map(self, name: str) -> int: else: return -1 + def __contains__(self, name: str) -> bool: + """ + Args: + name: + Name to check. 
+ + Returns: + True if ``name`` exists, otherwise False. + """ + return self.map(name) >= 0 + ################################# #####>>>> Get/set items <<<<##### ################################# @@ -133,7 +146,7 @@ def get_value(self, index: int) -> str: """ return self._names[index] - def get_slice(self, index: SubscriptTypes) -> "Names": + def get_slice(self, index: SubscriptTypes) -> Names: """ Args: index: @@ -147,7 +160,7 @@ def get_slice(self, index: SubscriptTypes) -> "Names": index, scalar = normalize_subscript(index, len(self), None) return type(self)(subset_sequence(self._names, index), _validate=False) - def __getitem__(self, index: SubscriptTypes) -> Union[str, "Names"]: + def __getitem__(self, index: SubscriptTypes) -> Union[str, Names]: """ If ``index`` is a scalar, this is an alias for :py:attr:`~get_value`. @@ -159,7 +172,7 @@ def __getitem__(self, index: SubscriptTypes) -> Union[str, "Names"]: else: return self.get_slice(NormalizedSubscript(index)) - def set_value(self, index: int, value: str, in_place: bool = False) -> "Names": + def set_value(self, index: int, value: str, in_place: bool = False) -> Names: """ Args: index: Position of interest. @@ -180,9 +193,7 @@ def set_value(self, index: int, value: str, in_place: bool = False) -> "Names": output._names[index] = str(value) return output - def set_slice( - self, index: SubscriptTypes, value: Sequence[str], in_place: bool = False - ) -> "Names": + def set_slice(self, index: SubscriptTypes, value: Sequence[str], in_place: bool = False) -> Names: """ Args: index: Positions of interest. 
@@ -228,13 +239,13 @@ def __setitem__(self, index: SubscriptTypes, value: Any): #####>>>> List methods <<<<##### ################################ - def _define_output(self, in_place: bool) -> "Names": + def _define_output(self, in_place: bool) -> Names: if in_place: return self else: return self.copy() - def safe_append(self, value: str, in_place: bool = False) -> "Names": + def safe_append(self, value: str, in_place: bool = False) -> Names: """ Args: value: Name to be added. @@ -256,7 +267,7 @@ def append(self, value: str): """Alias for :py:attr:`~safe_append` with ``in_place = True``.""" self.safe_append(value, in_place=True) - def safe_insert(self, index: int, value: str, in_place: bool = False) -> "Names": + def safe_insert(self, index: int, value: str, in_place: bool = False) -> Names: """ Args: index: Position on the object to insert at. @@ -278,7 +289,7 @@ def insert(self, index: int, value: str): """Alias for :py:attr:`~safe_insert` with ``in_place = True``.""" self.safe_insert(index, value, in_place=True) - def safe_extend(self, value: Sequence[str], in_place: bool = False) -> "Names": + def safe_extend(self, value: Sequence[str], in_place: bool = False) -> Names: """ Args: value: Names to be added. @@ -328,11 +339,40 @@ def __iadd__(self, other: list): self.extend(other) return self + def safe_delete(self, index: Union[int, slice], in_place: bool = False) -> Names: + """ + Args: + index: + Position(s) of the name(s) to delete. + + in_place: + Whether to perform this deletion in-place. + + Returns: + A ``Names`` object with the specified name(s) removed. This is a new object + if ``in_place = False``, otherwise it is a reference to the current + object. 
+ """ + output = self._define_output(in_place) + if in_place: + output._wipe_reverse_index() + + del output._names[index] + return output + + def delete(self, index: Union[int, slice]): + """Alias for :py:attr:`~safe_delete` with ``in_place = True``.""" + self.safe_delete(index, in_place=True) + + def __delitem__(self, index: Union[int, slice]): + """Alias for :py:attr:`~delete`.""" + self.delete(index) + ################################ #####>>>> Copy methods <<<<##### ################################ - def copy(self) -> "Names": + def copy(self) -> Names: """ Returns: A shallow copy of the current object. This will copy the underlying @@ -341,11 +381,11 @@ def copy(self) -> "Names": """ return type(self)(self._names.copy(), _validate=False) - def __copy__(self) -> "Names": + def __copy__(self) -> Names: """Alias for :py:attr:`~copy`.""" return self.copy() - def __deepcopy__(self, memo=None, _nil=[]) -> "Names": + def __deepcopy__(self, memo=None, _nil=[]) -> Names: """ Args: memo: @@ -359,6 +399,15 @@ def __deepcopy__(self, memo=None, _nil=[]) -> "Names": """ return type(self)(deepcopy(self._names, memo, _nil), _validate=False) + @property + def is_unique(self) -> bool: + """ + Returns: + True if all names are unique, otherwise False. 
+ """ + self._populate_reverse_index() + return len(self._reverse) == len(self._names) + @subset_sequence.register def _subset_sequence_Names(x: Names, indices: Sequence[int]) -> Names: @@ -387,19 +436,24 @@ def _name_to_position(names: Optional[Names], index: str) -> int: return i -def _sanitize_names(names: Optional[Names], length: int) -> Union[None, Names]: +def _validate_names(names: Optional[Names], length: int) -> bool: + if names is not None and len(names) != length: + raise ValueError("length of 'names' must be equal to number of entries (" + str(length) + ")") + + return True + + +def _sanitize_names(names: Optional[Names], length: int) -> Optional[Names]: if names is None: return names if not isinstance(names, Names): names = Names(names) - if len(names) != length: - raise ValueError( - "length of 'names' must be equal to number of entries (" + str(length) + ")" - ) + + _validate_names(names, length=length) return names -def _combine_names(*x: Any, get_names: Callable) -> Union[Names, None]: +def _combine_names(*x: Any, get_names: Callable) -> Optional[Names]: all_names = [] has_names = False for y in x: diff --git a/src/biocutils/normalize_subscript.py b/src/biocutils/normalize_subscript.py index 890a9b3..d79ee6b 100644 --- a/src/biocutils/normalize_subscript.py +++ b/src/biocutils/normalize_subscript.py @@ -4,12 +4,7 @@ def _raise_int(idx: int, length): - raise IndexError( - "subscript (" - + str(idx) - + ") out of range for vector-like object of length " - + str(length) - ) + raise IndexError("subscript (" + str(idx) + ") out of range for vector-like object of length " + str(length)) def _is_scalar_bool(sub): @@ -23,8 +18,9 @@ class NormalizedSubscript: such that :py:func:`~normalize_subscript` is just a no-op. """ - def __init__(self, subscript: Sequence[int]): - """ + def __init__(self, subscript: Sequence[int]) -> None: + """Initialize a NormalizedSubscript. + Args: subscript: Sequence of integers for a normalized subscript. 
@@ -40,11 +36,11 @@ def subscript(self) -> Sequence[int]: return self._subscript def __getitem__(self, index: Any) -> Any: - """ + """Get an item from the subscript. + Args: - index: - Any argument accepted by the ``__getitem__`` method of the - :py:attr:`~subscript`. + index: Any argument accepted by the ``__getitem__`` method of the + subscript. Returns: The same return value as the ``__getitem__`` method of the @@ -53,7 +49,8 @@ def __getitem__(self, index: Any) -> Any: return self._subscript[index] def __len__(self) -> int: - """ + """Get the length of the subscript. + Returns: Length of the subscript. """ @@ -68,8 +65,9 @@ def normalize_subscript( length: int, names: Optional[Sequence[str]] = None, non_negative_only: bool = True, -) -> Tuple: - """ +) -> Tuple[Sequence[int], bool]: + """Normalize a subscript into a sequence of integer indices. + Normalize a subscript for ``__getitem__`` or friends into a sequence of integer indices, for consistent downstream use. @@ -101,7 +99,7 @@ def normalize_subscript( names: List of names for each entry in the object. If not None, this should have length equal to ``length``. Some optimizations - are possible if this is a :py:class:`~Names.Names` object. + are possible if this is a :py:class:`~biocutils.names.Names` object. 
non_negative_only: Whether negative indices must be converted into non-negative @@ -130,13 +128,9 @@ def normalize_subscript( if isinstance(sub, str): if names is None: - raise IndexError( - "failed to find subscript '" - + sub - + "' for vector-like object with no names" - ) + raise IndexError("failed to find subscript '" + sub + "' for vector-like object with no names") i = -1 - from .Names import Names + from .names import Names if isinstance(names, Names): i = names.map(sub) @@ -195,7 +189,7 @@ def normalize_subscript( output = [] has_strings = set() string_positions = [] - from .Names import Names + from .names import Names are_names_indexed = isinstance(names, Names) @@ -224,9 +218,7 @@ def normalize_subscript( if len(has_strings): if names is None: - raise IndexError( - "cannot find string subscripts for vector-like object with no names" - ) + raise IndexError("cannot find string subscripts for vector-like object with no names") mapping = {} for i, y in enumerate(names): diff --git a/src/biocutils/package_utils.py b/src/biocutils/package_utils.py index e6e4b3a..9822bb6 100644 --- a/src/biocutils/package_utils.py +++ b/src/biocutils/package_utils.py @@ -4,13 +4,14 @@ def is_package_installed(package_name: str) -> bool: - """Check if the package is installed. + """Check if a package is installed. Args: - package_name (str): Package name. + package_name: + Package name. Returns: - bool: True if package is installed, otherwise False. + True if package is installed, otherwise False. """ _installed = False try: diff --git a/src/biocutils/print_truncated.py b/src/biocutils/print_truncated.py index 52ab420..11b529a 100644 --- a/src/biocutils/print_truncated.py +++ b/src/biocutils/print_truncated.py @@ -20,13 +20,9 @@ def print_truncated(x, truncated_to: int = 3, full_threshold: int = 10) -> str: String containing the pretty-printed contents. 
""" if isinstance(x, dict): - return print_truncated_dict( - x, truncated_to=truncated_to, full_threshold=full_threshold - ) + return print_truncated_dict(x, truncated_to=truncated_to, full_threshold=full_threshold) elif isinstance(x, list): - return print_truncated_list( - x, truncated_to=truncated_to, full_threshold=full_threshold - ) + return print_truncated_list(x, truncated_to=truncated_to, full_threshold=full_threshold) else: return repr(x) @@ -43,7 +39,8 @@ def print_truncated_list( preview of an object without spewing out all of its contents on the screen. Args: - x: List to be printed. + x: + List to be printed. truncated_to: Number of elements to truncate to, at the start and end of the @@ -71,9 +68,7 @@ def print_truncated_list( if transform is None: def transform(y): - return print_truncated( - y, truncated_to=truncated_to, full_threshold=full_threshold - ) + return print_truncated(y, truncated_to=truncated_to, full_threshold=full_threshold) if len(x) > full_threshold and len(x) > truncated_to * 2: for i in range(truncated_to): @@ -131,9 +126,7 @@ def print_truncated_dict( if transform is None: def transform(y): - return print_truncated( - y, truncated_to=truncated_to, full_threshold=full_threshold - ) + return print_truncated(y, truncated_to=truncated_to, full_threshold=full_threshold) all_keys = x.keys() if len(x) > full_threshold and len(x) > truncated_to * 2: diff --git a/src/biocutils/print_wrapped_table.py b/src/biocutils/print_wrapped_table.py index cbfcb36..447c8e7 100644 --- a/src/biocutils/print_wrapped_table.py +++ b/src/biocutils/print_wrapped_table.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Sequence +from typing import Any, List, Optional, Sequence import numpy @@ -102,9 +102,7 @@ def reinitialize(): return output -def create_floating_names( - names: Optional[List[str]], indices: Sequence[int] -) -> List[str]: +def create_floating_names(names: Optional[List[str]], indices: Sequence[int]) -> List[str]: """Create the floating 
names to use in :py:meth:`~print_wrapped_table`. If no names are present, positional indices are used instead. @@ -144,14 +142,17 @@ def truncate_strings(values: List[str], width: int = 40) -> List[str]: return replacement -def print_type(x) -> str: - """Print the type of an object, with some special behavior for certain classes (e.g., to add the data type of NumPy - arrays). This is intended for display at the top of the columns of :py:meth:`~print_wrapped_table`. +def print_type(x: Any) -> str: + """Print the type of an object. + + Print the type of an object, with some special behavior for certain classes + (e.g., to add the data type of NumPy arrays). This is intended for display + at the top of the columns of :py:meth:`~print_wrapped_table`. Args: x: Some object. - Return: + Returns: String containing the class of the object. """ cls = type(x).__name__ diff --git a/src/biocutils/relaxed_combine_columns.py b/src/biocutils/relaxed_combine_columns.py index a4e2747..5e45453 100644 --- a/src/biocutils/relaxed_combine_columns.py +++ b/src/biocutils/relaxed_combine_columns.py @@ -20,11 +20,7 @@ def relaxed_combine_columns(*x: Any): Returns: Combined object, typically the same type as the first entry of ``x`` """ - raise NotImplementedError( - "no `combine_columns` method implemented for '" - + type(x[0]).__name__ - + "' objects." - ) + raise NotImplementedError("no `combine_columns` method implemented for '" + type(x[0]).__name__ + "' objects.") if is_package_installed("pandas") is True: diff --git a/src/biocutils/relaxed_combine_rows.py b/src/biocutils/relaxed_combine_rows.py index 2dc634b..6358ded 100644 --- a/src/biocutils/relaxed_combine_rows.py +++ b/src/biocutils/relaxed_combine_rows.py @@ -20,9 +20,7 @@ def relaxed_combine_rows(*x: Any): Returns: Combined object, typically the same type as the first entry of ``x``. """ - raise NotImplementedError( - "no `combine_rows` method implemented for '" + type(x[0]).__name__ + "' objects." 
- ) + raise NotImplementedError("no `combine_rows` method implemented for '" + type(x[0]).__name__ + "' objects.") if is_package_installed("pandas"): diff --git a/src/biocutils/reverse_index.py b/src/biocutils/reverse_index.py index 3b64c9e..4d1f2e0 100644 --- a/src/biocutils/reverse_index.py +++ b/src/biocutils/reverse_index.py @@ -1,16 +1,17 @@ from typing import Sequence -def build_reverse_index(obj: Sequence[str]): +def build_reverse_index(obj: Sequence[str]) -> dict: """Build a reverse index by name, for fast lookup operations. - Only contains the first occurence of a term. + Only contains the first occurrence of a term. Args: - obj: List of names. + obj: + List of names. Returns: - A map of keys and their index positions. + A dictionary mapping names to their index positions. """ revmap = {} for i, n in enumerate(obj): diff --git a/src/biocutils/show_as_cell.py b/src/biocutils/show_as_cell.py index a521554..072c17f 100644 --- a/src/biocutils/show_as_cell.py +++ b/src/biocutils/show_as_cell.py @@ -25,11 +25,11 @@ def show_as_cell(x: Any, indices: Sequence[int]) -> List[str]: try: candidate = str(x[i]) if len(candidate) > 25: - candidate = candidate[:20] + "..." # pick the first two characters, whatever. + candidate = candidate[:20] + "..." # pick the first 20 characters, whatever. nl = candidate.find("\n") if nl >= 0: candidate = candidate[:nl] + "..." 
output.append(candidate) - except: + except Exception as _: output.append("####") return output diff --git a/src/biocutils/StringList.py b/src/biocutils/string_list.py similarity index 72% rename from src/biocutils/StringList.py rename to src/biocutils/string_list.py index c16366b..2344a83 100644 --- a/src/biocutils/StringList.py +++ b/src/biocutils/string_list.py @@ -1,7 +1,9 @@ +from __future__ import annotations + from typing import Any, Iterable, Optional, Sequence, Union -from .NamedList import NamedList -from .Names import Names +from .named_list import NamedList +from .names import Names from .normalize_subscript import SubscriptTypes @@ -10,10 +12,25 @@ def _coerce_to_str(x: Any): class _SubscriptCoercer: - def __init__(self, data): + """Coercer for subscript operations on StringList.""" + + def __init__(self, data: Sequence) -> None: + """Initialize the coercer. + + Args: + data: Sequence of values to coerce. + """ self._data = data - def __getitem__(self, index): + def __getitem__(self, index: int) -> Optional[str]: + """Get an item and coerce it to string. + + Args: + index: Index of the item. + + Returns: + Coerced string value. 
+ """ return _coerce_to_str(self._data[index]) @@ -27,7 +44,7 @@ class StringList(NamedList): def __init__( self, - data: Optional[Iterable] = None, + data: Optional[Sequence] = None, names: Optional[Names] = None, _validate: bool = True, ): @@ -56,30 +73,22 @@ def __init__( data = list(_coerce_to_str(item) for item in original) super().__init__(data, names, _validate=_validate) - def set_value( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "StringList": + def set_value(self, index: Union[int, str], value: Any, in_place: bool = False) -> StringList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_value` after coercing ``value`` to a string.""" return super().set_value(index, _coerce_to_str(value), in_place=in_place) - def set_slice( - self, index: SubscriptTypes, value: Sequence, in_place: bool = False - ) -> "StringList": + def set_slice(self, index: SubscriptTypes, value: Sequence, in_place: bool = False) -> StringList: """Calls :py:meth:`~biocutils.NamedList.NamedList.set_slice` after coercing ``value`` to strings.""" return super().set_slice(index, _SubscriptCoercer(value), in_place=in_place) - def safe_insert( - self, index: Union[int, str], value: Any, in_place: bool = False - ) -> "StringList": + def safe_insert(self, index: Union[int, str], value: Any, in_place: bool = False) -> StringList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_insert` after coercing ``value`` to a string.""" return super().safe_insert(index, _coerce_to_str(value), in_place=in_place) - def safe_append(self, value: Any, in_place: bool = False) -> "StringList": + def safe_append(self, value: Any, in_place: bool = False) -> StringList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_append` after coercing ``value`` to a string.""" return super().safe_append(_coerce_to_str(value), in_place=in_place) - def safe_extend(self, other: Iterable, in_place: bool = True) -> "StringList": + def safe_extend(self, other: Iterable, in_place: bool = 
True) -> StringList: """Calls :py:meth:`~biocutils.NamedList.NamedList.safe_extend` after coercing elements of ``other`` to strings.""" - return super().safe_extend( - (_coerce_to_str(y) for y in other), in_place=in_place - ) + return super().safe_extend((_coerce_to_str(y) for y in other), in_place=in_place) diff --git a/src/biocutils/subset_rows.py b/src/biocutils/subset_rows.py index a09fadb..e5b31cb 100644 --- a/src/biocutils/subset_rows.py +++ b/src/biocutils/subset_rows.py @@ -1,19 +1,21 @@ from functools import singledispatch from typing import Any, Sequence +import numpy + +from .package_utils import is_package_installed + @singledispatch def subset_rows(x: Any, indices: Sequence[int]) -> Any: - """ + """Subset a high-dimensional object by indices on the first dimension. + Subset ``x`` by ``indices`` on the first dimension. The default - method attempts to use ``x``'s ``__getitem__`` method, + method attempts to use ``x``'s ``__getitem__`` method. Args: - x: - Any high-dimensional object. - - indices: - Sequence of non-negative integers specifying the integers of interest. + x: Any high-dimensional object. + indices: Sequence of non-negative integers specifying the rows of interest. Returns: The result of slicing ``x`` by ``indices``. The exact type @@ -22,3 +24,36 @@ def subset_rows(x: Any, indices: Sequence[int]) -> Any: tmp = [slice(None)] * len(x.shape) tmp[0] = indices return x[(*tmp,)] + + +@subset_rows.register +def _subset_rows_numpy(x: numpy.ndarray, indices: Sequence[int]) -> numpy.ndarray: + """Subset a NumPy array by row indices. + + Args: + x: NumPy array to subset. + indices: Sequence of non-negative integers specifying rows. + + Returns: + Subsetted NumPy array. 
+ """ + tmp = [slice(None)] * len(x.shape) + tmp[0] = indices + return x[(*tmp,)] + + +if is_package_installed("pandas"): + from pandas import DataFrame + + @subset_rows.register(DataFrame) + def _subset_rows_dataframe(x: DataFrame, indices: Sequence[int]) -> DataFrame: + """Subset a pandas DataFrame by row indices. + + Args: + x: DataFrame to subset. + indices: Sequence of non-negative integers specifying rows. + + Returns: + Subsetted DataFrame. + """ + return x.iloc[indices, :] diff --git a/src/biocutils/subset_sequence.py b/src/biocutils/subset_sequence.py index 609027e..1c39517 100644 --- a/src/biocutils/subset_sequence.py +++ b/src/biocutils/subset_sequence.py @@ -4,17 +4,15 @@ @singledispatch def subset_sequence(x: Any, indices: Sequence[int]) -> Any: - """ + """Subset a sequence-like object by indices. + Subset ``x`` by ``indices`` to obtain a new object. The default method attempts to use ``x``'s ``__getitem__`` method. Args: - x: - Any object that supports ``__getitem__`` with an integer sequence. - - indices: - Sequence of non-negative integers specifying the integers of interest. - All indices should be less than ``len(x)``. + x: Any object that supports ``__getitem__`` with an integer sequence. + indices: Sequence of non-negative integers specifying the positions of + interest. All indices should be less than ``len(x)``. Returns: The result of slicing ``x`` by ``indices``. The exact type @@ -24,19 +22,47 @@ def subset_sequence(x: Any, indices: Sequence[int]) -> Any: @subset_sequence.register -def _subset_sequence_list(x: list, indices: Sequence) -> list: +def _subset_sequence_list(x: list, indices: Sequence[int]) -> list: + """Subset a list by indices. + + Args: + x: List to subset. + indices: Sequence of non-negative integers specifying positions. + + Returns: + A new list containing the specified elements. 
+ """ return type(x)(x[i] for i in indices) @subset_sequence.register -def _subset_sequence_range(x: range, indices: Sequence) -> Union[list, range]: +def _subset_sequence_range(x: range, indices: Sequence[int]) -> Union[list, range]: + """Subset a range by indices. + + Args: + x: Range object to subset. + indices: Sequence of non-negative integers or a range object. + + Returns: + A range if indices is a range, otherwise a list. + """ if isinstance(indices, range): # We can just assume that all 'indices' are in [0, len(x)), # so no need to handle out-of-range indices. - return range( - x.start + x.step * indices.start, - x.start + x.step * indices.stop, - x.step * indices.step - ) + return range(x.start + x.step * indices.start, x.start + x.step * indices.stop, x.step * indices.step) else: return [x[i] for i in indices] + + +@subset_sequence.register +def _subset_sequence_tuple(x: tuple, indices: Sequence[int]) -> tuple: + """Subset a tuple by indices. + + Args: + x: Tuple to subset. + indices: Sequence of non-negative integers specifying positions. + + Returns: + A new tuple containing the specified elements. + """ + return tuple(x[i] for i in indices) diff --git a/src/biocutils/table.py b/src/biocutils/table.py new file mode 100644 index 0000000..518022a --- /dev/null +++ b/src/biocutils/table.py @@ -0,0 +1,38 @@ +from functools import singledispatch +from typing import Sequence + +from .integer_list import IntegerList + + +@singledispatch +def table(x: Sequence, sort: bool = True) -> IntegerList: + """Create a frequency table of values in a sequence. + + Count the occurrences of each unique value in the input sequence and return + them as an IntegerList with names corresponding to the unique values. + + Args: + x: + A sequence of hashable values. + + sort: + Whether to sort the output by keys (values from x). + + Returns: + An IntegerList where names are the unique values and values are their counts. 
+ """ + output = {} + for v in x: + if v in output: + output[v] += 1 + else: + output[v] = 1 + + if sort: + collected = sorted(output.keys()) + tmp = {} + for y in collected: + tmp[y] = output[y] + output = tmp + + return IntegerList.from_dict(output) diff --git a/src/biocutils/which.py b/src/biocutils/which.py index 2a0c0d1..ca7a863 100644 --- a/src/biocutils/which.py +++ b/src/biocutils/which.py @@ -1,10 +1,11 @@ from typing import Optional, Sequence + import numpy def which( x: Sequence, - dtype: Optional[numpy.ndarray] = None, + dtype: Optional[numpy.dtype] = None, ) -> numpy.ndarray: """Report the indices of all elements of ``x`` that are truthy. @@ -22,7 +23,7 @@ def which( """ if isinstance(x, numpy.ndarray): found = numpy.where(x)[0] - if not dtype is None: + if dtype is not None: found = found.astype(dtype=dtype, copy=False, order="A") return found diff --git a/tests/test_BooleanList.py b/tests/test_BooleanList.py index 593effa..a97fd50 100644 --- a/tests/test_BooleanList.py +++ b/tests/test_BooleanList.py @@ -44,7 +44,7 @@ def test_BooleanList_setitem(): x = BooleanList([False, True, True, False]) x[0] = None assert x.as_list() == [None, True, True, False] - x[0] = 12345 + x[0] = 12345 assert x.as_list() == [True, True, True, False] x[1:3] = [False, False] @@ -54,7 +54,7 @@ def test_BooleanList_setitem(): assert x.as_list() == [None, False, None, False] x.set_names(["A", "B", "C", "D"], in_place=True) - x["C"] = True + x["C"] = True assert x.as_list() == [None, False, True, False] x[["A", "B"]] = [False, True] assert x.as_list() == [False, True, True, False] diff --git a/tests/test_Factor.py b/tests/test_Factor.py index fdcb004..c78bb9b 100644 --- a/tests/test_Factor.py +++ b/tests/test_Factor.py @@ -17,13 +17,13 @@ def test_factor_init(): assert len(f) == 6 assert list(f) == ["A", "B", None, "A", None, "E"] assert list(f.get_codes()) == [0, 1, -1, 0, -1, 4] - + f = Factor([None] * 10, levels=["A", "B", "C", "D", "E"]) assert list(f) == [None] * 10 # 
Works with NumPy inputs. f = Factor(numpy.array([4,3,2,1,0], dtype=numpy.uint8), levels=numpy.array(["A", "B", "C", "D", "E"])) - assert len(f) == 5 + assert len(f) == 5 assert f.get_codes().dtype == numpy.int8 assert isinstance(f.get_levels(), StringList) @@ -98,7 +98,7 @@ def test_Factor_get_value(): def test_Factor_get_slice(): f = Factor([0, 1, 2, -1, 2, 4], levels=["A", "B", "C", "D", "E"]) - sub = f.get_slice([0, 1]) + sub = f.get_slice([0, 1]) assert list(sub) == ["A", "B"] assert sub.get_levels() == f.get_levels() @@ -176,7 +176,7 @@ def test_Factor_setitem(): f[-1] = "D" assert list(f.get_codes()) == [1, 1, 0, 0, 2, 3] - f[2:5] = Factor([4, 3, 1], levels=["A", "B", "C", "D", "E"]) + f[2:5] = Factor([4, 3, 1], levels=["A", "B", "C", "D", "E"]) assert list(f.get_codes()) == [1, 1, 4, 3, 1, 3] assert f.get_levels() == f.get_levels() @@ -339,3 +339,54 @@ def test_Factor_init_from_list(): assert isinstance(f1, Factor) assert len(f1) == 5 assert len(f1.get_levels()) == 3 + +def test_Factor_as_list(): + f = Factor([0, 1, -1, 0], levels=["A", "B"]) + assert f.as_list() == ["A", "B", None, "A"] + + empty = Factor([], levels=[]) + assert empty.as_list() == [] + + +def test_Factor_safe_delete(): + f = Factor([0, 1, 2, 0], levels=["A", "B", "C"], names=["x", "y", "z", "w"]) + + y = f.safe_delete(1) + assert y.as_list() == ["A", "C", "A"] + assert y.get_names().as_list() == ["x", "z", "w"] + assert f.as_list() == ["A", "B", "C", "A"] + + y = f.safe_delete("y") + assert y.as_list() == ["A", "C", "A"] + assert y.get_names().as_list() == ["x", "z", "w"] + + y = f.safe_delete(slice(1, 3)) + assert y.as_list() == ["A", "A"] + assert y.get_names().as_list() == ["x", "w"] + + +def test_Factor_delete(): + f = Factor([0, 1, 2], levels=["A", "B", "C"], names=["x", "y", "z"]) + + f.delete(1) + assert f.as_list() == ["A", "C"] + assert f.get_names().as_list() == ["x", "z"] + + f.delete("z") + assert f.as_list() == ["A"] + assert f.get_names().as_list() == ["x"] + + +def 
test_Factor_delitem(): + f = Factor([0, 1, 2, 0], levels=["A", "B", "C"], names=["x", "y", "z", "w"]) + + del f["y"] + assert f.as_list() == ["A", "C", "A"] + assert f.get_names().as_list() == ["x", "z", "w"] + + del f[0] + assert f.as_list() == ["C", "A"] + assert f.get_names().as_list() == ["z", "w"] + + del f[:] + assert len(f) == 0 diff --git a/tests/test_FloatList.py b/tests/test_FloatList.py index 860d170..771f37c 100644 --- a/tests/test_FloatList.py +++ b/tests/test_FloatList.py @@ -44,7 +44,7 @@ def test_FloatList_setitem(): x = FloatList([ 0.5, -2.1, -3.2, -4.5 ]) x[0] = None assert x.as_list() == [None, -2.1, -3.2, -4.5] - x[0] = 12345 + x[0] = 12345 assert x.as_list() == [12345.0, -2.1, -3.2, -4.5] x[1:3] = [10.1, 20.2] diff --git a/tests/test_IntegerList.py b/tests/test_IntegerList.py index d3df82a..61c2970 100644 --- a/tests/test_IntegerList.py +++ b/tests/test_IntegerList.py @@ -44,7 +44,7 @@ def test_IntegerList_setitem(): x = IntegerList([1,2,3,4]) x[0] = None assert x.as_list() == [None, 2, 3, 4] - x[0] = 12345 + x[0] = 12345 assert x.as_list() == [12345, 2, 3, 4] x[1:3] = [10, 20] diff --git a/tests/test_NamedList.py b/tests/test_NamedList.py index f116ee7..dddc167 100644 --- a/tests/test_NamedList.py +++ b/tests/test_NamedList.py @@ -10,6 +10,7 @@ def test_NamedList_init(): assert x.as_list() == [ 1,2,3,4 ] assert x.get_names().as_list() == ["a", "b", "c", "d"] assert len(x) == 4 + assert x.get_name(0) == "a" y = NamedList(x) assert y.as_list() == [1,2,3,4] @@ -23,6 +24,7 @@ def test_NamedList_init(): x = NamedList([1,2,3,4]) assert x.as_list() == [1,2,3,4] assert x.get_names() is None + assert x.get_name(1) is None def test_Names_iter(): @@ -255,3 +257,76 @@ def test_NamedList_generics(): y = biocutils.assign_sequence(x, [1, 3], NamedList([ 20, 40 ], names=["b", "d" ])) assert y.as_list() == [ 1, 20, 3, 40 ] assert y.get_names().as_list() == [ "A", "B", "C", "D" ] # doesn't set the names, as per policy. 
+ +def test_NamedList_safe_delete(): + x = NamedList([1, 2, 3, 4], names=["A", "B", "C", "D"]) + + y = x.safe_delete(1) + assert y.as_list() == [1, 3, 4] + assert y.get_names().as_list() == ["A", "C", "D"] + assert x.as_list() == [1, 2, 3, 4] + + y = x.safe_delete("C") + assert y.as_list() == [1, 2, 4] + assert y.get_names().as_list() == ["A", "B", "D"] + + y = x.safe_delete(slice(1, 3)) + assert y.as_list() == [1, 4] + assert y.get_names().as_list() == ["A", "D"] + + y = x.safe_delete(-1) + assert y.as_list() == [1, 2, 3] + assert y.get_names().as_list() == ["A", "B", "C"] + + +def test_NamedList_delete(): + x = NamedList([1, 2, 3, 4], names=["A", "B", "C", "D"]) + + x.delete(0) + assert x.as_list() == [2, 3, 4] + assert x.get_names().as_list() == ["B", "C", "D"] + + x.delete("D") + assert x.as_list() == [2, 3] + assert x.get_names().as_list() == ["B", "C"] + + +def test_NamedList_delitem(): + x = NamedList([1, 2, 3, 4], names=["A", "B", "C", "D"]) + + del x[1] + assert x.as_list() == [1, 3, 4] + assert x.get_names().as_list() == ["A", "C", "D"] + + del x["A"] + assert x.as_list() == [3, 4] + assert x.get_names().as_list() == ["C", "D"] + + x = NamedList([1, 2, 3, 4], names=["A", "B", "C", "D"]) + del x[0:2] + assert x.as_list() == [3, 4] + assert x.get_names().as_list() == ["C", "D"] + + with pytest.raises(KeyError): + del x["Missing"] + + with pytest.raises(IndexError): + del x[10] + +def test_NamedList_dict_methods(): + x = NamedList([1, 2, 3], names=["A", "B", "C"]) + + assert list(x.keys()) == ["A", "B", "C"] + assert list(x.values()) == [1, 2, 3] + assert list(x.items()) == [("A", 1), ("B", 2), ("C", 3)] + + assert x.get("A") == 1 + assert x.get("C") == 3 + assert x.get("Missing") is None + assert x.get("Missing", 100) == 100 + assert x.get(1) == 2 # Integer index access via get + + y = NamedList([10, 20]) + assert list(y.keys()) == [] + assert list(y.values()) == [10, 20] + assert list(y.items()) == [("0", 10), ("1", 20)] diff --git a/tests/test_Names.py 
b/tests/test_Names.py index 10945ae..43ce660 100644 --- a/tests/test_Names.py +++ b/tests/test_Names.py @@ -185,7 +185,7 @@ def test_Names_generics(): sub = biocutils.subset_sequence(x, [0,3,2,1]) assert isinstance(sub, Names) assert sub.as_list() == ["1", "4", "3", "2"] - + y = ["a", "b", "c", "d"] com = biocutils.combine_sequences(x, y) assert isinstance(com, Names) @@ -196,3 +196,64 @@ def test_Names_generics(): assert isinstance(ass, Names) assert ass.as_list() == ["1", "b", "c", "4"] +def test_Names_safe_delete(): + x = Names(["A", "B", "C", "D"]) + + y = x.safe_delete(1) + assert y.as_list() == ["A", "C", "D"] + assert y.map("B") == -1 + assert y.map("C") == 1 + assert x.as_list() == ["A", "B", "C", "D"] + + y = x.safe_delete(slice(0, 2)) + assert y.as_list() == ["C", "D"] + assert y.map("A") == -1 + assert y.map("C") == 0 + + +def test_Names_delete(): + x = Names(["A", "B", "C", "D"]) + + x.delete(2) + assert x.as_list() == ["A", "B", "D"] + assert x.map("C") == -1 + assert x.map("D") == 2 + + x.delete(0) + assert x.as_list() == ["B", "D"] + assert x.map("A") == -1 + assert x.map("B") == 0 + + +def test_Names_delitem(): + x = Names(["1", "2", "3", "4"]) + + del x[1] + assert x.as_list() == ["1", "3", "4"] + assert x.map("2") == -1 + assert x.map("3") == 1 + + del x[0:2] + assert x.as_list() == ["4"] + assert x.map("1") == -1 + assert x.map("4") == 0 + +def test_Names_contains(): + x = Names(["A", "B", "C"]) + assert "A" in x + assert "B" in x + assert "Z" not in x + + # Works with duplicates + y = Names(["A", "A", "B"]) + assert "A" in y + +def test_Names_is_unique(): + x = Names(["A", "B", "C"]) + assert x.is_unique + + y = Names(["A", "B", "A"]) + assert not y.is_unique + + empty = Names([]) + assert empty.is_unique diff --git a/tests/test_StringList.py b/tests/test_StringList.py index 394ceac..f7aa6f2 100644 --- a/tests/test_StringList.py +++ b/tests/test_StringList.py @@ -44,7 +44,7 @@ def test_StringList_setitem(): x = StringList([1,2,3,4]) x[0] = None 
assert x.as_list() == [None, "2", "3", "4"] - x[0] = 12345 + x[0] = 12345 assert x.as_list() == ["12345", "2", "3", "4"] x[1:3] = [10, 20] @@ -89,7 +89,7 @@ def test_StringList_generics(): sub = biocutils.subset_sequence(x, [0,3,2,1]) assert isinstance(sub, StringList) assert sub.as_list() == ["1", "4", "3", "2"] - + y = ["a", "b", "c", "d"] com = biocutils.combine_sequences(x, y) assert isinstance(com, StringList) diff --git a/tests/test_biocobject.py b/tests/test_biocobject.py index 4315995..e192eb5 100644 --- a/tests/test_biocobject.py +++ b/tests/test_biocobject.py @@ -1,9 +1,9 @@ import pytest from copy import copy from biocutils.bioc_object import BiocObject -from biocutils.NamedList import NamedList +from biocutils.named_list import NamedList + - def test_init_empty(): """Test initialization with default values.""" obj = BiocObject() @@ -14,7 +14,7 @@ def test_init_with_dict(): """Test initialization with a dictionary.""" meta = {"author": "jkanche", "version": 1} obj = BiocObject(metadata=meta) - + assert isinstance(obj.metadata, NamedList) assert len(obj.metadata) == 2 @@ -28,7 +28,7 @@ def test_metadata_property_setter(): obj = BiocObject() new_meta = {"tag": "experiment_1"} obj.metadata = new_meta - + assert len(obj.metadata) == 1 assert isinstance(obj.metadata, NamedList) @@ -36,21 +36,21 @@ def test_set_metadata_copy(): """Test functional style set_metadata (copy-on-write).""" obj = BiocObject(metadata={"id": 1}) original_id = id(obj) - + new_obj = obj.set_metadata({"id": 2}) - + assert id(new_obj) != original_id assert len(new_obj.metadata) == 1 - - assert len(obj.metadata) == 1 + + assert len(obj.metadata) == 1 def test_set_metadata_inplace(): """Test imperative style set_metadata (in-place).""" obj = BiocObject(metadata={"id": 1}) original_id = id(obj) - + new_obj = obj.set_metadata({"id": 2}, in_place=True) - + assert id(new_obj) == original_id assert new_obj is obj assert len(obj.metadata) == 1 @@ -59,18 +59,18 @@ def test_inheritance(): """Test 
that subclasses maintain their type when copying.""" class GenomicContainer(BiocObject): pass - + obj = GenomicContainer(metadata={"genome": "hg38"}) new_obj = obj.set_metadata({"genome": "mm10"}) - + assert isinstance(new_obj, GenomicContainer) assert new_obj is not obj def test_shallow_copy_behavior(): heavy_data = ["large", "data"] - + obj = BiocObject() obj._heavy_data = heavy_data new_obj = obj.set_metadata({"new": "meta"}) assert new_obj is not obj - assert new_obj._heavy_data is obj._heavy_data \ No newline at end of file + assert new_obj._heavy_data is obj._heavy_data diff --git a/tests/test_combine_sequences.py b/tests/test_combine_sequences.py index 3eb8f36..4ecdd21 100644 --- a/tests/test_combine_sequences.py +++ b/tests/test_combine_sequences.py @@ -35,7 +35,7 @@ def test_basic_dense_masked(): x = [1, 2, 3] y = [0.1, 0.2] xd = np.array(x) - yd = np.ma.array(y, mask=[True]*2) + yd = np.ma.array(y, mask=[True]*2) zcomb = combine_sequences(xd, yd) z = x + y diff --git a/tests/test_factorize.py b/tests/test_factorize.py index 4deaed9..4c9d25a 100644 --- a/tests/test_factorize.py +++ b/tests/test_factorize.py @@ -52,7 +52,7 @@ def test_factorize_sorted(): def test_factorize_factor(): f = Factor([4, 3, 2, 1, 0], ["A", "B", "C", "D", "E"]) lev, ind = factorize(f) - assert lev == ["E", "D", "C", "B", "A"] + assert lev == ["E", "D", "C", "B", "A"] assert list(ind) == [0, 1, 2, 3, 4] lev, ind = factorize(f, sort_levels=True)