Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ python_requires = >=3.8
install_requires =
importlib-metadata; python_version<"3.8"
numpy
biocutils
biocutils>=0.1.8


[options.packages.find]
Expand Down
23 changes: 22 additions & 1 deletion src/delayedarray/Combine.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Callable, Tuple, Sequence
from typing import Callable, Tuple, Sequence, Any
import numpy
import copy

from .DelayedOp import DelayedOp
from ._mask import _concatenate_unmasked_ndarrays, _concatenate_maybe_masked_ndarrays
Expand Down Expand Up @@ -101,6 +102,26 @@ def along(self) -> int:
return self._along


def _simplify_combine(x: Combine) -> Any:
    """Flatten directly nested ``Combine`` operations that share an axis.

    Args:
        x: The ``Combine`` operation to simplify.

    Returns:
        The only seed if ``x`` holds exactly one seed; ``x`` itself if none
        of its seeds could be merged; otherwise a shallow copy of ``x`` whose
        seeds are replaced by the flattened list of seeds.
    """
    seeds = x.seeds
    if len(seeds) == 1:
        # Combining a single array is a no-op, so hand back the array itself.
        return seeds[0]

    flattened = []
    merged_any = False
    for child in seeds:
        # Exact type comparison rather than isinstance: subclasses of Combine
        # may carry extra behaviour, so they are never collapsed.
        if type(child) is not Combine or x.along != child.along:
            flattened.append(child)
            continue
        flattened.extend(child.seeds)
        merged_any = True

    if not merged_any:
        return x

    result = copy.copy(x)
    result._seeds = flattened
    return result


def _extract_subarrays(x: Combine, subset: Tuple[Sequence[int], ...], f: Callable):
# Figuring out which slices belong to who.
chosen = subset[x._along]
Expand Down
23 changes: 15 additions & 8 deletions src/delayedarray/DelayedArray.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from .SparseNdarray import SparseNdarray
from .BinaryIsometricOp import BinaryIsometricOp
from .Cast import Cast
from .Combine import Combine
from .Combine import Combine, _simplify_combine
from .Round import Round
from .Subset import Subset
from .Transpose import Transpose
from .Subset import Subset, _simplify_subset
from .Transpose import Transpose, _simplify_transpose
from .UnaryIsometricOpSimple import UnaryIsometricOpSimple
from .UnaryIsometricOpWithArgs import UnaryIsometricOpWithArgs

Expand Down Expand Up @@ -136,7 +136,9 @@ def T(self) -> "DelayedArray":
Returns:
A ``DelayedArray`` containing the delayed transpose.
"""
return DelayedArray(Transpose(self._seed, perm=None))
tout = Transpose(self._seed, perm=None)
tout = _simplify_transpose(tout)
return DelayedArray(tout)

def __repr__(self) -> str:
"""Pretty-print this ``DelayedArray``. This uses
Expand Down Expand Up @@ -253,20 +255,23 @@ def __array_function__(self, func, types, args, kwargs) -> "DelayedArray":
seeds = []
for x in args[0]:
seeds.append(_extract_seed(x))

if "axis" in kwargs:
axis = kwargs["axis"]
else:
axis = 0
return DelayedArray(Combine(seeds, along=axis))
cout = Combine(seeds, along=axis)
cout = _simplify_combine(cout)
return DelayedArray(cout)

if func == numpy.transpose:
seed = _extract_seed(args[0])
if "axes" in kwargs:
axes = kwargs["axes"]
else:
axes = None
return DelayedArray(Transpose(seed, perm=axes))
tout = Transpose(seed, perm=axes)
tout = _simplify_transpose(tout)
return DelayedArray(tout)

if func == numpy.round:
seed = _extract_seed(args[0])
Expand Down Expand Up @@ -808,7 +813,9 @@ def __getitem__(self, subset: Tuple[Union[slice, Sequence], ...]) -> Union["Dela
"""
cleaned = _getitem_subset_preserves_dimensions(self.shape, subset)
if cleaned is not None:
return DelayedArray(Subset(self._seed, cleaned))
sout = Subset(self._seed, cleaned)
sout = _simplify_subset(sout)
return DelayedArray(sout)
return _getitem_subset_discards_dimensions(self._seed, subset, extract_dense_array)


Expand Down
34 changes: 27 additions & 7 deletions src/delayedarray/Subset.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from typing import Callable, Sequence, Tuple
from typing import Callable, Sequence, Tuple, Any
from numpy import dtype, ndarray, ix_
import numpy
import biocutils
import copy

from .DelayedOp import DelayedOp
from .SparseNdarray import SparseNdarray
from ._subset import _sanitize_subset
from ._subset import _sanitize_subset, _is_single_subset_noop
from .extract_dense_array import extract_dense_array
from .extract_sparse_array import extract_sparse_array
from .create_dask_array import create_dask_array
Expand Down Expand Up @@ -87,18 +89,36 @@ def subset(self) -> Tuple[Sequence[int], ...]:
return self._subset


def _simplify_subset(x: Subset) -> Any:
    """Merge a ``Subset`` whose seed is itself a plain ``Subset``.

    Args:
        x: The ``Subset`` operation to simplify.

    Returns:
        ``x`` itself if its seed is not exactly a ``Subset``; the innermost
        seed if ``_is_single_subset_noop`` reports every composed
        per-dimension subset as a no-op; otherwise a shallow copy of ``x``
        that points at the innermost seed and carries the composed subsets.
    """
    inner = x.seed
    # Exact type comparison rather than isinstance: subclasses of Subset may
    # carry extra behaviour, so they are never collapsed.
    if type(inner) is not Subset:
        return x

    base = inner.seed
    composed = []
    is_noop = True
    for dim, outer_sub in enumerate(x.subset):
        # Apply the outer indices to the inner indices for this dimension.
        combined = biocutils.subset_sequence(inner.subset[dim], outer_sub)
        composed.append(combined)
        # Once one dimension is known not to be a no-op, stop checking.
        if is_noop and not _is_single_subset_noop(base.shape[dim], combined):
            is_noop = False

    if is_noop:
        return base

    result = copy.copy(x)
    result._seed = base
    result._subset = tuple(composed)
    return result


def _extract_array(x: Subset, subset: Tuple[Sequence[int], ...], f: Callable):
newsub = list(subset)
expanded = []
is_safe = 0

for i, s in enumerate(newsub):
cursub = x._subset[i]
if isinstance(cursub, ndarray):
replacement = cursub[s]
else:
replacement = [cursub[j] for j in s]

replacement = biocutils.subset_sequence(cursub, s)
san_sub, san_remap = _sanitize_subset(replacement)
newsub[i] = san_sub

Expand Down
32 changes: 27 additions & 5 deletions src/delayedarray/Transpose.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Callable, Optional, Tuple, Sequence
from typing import Callable, Optional, Tuple, Sequence, Any
from numpy import dtype, transpose
import numpy
import copy

from .DelayedOp import DelayedOp
from .SparseNdarray import SparseNdarray
Expand Down Expand Up @@ -40,8 +41,6 @@ def __init__(self, seed, perm: Optional[Tuple[int, ...]]):
dimension ordering is assumed to be reversed.
"""

self._seed = seed

curshape = seed.shape
ndim = len(curshape)
if perm is not None:
Expand All @@ -52,12 +51,12 @@ def __init__(self, seed, perm: Optional[Tuple[int, ...]]):
else:
perm = (*range(ndim - 1, -1, -1),)

self._perm = perm

final_shape = []
for x in perm:
final_shape.append(curshape[x])

self._seed = seed
self._perm = perm
self._shape = (*final_shape,)

@property
Expand Down Expand Up @@ -94,6 +93,29 @@ def perm(self) -> Tuple[int, ...]:
return self._perm


def _simplify_transpose(x: Transpose) -> Any:
    """Merge a ``Transpose`` whose seed is itself a plain ``Transpose``.

    Args:
        x: The ``Transpose`` operation to simplify.

    Returns:
        ``x`` itself if its seed is not exactly a ``Transpose``; the
        innermost seed if the two permutations cancel out; otherwise a
        shallow copy of ``x`` that points at the innermost seed and carries
        the composed permutation.
    """
    inner = x.seed
    # Exact type comparison rather than isinstance: subclasses of Transpose
    # may carry extra behaviour, so they are never collapsed.
    if type(inner) is not Transpose:
        return x

    # Output dimension i of x comes from dimension x.perm[i] of the inner
    # transpose, which in turn comes from dimension inner.perm[x.perm[i]]
    # of the innermost seed.
    composed = tuple(inner.perm[p] for p in x.perm)

    # An identity permutation means the two transposes undo each other.
    if all(src == pos for pos, src in enumerate(composed)):
        return inner.seed

    result = copy.copy(x)
    result._seed = inner.seed
    result._perm = composed
    return result


def _extract_array(x: Transpose, subset: Tuple[Sequence[int], ...], f: Callable):
permsub = [None] * len(subset)
for i, j in enumerate(x._perm):
Expand Down
42 changes: 42 additions & 0 deletions tests/test_Combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,48 @@ def test_Combine_otherdim(left_mask_rate, right_mask_rate):
assert_identical_ndarrays(delayedarray.to_dense_array(x), safe_concatenate((y1, y2), axis=1))


def test_Combine_simplified():
    """Check that nested concatenations are flattened where possible."""
    dense_a = simulate_ndarray((30, 23), mask_rate=0)
    dense_b = simulate_ndarray((50, 23), mask_rate=0)
    dense_c = simulate_ndarray((30, 41), mask_rate=0)

    delayed_a = delayedarray.DelayedArray(dense_a)
    delayed_b = delayedarray.DelayedArray(dense_b)
    delayed_c = delayedarray.DelayedArray(dense_c)

    # Nested concatenation along the default axis collapses to three seeds.
    inner = numpy.concatenate((delayed_a, delayed_b))
    outer = numpy.concatenate((inner, delayed_b))
    assert isinstance(outer, delayedarray.DelayedArray)
    assert isinstance(outer.seed, delayedarray.Combine)
    assert len(outer.seed.seeds) == 3
    assert not any(isinstance(s, delayedarray.Combine) for s in outer.seed.seeds)
    expected = safe_concatenate((dense_a, dense_b, dense_b))
    assert_identical_ndarrays(delayedarray.to_dense_array(outer), expected)

    # Same again, but along the second axis.
    inner = numpy.concatenate((delayed_a, delayed_c), axis=1)
    outer = numpy.concatenate((inner, delayed_a), axis=1)
    assert isinstance(outer, delayedarray.DelayedArray)
    assert isinstance(outer.seed, delayedarray.Combine)
    assert len(outer.seed.seeds) == 3
    assert not any(isinstance(s, delayedarray.Combine) for s in outer.seed.seeds)
    expected = safe_concatenate((dense_a, dense_c, dense_a), axis=1)
    assert_identical_ndarrays(delayedarray.to_dense_array(outer), expected)

    # Concatenating a single array leaves the bare seed behind.
    single = numpy.concatenate((delayed_a,))
    assert isinstance(single, delayedarray.DelayedArray)
    assert isinstance(single.seed, numpy.ndarray)
    assert_identical_ndarrays(delayedarray.to_dense_array(single), dense_a)

    # Nested Combines along a different axis are left untouched.
    inner = numpy.concatenate((delayed_a, delayed_b))
    outer = numpy.concatenate((inner, inner), axis=1)
    assert isinstance(outer, delayedarray.DelayedArray)
    assert isinstance(outer.seed, delayedarray.Combine)
    assert len(outer.seed.seeds) == 2
    assert all(isinstance(s, delayedarray.Combine) for s in outer.seed.seeds)
    stacked = numpy.concatenate((dense_a, dense_b))
    expected = safe_concatenate((stacked, stacked), axis=1)
    assert_identical_ndarrays(delayedarray.to_dense_array(outer), expected)


@pytest.mark.parametrize("left_mask_rate", [0, 0.2])
@pytest.mark.parametrize("right_mask_rate", [0, 0.2])
def test_Combine_subset(left_mask_rate, right_mask_rate):
Expand Down
31 changes: 31 additions & 0 deletions tests/test_Subset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import delayedarray
import numpy
import pytest
import biocutils

from utils import simulate_ndarray, assert_identical_ndarrays, simulate_SparseNdarray

Expand All @@ -13,6 +14,9 @@ def test_Subset_ix(mask_rate):

subix = numpy.ix_(range(1, 10), [20, 30, 40], [10, 11, 12, 13])
sub = x[subix]
assert isinstance(sub, delayedarray.DelayedArray)
assert isinstance(sub.seed, delayedarray.Subset)

assert sub.shape == (9, 3, 4)
assert isinstance(sub.seed.seed, numpy.ndarray)
assert len(sub.seed.subset) == 3
Expand Down Expand Up @@ -88,6 +92,33 @@ def test_Subset_unsorted_duplicates(mask_rate):
assert_identical_ndarrays(delayedarray.to_dense_array(sub), y[:, [5, 4, 3, 2, 1, 0], :])


def test_Subset_simplified():
    """Check that stacked subsetting operations are merged or cancelled."""
    dims = (30, 55)
    dense = simulate_ndarray(dims, mask_rate=0)
    delayed = delayedarray.DelayedArray(dense)

    # Two subsets on the same dimension merge into a single Subset.
    first = delayed[:, list(range(0, 55, 2))]
    second = first[:, list(range(5, 20))]
    assert isinstance(second, delayedarray.DelayedArray)
    assert isinstance(second.seed, delayedarray.Subset)
    assert isinstance(second.seed.seed, numpy.ndarray)
    expected_cols = biocutils.subset_sequence(range(0, 55, 2), range(5, 20))
    assert_identical_ndarrays(delayedarray.to_dense_array(second), dense[:, expected_cols])

    # Subsets on different dimensions also merge into a single Subset.
    first = delayed[list(range(10, 20)), :]
    second = first[:, list(range(0, 55, 5))]
    assert isinstance(second, delayedarray.DelayedArray)
    assert isinstance(second.seed, delayedarray.Subset)
    assert isinstance(second.seed.seed, numpy.ndarray)
    assert_identical_ndarrays(delayedarray.to_dense_array(second), dense[10:20, 0:55:5])

    # Reversing both dimensions twice is a no-op, so the bare seed comes back.
    first = delayed[::-1, ::-1]
    second = first[::-1, ::-1]
    assert isinstance(second, delayedarray.DelayedArray)
    assert isinstance(second.seed, numpy.ndarray)
    assert_identical_ndarrays(delayedarray.to_dense_array(second), dense)


@pytest.mark.parametrize("mask_rate", [0, 0.2])
def test_Subset_subset(mask_rate):
y = simulate_ndarray((99, 63), mask_rate=mask_rate)
Expand Down
23 changes: 23 additions & 0 deletions tests/test_Transpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,29 @@ def test_Transpose_simple(mask_rate):
assert_identical_ndarrays(delayedarray.to_dense_array(t), numpy.transpose(y))


def test_Transpose_simplified():
    """Check that stacked transpositions are merged or cancelled."""
    dense = simulate_ndarray((30, 23, 5), mask_rate=0)
    delayed = delayedarray.DelayedArray(dense)
    flipped = delayed.T

    # Transposing twice with the default permutation returns the bare seed.
    twice = flipped.T
    assert isinstance(twice, delayedarray.DelayedArray)
    assert isinstance(twice.seed, numpy.ndarray)
    assert_identical_ndarrays(delayedarray.to_dense_array(twice), dense.T.T)

    # An explicit reversing permutation cancels the first transpose too.
    undone = numpy.transpose(flipped, axes=(2, 1, 0))
    assert isinstance(undone, delayedarray.DelayedArray)
    assert isinstance(undone.seed, numpy.ndarray)
    expected = numpy.transpose(dense.T, (2, 1, 0))
    assert_identical_ndarrays(delayedarray.to_dense_array(undone), expected)

    # A permutation that does not cancel is merged into a single Transpose.
    merged = numpy.transpose(flipped, axes=(1, 2, 0))
    assert isinstance(merged, delayedarray.DelayedArray)
    assert isinstance(merged.seed, delayedarray.Transpose)
    assert merged.seed.perm == (1, 0, 2)
    assert isinstance(merged.seed.seed, numpy.ndarray)
    expected = numpy.transpose(dense.T, axes=(1, 2, 0))
    assert_identical_ndarrays(delayedarray.to_dense_array(merged), expected)


@pytest.mark.parametrize("mask_rate", [0, 0.2])
def test_Transpose_more_dimensions(mask_rate):
y = simulate_ndarray((30, 23, 10), mask_rate=mask_rate)
Expand Down
Loading