Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/genvarloader/_dataset/_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,12 +734,12 @@ def ploidy(self) -> int | None:

@property
def shape(self) -> tuple[int, int]:
    """Return the shape of the dataset. :code:`(n_regions, n_samples)`

    Returns
    -------
    tuple[int, int]
        ``self.n_regions`` followed by ``self.n_samples``.
    """
    # Regions come first, samples second; the docstring must agree with this order.
    return self.n_regions, self.n_samples

@property
def full_shape(self) -> tuple[int, int]:
    """Return the full shape of the dataset, ignoring any subsetting. :code:`(n_regions, n_samples)`

    Returns
    -------
    tuple[int, int]
        The value of ``self._idxer.full_shape``, returned unchanged.
    """
    # NOTE(review): the (n_regions, n_samples) ordering is what the indexer is
    # expected to report; this property does not reorder or validate it.
    return self._idxer.full_shape

@property
Expand Down
23 changes: 21 additions & 2 deletions python/genvarloader/_dataset/_rag_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class RaggedVariant(ak.Record):

class RaggedVariants(ak.Array):
"""An awkward record array, typically with shape (batch, ploidy, ~variants).
Guaranteed to at least have the field "alts" and "v_starts" and one of "refs" or "ilens"."""
Guaranteed to at least have the field "alt" and "start" and one of "ref" or "ilen"."""

def __init__(
self,
Expand Down Expand Up @@ -59,6 +59,25 @@ def __init__(

super().__init__(arr)

@classmethod
def from_ak(cls, arr: ak.Array) -> RaggedVariants:
    """Tag an awkward array as RaggedVariants after validating its fields.

    Parameters
    ----------
    arr
        The awkward array to create a RaggedVariants object from. Only its
        ``fields`` are inspected before it is tagged.

    Returns
    -------
    RaggedVariants
        The result of ``ak.with_parameter`` with the ``"__record__"`` parameter
        set to this class's name.

    Raises
    ------
    ValueError
        If either of the fields ``"alt"`` or ``"start"`` is absent, or if
        neither ``"ref"`` nor ``"ilen"`` is present.
    """
    present = set(arr.fields)

    # Both of these fields are mandatory.
    missing = {"alt", "start"}.difference(present)
    if missing:
        raise ValueError(f"Missing required fields: {missing}")

    # At least one of the two length-describing fields has to exist.
    if not ({"ref", "ilen"} & present):
        raise ValueError("Must have one of ref or ilen.")

    return ak.with_parameter(arr, "__record__", RaggedVariants.__name__)

@property
def alt(self) -> ak.Array:
"""Alternative alleles."""
Expand All @@ -73,7 +92,7 @@ def start(self) -> Ragged[POS_TYPE]:
def ilen(self) -> Ragged[np.int32]:
"""Indel lengths. Infallible."""
if "ilen" not in self.fields:
ilen = ak.num(self.alt, -1) - ak.num(self.ref, -1)
ilen = ak.str.length(self.alt) - ak.str.length(self.ref) # type: ignore
ilen = Ragged(ilen)
return ilen

Expand Down
2 changes: 1 addition & 1 deletion python/genvarloader/_dataset/_reconstruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def from_table(cls, path: str | Path, one_based: bool = True):
info = {
k: variants[k].to_numpy()
for k, v in variants.schema.items()
if v.is_numeric()
if v.is_numeric() and k not in {"POS", "ILEN"}
}

ref = (
Expand Down