Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/4039.breaking.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove the `use_highly_variable` parameter from {func}`scanpy.pp.pca` and {func}`scanpy.experimental.pp.normalize_pearson_residuals_pca`; use `mask_var` instead {smaller}`P Angerer`
25 changes: 12 additions & 13 deletions src/scanpy/experimental/pp/_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
doc_layer,
doc_pca_chunk,
)
from ...get import _get_obs_rep, _set_obs_rep
from ...preprocessing._docs import doc_mask_var_hvg
from ...preprocessing._pca import _handle_mask_var, pca
from ...get import _check_mask, _get_obs_rep, _set_obs_rep
from ...preprocessing._docs import doc_mask_var
from ...preprocessing._pca import pca

if TYPE_CHECKING:
from collections.abc import Mapping
Expand Down Expand Up @@ -158,7 +158,7 @@ def normalize_pearson_residuals(
adata=doc_adata,
dist_params=doc_dist_params,
pca_chunk=doc_pca_chunk,
mask_var_hvg=doc_mask_var_hvg,
mask_var=doc_mask_var,
check_values=doc_check_values,
inplace=doc_inplace,
)
Expand All @@ -171,8 +171,9 @@ def normalize_pearson_residuals_pca(
n_comps: int | None = 50,
rng: SeedLike | RNGLike | None = None,
kwargs_pca: Mapping[str, Any] = MappingProxyType({}),
mask_var: np.ndarray | str | None | Default = Default("'highly_variable'"),
use_highly_variable: bool | None = None,
mask_var: np.ndarray | str | None | Default = Default(
"adata.var.get('highly_variable')"
),
check_values: bool = True,
inplace: bool = True,
) -> AnnData | None:
Expand All @@ -190,7 +191,7 @@ def normalize_pearson_residuals_pca(
{adata}
{dist_params}
{pca_chunk}
{mask_var_hvg}
{mask_var}
{check_values}
{inplace}

Expand All @@ -211,19 +212,17 @@ def normalize_pearson_residuals_pca(
residual normalization.
`.varm['PCs']`
The principal components containing the loadings. When `inplace=True` and
`use_highly_variable=True`, this will contain empty rows for the genes not
`mask_var is not None`, this will contain empty rows for the genes not
selected.
`.uns['pca']['variance_ratio']`
Ratio of explained variance.
`.uns['pca']['variance']`
Explained variance, equivalent to the eigenvalues of the covariance matrix.

"""
# Unify new mask argument and deprecated use_highly_variable argument
_, mask_var = _handle_mask_var(
adata, mask_var, use_highly_variable=use_highly_variable
)
del use_highly_variable
if isinstance(mask_var, Default):
mask_var = "highly_variable" if "highly_variable" in adata.var else None
mask_var = _check_mask(adata, mask_var, "var")

if mask_var is not None:
adata_sub = adata[:, mask_var].copy()
Expand Down
9 changes: 1 addition & 8 deletions src/scanpy/preprocessing/_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,11 @@
If True, use `adata.raw.X` for expression values instead of `adata.X`.\
"""

doc_mask_var_hvg = """\
doc_mask_var = """\
mask_var
To run only on a certain set of genes given by a boolean array
or a string referring to an array in :attr:`~anndata.AnnData.var`.
By default, uses `.var['highly_variable']` if available, else everything.
use_highly_variable
Whether to use highly variable genes only, stored in
`.var['highly_variable']`.
By default uses them if they have been determined beforehand.

.. deprecated:: 1.10.0
Use `mask_var` instead
"""

doc_obs_qc_args = """\
Expand Down
66 changes: 12 additions & 54 deletions src/scanpy/preprocessing/_pca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ..._utils import _doc_params, get_literal_vals, is_backed_type
from ..._utils.random import _accepts_legacy_random_state, _legacy_random_state
from ...get import _check_mask, _get_obs_rep
from .._docs import doc_mask_var_hvg
from .._docs import doc_mask_var
from ._compat import _pca_compat_sparse

if TYPE_CHECKING:
Expand Down Expand Up @@ -51,7 +51,7 @@
type SvdSolver = SvdSolvDaskML | SvdSolvSkearn | SvdSolvPCACustom


@_doc_params(mask_var_hvg=doc_mask_var_hvg, rng=doc_rng)
@_doc_params(mask_var=doc_mask_var, rng=doc_rng)
@_accepts_legacy_random_state(0)
def pca( # noqa: PLR0912, PLR0913, PLR0915
data: AnnData | np.ndarray | CSBase,
Expand All @@ -65,8 +65,9 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
chunk_size: int | None = None,
rng: SeedLike | RNGLike | None = None,
return_info: bool = False,
mask_var: NDArray[np.bool] | str | None | Default = Default("'highly_variable'"),
use_highly_variable: bool | None = None,
mask_var: NDArray[np.bool] | str | None | Default = Default(
"adata.var.get('highly_variable')"
),
dtype: DTypeLike = "float32",
key_added: str | None | Default = Default(preset=("pca", "key_added")),
copy: bool = False,
Expand Down Expand Up @@ -160,7 +161,7 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
return_info
Only relevant when not passing an :class:`~anndata.AnnData`:
see “Returns”.
{mask_var_hvg}
{mask_var}
layer
Layer of `adata` to use as expression values.
dtype
Expand Down Expand Up @@ -224,11 +225,12 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
else:
adata = AnnData(data)

# Unify new mask argument and deprecated use_highly_variable argument
mask_var_param, mask_var = _handle_mask_var(
adata, mask_var, obsm=obsm, use_highly_variable=use_highly_variable
)
del use_highly_variable
if isinstance(mask_var, Default):
mask_var = "highly_variable" if "highly_variable" in adata.var else None
elif mask_var is not None and obsm is not None:
msg = "Argument `mask_var` is incompatible with `obsm`."
raise ValueError(msg)
mask_var_param, mask_var = mask_var, _check_mask(adata, mask_var, "var")
adata_comp = adata[:, mask_var] if mask_var is not None else adata

if n_comps is None:
Expand Down Expand Up @@ -355,7 +357,6 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
adata.uns[key_uns] = dict(
params=dict(
zero_center=zero_center,
use_highly_variable=mask_var_param == "highly_variable",
mask_var=mask_var_param,
**(dict(layer=layer) if layer is not None else {}),
**(dict(obsm=obsm) if obsm is not None else {}),
Expand Down Expand Up @@ -387,49 +388,6 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
return x_pca


def _handle_mask_var(
    adata: AnnData,
    mask_var: NDArray[np.bool] | str | None | Default,
    *,
    obsm: str | None = None,
    use_highly_variable: bool | None,
) -> tuple[np.ndarray | str | None, np.ndarray | None]:
    """Reconcile `mask_var` with the deprecated `use_highly_variable` flag.

    Parameters
    ----------
    adata
        Object whose `.var` columns are consulted for `"highly_variable"`.
    mask_var
        Mask as passed by the caller; a `Default` instance means "not specified".
    obsm
        If truthy, no mask is used; an explicit, non-`None` `mask_var`
        is rejected in that case.
    use_highly_variable
        Deprecated flag. Any value other than `None` triggers a `FutureWarning`.

    Returns
    -------
    A pair `(mask_param, mask)`: the resolved mask parameter and the result of
    passing it through `_check_mask`, or `(None, None)` if no mask applies.

    Raises
    ------
    ValueError
        If `mask_var` is given explicitly together with `obsm`,
        or together with `use_highly_variable`.
    """
    mask_unspecified = isinstance(mask_var, Default)

    # A representation taken from `.obsm` cannot be masked along the var axis.
    if obsm:
        if not (mask_unspecified or mask_var is None):
            msg = "Argument `mask_var` is incompatible with `obsm`."
            raise ValueError(msg)
        return None, None

    # The deprecated flag always warns, and must not be mixed with `mask_var`.
    if use_highly_variable is not None:
        hint = (
            'Use_highly_variable=True can be called through mask_var="highly_variable". '
            "Use_highly_variable=False can be called through mask_var=None"
        )
        msg = f"Argument `use_highly_variable` is deprecated, consider using the mask argument. {hint}"
        warn(msg, FutureWarning)
        if not mask_unspecified:
            msg = f"These arguments are incompatible. {hint}"
            raise ValueError(msg)

    # Either the flag decides, or (when the flag is unset and no mask was
    # passed) the presence of the `highly_variable` column does.
    if use_highly_variable is None:
        select_hvg = mask_unspecified and "highly_variable" in adata.var.columns
    else:
        select_hvg = use_highly_variable

    if select_hvg:
        mask_var = "highly_variable"
    elif mask_unspecified or mask_var is None:
        # Without highly variable genes, no mask is used by default.
        return None, None
    return mask_var, _check_mask(adata, mask_var, "var")


@overload
def _handle_dask_ml_args(
svd_solver: str | None, method: type[dmld.PCA | dmld.IncrementalPCA]
Expand Down
15 changes: 7 additions & 8 deletions src/scanpy/tools/_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ class Ingest:
_umap: UMAP
# pca
_pca_centered: bool
_pca_use_hvg: bool
_pca_mask: str | None
_pca_basis: np.ndarray
# adata
_adata_ref: AnnData
Expand Down Expand Up @@ -338,15 +338,14 @@ def _init_neighbors(self, adata: AnnData, neighbors_key: str | None) -> None:

def _init_pca(self, adata: AnnData) -> None:
self._pca_centered = adata.uns["pca"]["params"]["zero_center"]
self._pca_use_hvg = adata.uns["pca"]["params"]["use_highly_variable"]
self._pca_mask = adata.uns["pca"]["params"]["mask_var"]

mask = "highly_variable"
if self._pca_use_hvg and mask not in adata.var.columns:
msg = f"Did not find `adata.var[{mask!r}']`."
if self._pca_mask and self._pca_mask not in adata.var.columns:
msg = f"Did not find `adata.var[{self._pca_mask!r}']`."
raise ValueError(msg)

if self._pca_use_hvg:
self._pca_basis = adata.varm["PCs"][adata.var[mask]]
if self._pca_mask:
self._pca_basis = adata.varm["PCs"][adata.var[self._pca_mask]]
else:
self._pca_basis = adata.varm["PCs"]

Expand Down Expand Up @@ -402,7 +401,7 @@ def __init__(
def _pca(self, n_pcs=None):
x = self._adata_new.X
x = x.toarray() if isinstance(x, CSBase) else x.copy()
if self._pca_use_hvg:
if self._pca_mask:
x = x[:, self._adata_ref.var["highly_variable"]]
if self._pca_centered:
x -= x.mean(axis=0)
Expand Down
9 changes: 0 additions & 9 deletions tests/test_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,3 @@ def test_deprecate_multicore_tsne() -> None:
pytest.warns(ImportWarning, match=r"MulticoreTSNE"),
):
sc.tl.tsne(pbmc, use_fast_tsne=True)


def test_deprecate_use_highly_variable_genes():
    """Passing `use_highly_variable` to `sc.pp.pca` emits a `FutureWarning`."""
    adata = pbmc68k_reduced()
    expected_message = "Argument `use_highly_variable` is deprecated"

    with pytest.warns(FutureWarning, match=expected_message):
        sc.pp.pca(adata, use_highly_variable=True)
1 change: 1 addition & 0 deletions tests/test_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from pytest_mock import MockerFixture


# the input data
X = [[1, 0], [3, 0], [5, 6], [0, 4]]
n_neighbors = 3 # includes data points themselves
Expand Down
24 changes: 8 additions & 16 deletions tests/test_normalization.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from contextlib import nullcontext
from functools import partial
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -216,9 +215,7 @@ def _check_pearson_pca_fields(ad, n_cells, n_comps):
[
pytest.param(False, dict(), "n_genes", id="no_hvg"),
pytest.param(True, dict(), "n_hvgs", id="hvg_default"),
pytest.param(
True, dict(use_highly_variable=False), "n_genes", id="hvg_opt_out"
),
pytest.param(True, dict(mask_var=None), "n_genes", id="hvg_opt_out"),
pytest.param(False, dict(mask_var="test_mask"), "n_unmasked", id="mask"),
],
)
Expand All @@ -245,19 +242,14 @@ def test_normalize_pearson_residuals_pca(
adata, flavor="pearson_residuals", n_top_genes=n_hvgs
)

ctx = (
pytest.warns(FutureWarning, match=r"use_highly_variable.*deprecated")
if "use_highly_variable" in params
else nullcontext()
# inplace=False
adata_pca = sc.experimental.pp.normalize_pearson_residuals_pca(
adata.copy(), inplace=False, n_comps=n_comps, **params
)
# inplace=True modifies the input adata object
sc.experimental.pp.normalize_pearson_residuals_pca(
adata, inplace=True, n_comps=n_comps, **params
)
with ctx: # inplace=False
adata_pca = sc.experimental.pp.normalize_pearson_residuals_pca(
adata.copy(), inplace=False, n_comps=n_comps, **params
)
with ctx: # inplace=True modifies the input adata object
sc.experimental.pp.normalize_pearson_residuals_pca(
adata, inplace=True, n_comps=n_comps, **params
)

for ad, n_var_ret in (
(adata_pca, n_var_copy),
Expand Down
18 changes: 6 additions & 12 deletions tests/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,20 +403,14 @@ def test_pca_n_pcs():
# We use all possible array types here since this error should be raised before
# PCA can realize that it got a Dask array
@pytest.mark.parametrize("array_type", ARRAY_TYPES_ALL)
def test_mask_var_error(array_type):
    """Check that `mask_var="..."` raises if the named `.var` column is missing."""
    # The AnnData is built without any `.var` annotation, so the lookup must fail.
    adata = AnnData(array_type(A_list).astype("float32"))
    with pytest.raises(
        ValueError,
        match=r"Did not find `adata\.var\['highly_variable'\]`\.",
    ):
        sc.pp.pca(adata, mask_var="highly_variable")


def test_mask_length_error():
Expand Down
Loading