Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
f76dc7b
Modularity score functions with comments
amalia-k510 Apr 25, 2025
f092469
typo fix
amalia-k510 Apr 25, 2025
7ffa1ec
Merge branch 'scverse:main' into main
amalia-k510 Apr 25, 2025
c0d0c52
Merge branch 'scverse:main' into main
amalia-k510 May 7, 2025
68652a7
modularity code updated and 6 tests written for modularity
amalia-k510 May 7, 2025
948319a
error fixing from pipelines
amalia-k510 May 7, 2025
6a64330
ruff error fix
amalia-k510 May 7, 2025
793351f
keywords variable fix
amalia-k510 May 7, 2025
92d8e26
neighbors from a precomputed distance matrix, still need to make sure…
amalia-k510 May 7, 2025
198c4fb
revert back
amalia-k510 May 7, 2025
e7fb67a
code only for the prexisting distance matrix
amalia-k510 May 7, 2025
14cb441
initial changes for the neighborhors
amalia-k510 May 8, 2025
0ce8c15
distances name switch and sparse array allowed
amalia-k510 May 12, 2025
914b87d
input fix
amalia-k510 May 12, 2025
d285203
variable input fixes
amalia-k510 May 12, 2025
50705b3
test added
amalia-k510 May 12, 2025
4730667
numpy issue fix for one line
amalia-k510 May 12, 2025
4b9fe3e
avoid densifying sparse matrices
amalia-k510 May 12, 2025
7d754c7
switched to @needs
amalia-k510 May 12, 2025
15320af
switched to @needs
amalia-k510 May 12, 2025
623a86c
variable fix input
amalia-k510 May 12, 2025
e8c9a25
code from separate PR removed
amalia-k510 May 12, 2025
040b8b7
unify metadata assembly
flying-sheep May 16, 2025
d6a9aee
Discard changes to src/scanpy/neighbors/__init__.py
flying-sheep May 16, 2025
c03b863
comments fix and release notes
amalia-k510 May 23, 2025
473a437
comments fix typo
amalia-k510 May 23, 2025
c6e5d1f
Merge branch 'scverse:main' into main
amalia-k510 May 25, 2025
ac0a6b3
before neighbour merge
amalia-k510 May 25, 2025
1c033f0
notes
amalia-k510 May 25, 2025
662534b
Merge branch 'main' of https://github.com/amalia-k510/scanpy into main
amalia-k510 May 25, 2025
32116f0
Merge branch 'matrix_exist' into main
amalia-k510 May 25, 2025
a1b2033
merge error fix
amalia-k510 May 25, 2025
4cdc729
post merge and call form neighbor
amalia-k510 May 25, 2025
cb7aaf6
release notes fix
amalia-k510 May 26, 2025
7e34ce2
Merge branch 'main' into main
flying-sheep May 28, 2025
efc2f89
Merge branch 'main' into pr/amalia-k510/3613
flying-sheep Dec 5, 2025
e0cf8a6
only one function
flying-sheep Dec 5, 2025
d2ac79b
Merge branch 'main' into main
flying-sheep Jan 13, 2026
16e6c7e
fix syntax
flying-sheep Jan 13, 2026
707766d
fix relnote name
flying-sheep Jan 13, 2026
389f076
fix call
flying-sheep Jan 13, 2026
76cc0f9
more test fixes
flying-sheep Jan 13, 2026
6700928
Merge branch 'main' into main
flying-sheep Jan 13, 2026
562cd52
Merge branch 'main' into pr/amalia-k510/3613
flying-sheep Jan 13, 2026
329de39
fix merge
flying-sheep Jan 13, 2026
7e78927
retrieve
flying-sheep Jan 13, 2026
e7dd7de
test and doc
flying-sheep Jan 13, 2026
dc2cd35
simplify
flying-sheep Jan 13, 2026
99a925a
modes
flying-sheep Jan 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/3616.feat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add modularity scoring via {func}`scanpy.metrics.modularity` with support for directed/undirected graphs {smaller}`A. Karesh`
30 changes: 16 additions & 14 deletions src/scanpy/_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,20 +883,22 @@ class NeighborsView:
This defines where to look for neighbors dictionary,
connectivities, distances.

neigh = NeighborsView(adata, key)
neigh['distances']
neigh['connectivities']
neigh['params']
'connectivities' in neigh
'params' in neigh

is the same as

adata.obsp[adata.uns[key]['distances_key']]
adata.obsp[adata.uns[key]['connectivities_key']]
adata.uns[key]['params']
adata.uns[key]['connectivities_key'] in adata.obsp
'params' in adata.uns[key]
Examples
--------
>>> neigh = NeighborsView(adata, key)
>>> neigh["distances"]
>>> neigh["connectivities"]
>>> neigh["params"]
>>> "connectivities" in neigh
>>> "params" in neigh

is the same as

>>> adata.obsp[adata.uns[key]["distances_key"]]
>>> adata.obsp[adata.uns[key]["connectivities_key"]]
>>> adata.uns[key]["params"]
>>> adata.uns[key]["connectivities_key"] in adata.obsp
>>> "params" in adata.uns[key]

"""

Expand Down
4 changes: 2 additions & 2 deletions src/scanpy/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from ._gearys_c import gearys_c
from ._metrics import confusion_matrix
from ._metrics import confusion_matrix, modularity
from ._morans_i import morans_i

__all__ = ["confusion_matrix", "gearys_c", "morans_i"]
__all__ = ["confusion_matrix", "gearys_c", "modularity", "morans_i"]
134 changes: 133 additions & 1 deletion src/scanpy/metrics/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,23 @@

from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, overload

import numpy as np
import pandas as pd
from anndata import AnnData
from natsort import natsorted
from pandas.api.types import CategoricalDtype

from .._utils import NeighborsView

if TYPE_CHECKING:
from collections.abc import Sequence
from typing import Literal

from numpy.typing import ArrayLike

from .._compat import SpBase


def confusion_matrix(
Expand Down Expand Up @@ -89,3 +97,127 @@ def confusion_matrix(
df = df.loc[np.array(orig_idx), np.array(new_idx)]

return df


@overload
def modularity(
    connectivities: ArrayLike | SpBase,
    /,
    labels: pd.Series | ArrayLike,
    *,
    is_directed: bool,
) -> float: ...


@overload
def modularity(
    adata: AnnData,
    /,
    labels: str | pd.Series | ArrayLike = "leiden",
    *,
    neighbors_key: str | None = None,
    is_directed: bool | None = None,
    mode: Literal["calculate", "update", "retrieve"] = "calculate",
) -> float: ...


def modularity(
    adata_or_connectivities: AnnData | ArrayLike | SpBase,
    /,
    labels: str | pd.Series | ArrayLike = "leiden",
    *,
    neighbors_key: str | None = None,
    is_directed: bool | None = None,
    mode: Literal["calculate", "update", "retrieve"] = "calculate",
) -> float:
    """Compute the modularity of a clustering on a graph.

    Dispatches on the type of the first argument:
    an `AnnData` object is scored using its stored neighbors graph,
    anything else is treated as the connectivities matrix itself.

    Parameters
    ----------
    adata_or_connectivities
        Either an `AnnData` object or a weighted adjacency matrix describing the graph.
    labels
        Cluster label of each node in the graph.
        Together with `AnnData`, this may also be the name of the column in `adata.obs`
        holding the labels (default: `"leiden"`).
        Together with a connectivities matrix, it has to be array-like.
    neighbors_key
        Only used with `AnnData`: selects the neighbors entry whose connectivities are scored,
        i.e. `adata.obsp[adata.uns[neighbors_key]['connectivities_key']]`.
    is_directed
        Whether the graph is treated as directed.
        Mandatory together with a connectivities matrix.
        With `AnnData`, it may be omitted if the neighbors `params` contain `'is_directed'`.
    mode
        Only used with `AnnData`.
        `'calculate'` computes the score,
        `'update'` computes it and additionally stores it in `adata.uns[labels]['modularity']`,
        `'retrieve'` returns the stored value without computing anything.

    Returns
    -------
    The modularity of the graph for the given clustering.

    Raises
    ------
    TypeError
        If a connectivities matrix is combined with string `labels`
        or passed without `is_directed`.
    """
    if not isinstance(adata_or_connectivities, AnnData):
        # Raw matrix: there is no `.obs` / `.uns` to fall back on,
        # so both the labels and the directedness have to be explicit.
        if isinstance(labels, str):
            msg = "`labels` must be provided as array when passing a connectivities array"
            raise TypeError(msg)
        if is_directed is None:
            msg = "`is_directed` must be provided when passing a connectivities array"
            raise TypeError(msg)
        return modularity_array(
            adata_or_connectivities, labels=labels, is_directed=is_directed
        )

    return modularity_adata(
        adata_or_connectivities,
        labels=labels,
        neighbors_key=neighbors_key,
        is_directed=is_directed,
        mode=mode,
    )


def modularity_adata(
    adata: AnnData,
    /,
    *,
    labels: str | pd.Series | ArrayLike,
    neighbors_key: str | None,
    is_directed: bool | None,
    mode: Literal["calculate", "update", "retrieve"],
) -> float:
    """Compute, store, or look up the modularity for an `AnnData` object.

    Parameters
    ----------
    adata
        Object providing `.obs`, `.uns` and the neighbors graph.
    labels
        Name of the `adata.obs` column holding the cluster labels, or the labels themselves.
        Has to be a string unless `mode` is `'calculate'`,
        because it doubles as the key into `adata.uns`.
    neighbors_key
        Passed to `NeighborsView` to locate the connectivities and neighbors `params`.
    is_directed
        Whether the graph is directed.
        If `None`, `'is_directed'` is read from the neighbors `params`.
    mode
        `'retrieve'` returns `adata.uns[labels]['modularity']` without touching the graph,
        `'calculate'` computes the score,
        `'update'` computes it and writes it to `adata.uns[labels]['modularity']`.

    Returns
    -------
    The stored (`'retrieve'`) or freshly computed modularity.

    Raises
    ------
    ValueError
        If `mode` is not one of the three supported values,
        if `labels` is not a string while `mode` is `'retrieve'` or `'update'`,
        or if `is_directed` is neither given nor stored in the neighbors `params`.
    """
    # Reject typos such as "retreive" up front instead of silently
    # falling through to a full (and potentially expensive) calculation.
    if mode not in ("calculate", "update", "retrieve"):
        msg = f"`mode` must be one of 'calculate', 'update', or 'retrieve', got {mode!r}"
        raise ValueError(msg)
    if mode != "calculate" and not isinstance(labels, str):
        msg = "`labels` must be a string when `mode` is `'retrieve'` or `'update'`"
        raise ValueError(msg)
    if mode == "retrieve":
        return adata.uns[labels]["modularity"]

    nv = NeighborsView(adata, neighbors_key)
    connectivities = nv["connectivities"]

    # An explicit argument wins; otherwise fall back to what is stored with the graph.
    if is_directed is None and (is_directed := nv["params"].get("is_directed")) is None:
        msg = "`adata` has no `'is_directed'` in `adata.uns[neighbors_key]['params']`, need to specify `is_directed`"
        raise ValueError(msg)

    m = modularity(
        connectivities,
        adata.obs[labels] if isinstance(labels, str) else labels,
        is_directed=is_directed,
    )
    if mode == "update":
        # The labels may come from a plain `.obs` column without a matching `.uns`
        # entry; create the entry rather than failing with a bare `KeyError`
        # after the score has already been computed.
        adata.uns.setdefault(labels, {})["modularity"] = m
    return m


def modularity_array(
    connectivities: ArrayLike | SpBase,
    /,
    *,
    labels: pd.Series | ArrayLike,
    is_directed: bool,
) -> float:
    """Compute the modularity of a clustering from a connectivities matrix using igraph.

    Parameters
    ----------
    connectivities
        Weighted adjacency matrix, handed to `igraph.Graph.Weighted_Adjacency`.
    labels
        Cluster label of each node.
        Converted to integer codes via `pandas.Categorical` before being passed to igraph.
    is_directed
        Build a directed (`True`) or an undirected (`False`) graph from `connectivities`.

    Returns
    -------
    The result of `igraph.Graph.modularity` for the membership given by `labels`.

    Raises
    ------
    ImportError
        If igraph is not installed.
    """
    # igraph is imported lazily so that it only has to be installed when this is called.
    try:
        import igraph as ig
    except ImportError as e:
        msg = "igraph is required for computing modularity"
        raise ImportError(msg) from e

    igraph_mode = ig.ADJ_DIRECTED if is_directed else ig.ADJ_UNDIRECTED
    graph = ig.Graph.Weighted_Adjacency(connectivities, mode=igraph_mode)
    # cluster labels to integer codes required by igraph
    membership = pd.Categorical(np.asarray(labels)).codes
    # NOTE(review): no `weights=` is passed here, so igraph presumably scores the
    # graph as unweighted even though the edges were built from a weighted
    # matrix -- confirm this is intended.
    return graph.modularity(membership)
5 changes: 5 additions & 0 deletions src/scanpy/neighbors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class NeighborsParams(TypedDict): # noqa: D101
metric_kwds: NotRequired[Mapping[str, Any]]
use_rep: NotRequired[str]
n_pcs: NotRequired[int]
is_directed: NotRequired[bool]


@_doc_params(n_pcs=doc_n_pcs, use_rep=doc_use_rep)
Expand All @@ -92,6 +93,7 @@ def neighbors( # noqa: PLR0913
metric_kwds: Mapping[str, Any] = MappingProxyType({}),
random_state: _LegacyRandom = 0,
key_added: str | None = None,
is_directed: bool = False,
copy: bool = False,
) -> AnnData | None:
"""Compute the nearest neighbors distance matrix and a neighborhood graph of observations :cite:p:`McInnes2018`.
Expand Down Expand Up @@ -169,6 +171,8 @@ def neighbors( # noqa: PLR0913
If specified, the neighbors data is added to .uns[key_added],
distances are stored in `.obsp[f'{{key_added}}_distances']` and
connectivities in `.obsp[f'{{key_added}}_connectivities']`.
is_directed
If `True`, the connectivity matrix is expected to be a directed graph.
copy
Return a copy instead of writing to adata.

Expand Down Expand Up @@ -264,6 +268,7 @@ def neighbors( # noqa: PLR0913
method=method,
random_state=random_state,
metric=metric,
is_directed=is_directed,
**({} if not metric_kwds else dict(metric_kwds=metric_kwds)),
**({} if use_rep is None else dict(use_rep=use_rep)),
**({} if n_pcs is None else dict(n_pcs=n_pcs)),
Expand Down
16 changes: 13 additions & 3 deletions src/scanpy/tools/_leiden.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, cast

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -47,7 +47,7 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915
flavor: Literal["leidenalg", "igraph"] | None = None,
**clustering_args,
) -> AnnData | None:
"""Cluster cells into subgroups :cite:p:`Traag2019`.
r"""Cluster cells into subgroups :cite:p:`Traag2019`.

Cluster cells using the Leiden algorithm :cite:p:`Traag2019`,
an improved version of the Louvain algorithm :cite:p:`Blondel2008`.
Expand Down Expand Up @@ -120,6 +120,12 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915
A dict with the values for the parameters `resolution`, `random_state`,
and `n_iterations`.

`adata.uns['leiden' | key_added]['modularity']` : :class:`float`
The modularity score of the final clustering,
as calculated by the `flavor`.
Use :func:`scanpy.metrics.modularity`\ `(adata, mode='calculate' | 'update')`
to calculate a score independent of `flavor`.

"""
if flavor is None:
flavor = "leidenalg"
Expand Down Expand Up @@ -178,7 +184,10 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915
if use_weights:
clustering_args["weights"] = np.array(g.es["weight"]).astype(np.float64)
clustering_args["seed"] = random_state
part = leidenalg.find_partition(g, partition_type, **clustering_args)
part = cast(
"MutableVertexPartition",
leidenalg.find_partition(g, partition_type, **clustering_args),
)
else:
g = _utils.get_igraph_from_adjacency(adjacency, directed=False)
if use_weights:
Expand Down Expand Up @@ -212,6 +221,7 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915
random_state=random_state,
n_iterations=n_iterations,
)
adata.uns[key_added]["modularity"] = part.modularity
logg.info(
" finished",
time=start,
Expand Down
28 changes: 22 additions & 6 deletions tests/test_clustering.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from functools import partial
from typing import TYPE_CHECKING

import pandas as pd
import pytest
Expand All @@ -10,21 +11,27 @@
from testing.scanpy._helpers.data import pbmc68k_reduced
from testing.scanpy._pytest.marks import needs

if TYPE_CHECKING:
from typing import Literal


@pytest.fixture
def adata_neighbors():
    """Return a fresh `pbmc68k_reduced()` dataset for each test.

    NOTE(review): the fixture name suggests the dataset already carries a
    neighbors graph -- confirm against `pbmc68k_reduced`.
    """
    return pbmc68k_reduced()


FLAVORS = [
pytest.param("igraph", marks=needs.igraph),
pytest.param("leidenalg", marks=needs.leidenalg),
]
@pytest.fixture(
    params=[
        pytest.param("igraph", marks=needs.igraph),
        pytest.param("leidenalg", marks=needs.leidenalg),
    ]
)
def flavor(request: pytest.FixtureRequest) -> Literal["igraph", "leidenalg"]:
    """Parametrize a test over the two flavor names `"igraph"` and `"leidenalg"`.

    Each parameter carries the matching `needs` mark for its package.
    """
    return request.param


@needs.leidenalg
@needs.igraph
@pytest.mark.parametrize("flavor", FLAVORS)
@pytest.mark.parametrize("resolution", [1, 2])
@pytest.mark.parametrize("n_iterations", [-1, 3])
def test_leiden_basic(adata_neighbors, flavor, resolution, n_iterations):
Expand All @@ -44,7 +51,6 @@ def test_leiden_basic(adata_neighbors, flavor, resolution, n_iterations):

@needs.leidenalg
@needs.igraph
@pytest.mark.parametrize("flavor", FLAVORS)
def test_leiden_random_state(adata_neighbors, flavor):
is_leiden_alg = flavor == "leidenalg"
n_iterations = 2 if is_leiden_alg else -1
Expand Down Expand Up @@ -72,8 +78,18 @@ def test_leiden_random_state(adata_neighbors, flavor):
directed=is_leiden_alg,
n_iterations=n_iterations,
)
# reproducible
pd.testing.assert_series_equal(adata_1.obs["leiden"], adata_1_again.obs["leiden"])
assert (
pytest.approx(adata_1.uns["leiden"]["modularity"])
== adata_1_again.uns["leiden"]["modularity"]
)
# different clustering
assert not adata_2.obs["leiden"].equals(adata_1_again.obs["leiden"])
assert (
pytest.approx(adata_2.uns["leiden"]["modularity"])
!= adata_1_again.uns["leiden"]["modularity"]
)


@needs.igraph
Expand Down
Loading
Loading