scverse · amalia-k510 · Apr 25, 2025 · Apr 25, 2025 · Apr 25, 2025 · May 7, 2025
diff --git a/docs/release-notes/3616.feat.md b/docs/release-notes/3616.feat.md
@@ -0,0 +1 @@
+Add modularity scoring via {func}`scanpy.metrics.modularity` with support for directed/undirected graphs {smaller}`A. Karesh`
diff --git a/src/scanpy/_utils/__init__.py b/src/scanpy/_utils/__init__.py
@@ -883,20 +883,22 @@ class NeighborsView:
         This defines where to look for neighbors dictionary,
         connectivities, distances.
 
-        neigh = NeighborsView(adata, key)
-        neigh['distances']
-        neigh['connectivities']
-        neigh['params']
-        'connectivities' in neigh
-        'params' in neigh
-
-        is the same as
-
-        adata.obsp[adata.uns[key]['distances_key']]
-        adata.obsp[adata.uns[key]['connectivities_key']]
-        adata.uns[key]['params']
-        adata.uns[key]['connectivities_key'] in adata.obsp
-        'params' in adata.uns[key]
+    Examples
+    --------
+    >>> neigh = NeighborsView(adata, key)
+    >>> neigh["distances"]
+    >>> neigh["connectivities"]
+    >>> neigh["params"]
+    >>> "connectivities" in neigh
+    >>> "params" in neigh
+
+    is the same as
+
+    >>> adata.obsp[adata.uns[key]["distances_key"]]
+    >>> adata.obsp[adata.uns[key]["connectivities_key"]]
+    >>> adata.uns[key]["params"]
+    >>> adata.uns[key]["connectivities_key"] in adata.obsp
+    >>> "params" in adata.uns[key]
 
     """
 

diff --git a/src/scanpy/metrics/__init__.py b/src/scanpy/metrics/__init__.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from ._gearys_c import gearys_c
-from ._metrics import confusion_matrix
+from ._metrics import confusion_matrix, modularity
 from ._morans_i import morans_i
 
-__all__ = ["confusion_matrix", "gearys_c", "morans_i"]
+__all__ = ["confusion_matrix", "gearys_c", "modularity", "morans_i"]
diff --git a/src/scanpy/metrics/_metrics.py b/src/scanpy/metrics/_metrics.py
@@ -2,15 +2,23 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, overload
 
 import numpy as np
 import pandas as pd
+from anndata import AnnData
 from natsort import natsorted
 from pandas.api.types import CategoricalDtype
 
+from .._utils import NeighborsView
+
 if TYPE_CHECKING:
     from collections.abc import Sequence
+    from typing import Literal
+
+    from numpy.typing import ArrayLike
+
+    from .._compat import SpBase
 
 
 def confusion_matrix(
@@ -89,3 +97,127 @@ def confusion_matrix(
     df = df.loc[np.array(orig_idx), np.array(new_idx)]
 
     return df
+
+
+@overload
+def modularity(
+    connectivities: ArrayLike | SpBase,
+    /,
+    labels: pd.Series | ArrayLike,
+    *,
+    is_directed: bool,
+) -> float: ...
+
+
+@overload
+def modularity(
+    adata: AnnData,
+    /,
+    labels: str | pd.Series | ArrayLike = "leiden",
+    *,
+    neighbors_key: str | None = None,
+    is_directed: bool | None = None,
+    mode: Literal["calculate", "update", "retrieve"] = "calculate",
+) -> float: ...
+
+
+def modularity(
+    adata_or_connectivities: AnnData | ArrayLike | SpBase,
+    /,
+    labels: str | pd.Series | ArrayLike = "leiden",
+    *,
+    neighbors_key: str | None = None,
+    is_directed: bool | None = None,
+    mode: Literal["calculate", "update", "retrieve"] = "calculate",
+) -> float:
+    """Compute the modularity of a graph given its connectivities and labels.
+
+    Parameters
+    ----------
+    adata_or_connectivities
+        The AnnData object containing the data or a weighted adjacency matrix representing the graph.
+    labels
+        Cluster labels for each node in the graph.
+        When `AnnData` is provided, this can be the key in `adata.obs` that contains the clustering labels and defaults to `"leiden"`.
+        When a connectivities matrix is provided, the labels themselves must be passed.
+    neighbors_key
+        When `AnnData` is provided, the key in `adata.uns` under which the neighbors settings are stored.
+        The connectivities are looked up in `adata.obsp` through that entry.
+        Must not be specified when a connectivities matrix is provided.
+    is_directed
+        Whether the graph is directed or undirected.
+        Required when a connectivities matrix is provided.
+        When `AnnData` is provided and this is `None`,
+        the value stored as `'is_directed'` in `adata.uns[neighbors_key]['params']` is used.
+    mode
+        When `AnnData` is provided,
+        this controls if the stored modularity is retrieved,
+        or if we should calculate it (and optionally update it in `adata.uns[labels]`).
+        Must be left at `'calculate'` when a connectivities matrix is provided.
+
+    Returns
+    -------
+    The modularity of the graph based on the provided clustering.
+
+    Raises
+    ------
+    TypeError
+        If a connectivities matrix is passed together with string `labels`,
+        without `is_directed`, or with `neighbors_key` or a `mode` other than `'calculate'`.
+    """
+    if isinstance(adata_or_connectivities, AnnData):
+        return modularity_adata(
+            adata_or_connectivities,
+            labels=labels,
+            neighbors_key=neighbors_key,
+            is_directed=is_directed,
+            mode=mode,
+        )
+    if isinstance(labels, str):
+        msg = "`labels` must be provided as array when passing a connectivities array"
+        raise TypeError(msg)
+    if is_directed is None:
+        msg = "`is_directed` must be provided when passing a connectivities array"
+        raise TypeError(msg)
+    # There is nothing to look up, store, or retrieve without an AnnData object,
+    # so reject these arguments instead of silently ignoring them.
+    if neighbors_key is not None or mode != "calculate":
+        msg = "`neighbors_key` and `mode` can only be used when passing an `AnnData` object"
+        raise TypeError(msg)
+    return modularity_array(
+        adata_or_connectivities, labels=labels, is_directed=is_directed
+    )
+
+
+def modularity_adata(
+    adata: AnnData,
+    /,
+    *,
+    labels: str | pd.Series | ArrayLike,
+    neighbors_key: str | None,
+    is_directed: bool | None,
+    mode: Literal["calculate", "update", "retrieve"],
+) -> float:
+    """Calculate, store, or retrieve the modularity of a clustering of `adata`.
+
+    Parameters
+    ----------
+    adata
+        Annotated data matrix holding the neighbors graph.
+    labels
+        Key in `adata.obs` holding the cluster labels, or the labels themselves.
+        Must be a key when `mode` is `'retrieve'` or `'update'`.
+    neighbors_key
+        Passed to `NeighborsView` to locate the connectivities and neighbors parameters.
+    is_directed
+        Whether the graph is directed.
+        If `None`, the `'is_directed'` entry of the neighbors parameters is used.
+    mode
+        `'retrieve'` returns `adata.uns[labels]['modularity']` without computing anything,
+        `'calculate'` computes and returns the score,
+        `'update'` additionally stores it in `adata.uns[labels]['modularity']`.
+
+    Returns
+    -------
+    The modularity score.
+
+    Raises
+    ------
+    ValueError
+        If `mode` is not one of the supported values,
+        if `labels` is not a string while `mode` is `'retrieve'` or `'update'`,
+        or if `is_directed` is neither passed nor stored in the neighbors parameters.
+    """
+    # An unknown mode would otherwise silently behave like 'calculate'.
+    if mode not in ("calculate", "update", "retrieve"):
+        msg = f"`mode` must be one of 'calculate', 'update', or 'retrieve', got {mode!r}"
+        raise ValueError(msg)
+    if mode in {"retrieve", "update"} and not isinstance(labels, str):
+        msg = "`labels` must be a string when `mode` is `'retrieve'` or `'update'`"
+        raise ValueError(msg)
+    if mode == "retrieve":
+        return adata.uns[labels]["modularity"]
+
+    nv = NeighborsView(adata, neighbors_key)
+    connectivities = nv["connectivities"]
+
+    if is_directed is None and (is_directed := nv["params"].get("is_directed")) is None:
+        msg = "`adata` has no `'is_directed'` in `adata.uns[neighbors_key]['params']`, need to specify `is_directed`"
+        raise ValueError(msg)
+
+    m = modularity(
+        connectivities,
+        adata.obs[labels] if isinstance(labels, str) else labels,
+        is_directed=is_directed,
+    )
+    if mode == "update":
+        # `adata.uns[labels]` may not exist yet (e.g. labels that were not
+        # written by a clustering function), so create the entry on demand.
+        adata.uns.setdefault(labels, {})["modularity"] = m
+    return m
+
+
+def modularity_array(
+    connectivities: ArrayLike | SpBase,
+    /,
+    *,
+    labels: pd.Series | ArrayLike,
+    is_directed: bool,
+) -> float:
+    """Compute the weighted modularity of a clustering with igraph.
+
+    Parameters
+    ----------
+    connectivities
+        Weighted adjacency matrix of the graph.
+    labels
+        Cluster label of each node; converted to integer codes for igraph.
+    is_directed
+        Whether to build a directed or an undirected graph from `connectivities`.
+
+    Returns
+    -------
+    The modularity of the clustering on the weighted graph.
+
+    Raises
+    ------
+    ImportError
+        If igraph is not installed.
+    """
+    try:
+        import igraph as ig
+    except ImportError as e:
+        msg = "igraph is required for computing modularity"
+        raise ImportError(msg) from e
+    igraph_mode = ig.ADJ_DIRECTED if is_directed else ig.ADJ_UNDIRECTED
+    graph = ig.Graph.Weighted_Adjacency(connectivities, mode=igraph_mode)
+    # cluster labels to integer codes required by igraph
+    codes = pd.Categorical(np.asarray(labels)).codes
+    # `Weighted_Adjacency` stores the matrix entries in the "weight" edge
+    # attribute, but `Graph.modularity` treats all edges as equally weighted
+    # unless the weights are passed explicitly.
+    weights = "weight" if "weight" in graph.edge_attributes() else None
+    return graph.modularity(codes, weights=weights)
diff --git a/src/scanpy/neighbors/__init__.py b/src/scanpy/neighbors/__init__.py
@@ -75,6 +75,7 @@ class NeighborsParams(TypedDict):  # noqa: D101
     metric_kwds: NotRequired[Mapping[str, Any]]
     use_rep: NotRequired[str]
     n_pcs: NotRequired[int]
+    is_directed: NotRequired[bool]
 
 
 @_doc_params(n_pcs=doc_n_pcs, use_rep=doc_use_rep)
@@ -92,6 +93,7 @@ def neighbors(  # noqa: PLR0913
     metric_kwds: Mapping[str, Any] = MappingProxyType({}),
     random_state: _LegacyRandom = 0,
     key_added: str | None = None,
+    is_directed: bool = False,
     copy: bool = False,
 ) -> AnnData | None:
     """Compute the nearest neighbors distance matrix and a neighborhood graph of observations :cite:p:`McInnes2018`.
@@ -169,6 +171,8 @@ def neighbors(  # noqa: PLR0913
         If specified, the neighbors data is added to .uns[key_added],
         distances are stored in `.obsp[f'{{key_added}}_distances']` and
         connectivities in `.obsp[f'{{key_added}}_connectivities']`.
+    is_directed
+        If `True`, the connectivity matrix is expected to be a directed graph.
     copy
         Return a copy instead of writing to adata.
 
@@ -264,6 +268,7 @@ def neighbors(  # noqa: PLR0913
         method=method,
         random_state=random_state,
         metric=metric,
+        is_directed=is_directed,
         **({} if not metric_kwds else dict(metric_kwds=metric_kwds)),
         **({} if use_rep is None else dict(use_rep=use_rep)),
         **({} if n_pcs is None else dict(n_pcs=n_pcs)),

diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast
 
 import numpy as np
 import pandas as pd
@@ -47,7 +47,7 @@ def leiden(  # noqa: PLR0912, PLR0913, PLR0915
     flavor: Literal["leidenalg", "igraph"] | None = None,
     **clustering_args,
 ) -> AnnData | None:
-    """Cluster cells into subgroups :cite:p:`Traag2019`.
+    r"""Cluster cells into subgroups :cite:p:`Traag2019`.
 
     Cluster cells using the Leiden algorithm :cite:p:`Traag2019`,
     an improved version of the Louvain algorithm :cite:p:`Blondel2008`.
@@ -120,6 +120,12 @@ def leiden(  # noqa: PLR0912, PLR0913, PLR0915
         A dict with the values for the parameters `resolution`, `random_state`,
         and `n_iterations`.
 
+    `adata.uns['leiden' | key_added]['modularity']` : :class:`float`
+        The modularity score of the final clustering,
+        as calculated by the `flavor`.
+        Use :func:`scanpy.metrics.modularity`\ `(adata, mode='calculate' | 'update')`
+        to calculate a score independent of `flavor`.
+
     """
     if flavor is None:
         flavor = "leidenalg"
@@ -178,7 +184,10 @@ def leiden(  # noqa: PLR0912, PLR0913, PLR0915
         if use_weights:
             clustering_args["weights"] = np.array(g.es["weight"]).astype(np.float64)
         clustering_args["seed"] = random_state
-        part = leidenalg.find_partition(g, partition_type, **clustering_args)
+        part = cast(
+            "MutableVertexPartition",
+            leidenalg.find_partition(g, partition_type, **clustering_args),
+        )
     else:
         g = _utils.get_igraph_from_adjacency(adjacency, directed=False)
         if use_weights:
@@ -212,6 +221,7 @@ def leiden(  # noqa: PLR0912, PLR0913, PLR0915
         random_state=random_state,
         n_iterations=n_iterations,
     )
+    adata.uns[key_added]["modularity"] = part.modularity
     logg.info(
         "    finished",
         time=start,

diff --git a/tests/test_clustering.py b/tests/test_clustering.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from functools import partial
+from typing import TYPE_CHECKING
 
 import pandas as pd
 import pytest
@@ -10,21 +11,27 @@
 from testing.scanpy._helpers.data import pbmc68k_reduced
 from testing.scanpy._pytest.marks import needs
 
+if TYPE_CHECKING:
+    from typing import Literal
+
 
 @pytest.fixture
 def adata_neighbors():
     return pbmc68k_reduced()
 
 
-FLAVORS = [
-    pytest.param("igraph", marks=needs.igraph),
-    pytest.param("leidenalg", marks=needs.leidenalg),
-]
+@pytest.fixture(
+    params=[
+        pytest.param("igraph", marks=needs.igraph),
+        pytest.param("leidenalg", marks=needs.leidenalg),
+    ]
+)
+def flavor(request: pytest.FixtureRequest) -> Literal["igraph", "leidenalg"]:
+    """Provide each Leiden flavor name in turn; every parameter carries its matching `needs` mark."""
+    return request.param
 
 
 @needs.leidenalg
 @needs.igraph
-@pytest.mark.parametrize("flavor", FLAVORS)
 @pytest.mark.parametrize("resolution", [1, 2])
 @pytest.mark.parametrize("n_iterations", [-1, 3])
 def test_leiden_basic(adata_neighbors, flavor, resolution, n_iterations):
@@ -44,7 +51,6 @@ def test_leiden_basic(adata_neighbors, flavor, resolution, n_iterations):
 
 @needs.leidenalg
 @needs.igraph
-@pytest.mark.parametrize("flavor", FLAVORS)
 def test_leiden_random_state(adata_neighbors, flavor):
     is_leiden_alg = flavor == "leidenalg"
     n_iterations = 2 if is_leiden_alg else -1
@@ -72,8 +78,18 @@ def test_leiden_random_state(adata_neighbors, flavor):
         directed=is_leiden_alg,
         n_iterations=n_iterations,
     )
+    # reproducible
     pd.testing.assert_series_equal(adata_1.obs["leiden"], adata_1_again.obs["leiden"])
+    assert (
+        pytest.approx(adata_1.uns["leiden"]["modularity"])
+        == adata_1_again.uns["leiden"]["modularity"]
+    )
+    # different clustering
     assert not adata_2.obs["leiden"].equals(adata_1_again.obs["leiden"])
+    assert (
+        pytest.approx(adata_2.uns["leiden"]["modularity"])
+        != adata_1_again.uns["leiden"]["modularity"]
+    )
 
 
 @needs.igraph
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Add modularity scoring via {func}`scanpy.metrics.modularity` with support for directed/undirected graphs {smaller}`A. Karesh`