docstrings + edge_importance_data meth

kylerohn · kylerohn · commit a159a2e0b296 · 2025-11-30T16:37:16.000-07:00
diff --git a/bioneuralnet/explainability/dpmon_explainer.py b/bioneuralnet/explainability/dpmon_explainer.py
@@ -6,7 +6,7 @@
 from torch import nn
 from torch.types import FileLike
 
-from typing import Literal
+from typing import Literal, Dict, List
 
 from torch_geometric.data import Data
 from torch_geometric.explain import Explainer, ExplainerAlgorithm, ModelConfig
@@ -21,8 +21,6 @@
     setup_device,
 )
 
-from typing import List, Optional
-
 
 class NeuralNetworkWrapper(nn.Module):
     """A wrapper class for formatting DPMON Neural Network IO in a form pytorch_geometric requires"""
@@ -40,9 +38,9 @@ def __init__(self, nn: NeuralNetwork):
 
         self.nn = nn
 
-    def forward(self, x, edge_index, train_features, edge_attr=None):
+    def forward(self, x, edge_index, train_features, **kwargs):
-
+        """Pack `x`, `edge_index` and any extra keyword arguments into a `Data` graph and return the wrapped network's prediction."""
-        _omics_network_tg = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
+        _omics_network_tg = Data(x=x, edge_index=edge_index, **kwargs)
-
+        # `self.nn` returns three values; only the first one is handed back to the caller.
         pred, _, _ = self.nn(train_features, _omics_network_tg)
         return pred
@@ -55,18 +53,23 @@ def __init__(
         self,
         f: FileLike,
         dpmon: DPMON,
-        algorithm: ExplainerAlgorithm,
-        mode: Literal["regression", "binary_classification", "multiclass_classification"],
-        explanation_type: Literal["model", "phenomenon"] = "model",
-        node_mask_type: Literal["object", "common_attributes", "attributes"] | None = "attributes",
-        edge_mask_type: Literal["object", "common_attributes", "attributes"] | None = "object",
-        task_level: Literal["edge", "node", "graph"] = "graph",
-        return_type: Literal["raw", "log_probs", "probs"] = "raw",
         weights_only: bool = True,
     ):
         """Initialize DPMON explainer object.
-        This implementation is a first version.
-        There has to be a better way to do this
+        This implementation is a first version. By default, it uses `torch_geometric.explain.GNNExplainer()`
+        to produce feature importance explanations on `clinical` data. The raw node explanations are stored in
+        `self.expl.node_mask`
+
+        It is important to note that these explanations likely do not capture the full picture of the predictions
+        in a multi-omics network; rather, they provide insight into the clinical (patient) features which account
+        for the explanations.
+
+        This explainer object also produces edge importances at an `object` level. It is currently
+        unknown how useful these explanations are, but they are stored in `self.expl.edge_mask`. A method is provided
+        to retrieve the top n important edges for the user to observe if they want.
+
+        Otherwise, all of the default `torch_geometric.explain.Explainer` methods are available to use, whether
+        through a provided wrapper or through the `expl` member variable.
 
         Args:
             f (FileLike): The file object or path to a saved model trained with DPMON
@@ -90,6 +93,8 @@ def __init__(
             phenotype_col="phenotype",
         )[0]
 
+        self.clinical_data = dpmon.clinical_data
+
         model = NeuralNetwork(
             model_type=dpmon.model,
             gnn_input_dim=self.omics_network_tg.x.shape[1],  # type: ignore
@@ -112,29 +117,103 @@ def __init__(
         ).to(device)
 
         self.model = NeuralNetworkWrapper(model)
+
+    def explain(
+        self,
+        algorithm: ExplainerAlgorithm,
+        mode: Literal[
+            "regression", "binary_classification", "multiclass_classification"
+        ],
+        explanation_type: Literal["model", "phenomenon"] = "model",
+        node_mask_type: (
+            Literal["object", "common_attributes", "attributes"] | None
+        ) = "attributes",
+        edge_mask_type: (
+            Literal["object", "common_attributes", "attributes"] | None
+        ) = "object",
+        task_level: Literal["edge", "node", "graph"] = "graph",
+        return_type: Literal["raw", "log_probs", "probs"] = "raw",
+    ):
+        """Build an `Explainer` around the loaded model and run it, storing the results in `self.explainer` and `self.expl`.
+
+        Args:
+            algorithm (ExplainerAlgorithm): The `torch_geometric.explain` explainer algorithm to use. Currently only tested with `GNNExplainer()`
+            mode (Literal["regression", "binary_classification", "multiclass_classification"]): The type of prediction the GNN is making
+            explanation_type (Literal["model", "phenomenon"], optional): Whether to explain the `model` predictions or the `phenomenon` that the model is trying to predict. Defaults to "model".
+            node_mask_type (Literal["object", "common_attributes", "attributes"] | None, optional): The node explanation type to generate. Defaults to "attributes".
+            edge_mask_type (Literal["object", "common_attributes", "attributes"] | None, optional): The edge explanation type to generate. Defaults to "object".
+            task_level (Literal["edge", "node", "graph"], optional): The prediction scope of the model. Defaults to "graph".
+            return_type (Literal["raw", "log_probs", "probs"], optional): The output of the model. Defaults to "raw".
+        """
         self.explainer = Explainer(
             self.model,
             algorithm,
             explanation_type=explanation_type,
             node_mask_type=node_mask_type,
             edge_mask_type=edge_mask_type,
             model_config=ModelConfig(
-                mode=mode,
-                task_level=task_level,
-                return_type=return_type
-            )
+                mode=mode, task_level=task_level, return_type=return_type
+            ),
         )
 
-        if self.omics_network_tg.x != None and self.omics_network_tg.edge_index != None:
-            self.expl = self.explainer(**self.omics_network_tg.to_dict(), train_features=self.train_features)
-            print(self.expl.edge_mask)
-            print(self.expl.node_mask)
+        network = self.omics_network_tg
+        # `self.expl` is only (re)assigned when the network has both node features and an edge index.
+        if network.x is not None and network.edge_index is not None:
+            self.expl = self.explainer(**network.to_dict(), train_features=self.train_features)
+
+    def edge_importance_data(self, top_n: int = 5) -> List[Dict]:
+        """Summarize object-level edge importance as a ranked list.
 
+        Args:
+            top_n (int, optional): The number of important edges to retrieve. Defaults to 5.
+
+        Returns:
+            List[Dict]: Up to `top_n` entries sorted by descending importance. Each entry has
+                the keys `importance` and `edge` (the matching column of `edge_index`), plus
+                `edge_attr` when the network defines edge attributes.
+
+        Raises:
+            AttributeError: If no explanation with an edge mask has been generated yet.
+        """
 
+        edge_mask = getattr(getattr(self, "expl", None), "edge_mask", None)
+        if edge_mask is None:
+            raise AttributeError(
+                "edge_mask is not defined. Generate explanations on edges first"
+            )
+
+        edge_index = self.omics_network_tg.edge_index
+        edge_attr = self.omics_network_tg.edge_attr
+        scores = [score.item() for score in edge_mask]
+
+        # Rank every edge first and truncate afterwards. Inserting into a running top-n
+        # list without a fallback append dropped any edge that was less important than
+        # everything collected so far. The sort is stable, so ties keep their edge order.
+        ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
+
+        edges = []
+        for idx in ranked[:top_n]:
+            new_edge = {"importance": scores[idx], "edge": edge_index[:, idx]}  # type: ignore
+            if edge_attr is not None:
+                # 1-D attributes hold one scalar per edge; otherwise keep the whole row.
+                per_edge = edge_attr[idx]
+                new_edge["edge_attr"] = per_edge.item() if edge_attr.ndim == 1 else per_edge
+            edges.append(new_edge)
+        return edges
 
+    def visualize_feature_importance(
+        self, path: os.PathLike | None = None, top_k: int | None = None
+    ):
+        """Plot feature importance by delegating to `self.expl.visualize_feature_importance`.
 
-    def visualize_feature_importance(self, path: os.PathLike):
-        self.expl.visualize_feature_importance(str(path))
+        Column names of `self.clinical_data` are passed as feature labels when it is a `pandas.DataFrame`.
+
+        Args:
+            path (os.PathLike | None, optional): Path to save the feature importance graph. Defaults to None.
+            top_k (int | None, optional): The number of features to include in the graph. Defaults to None.
+        """
+        clinical = self.clinical_data
+        feat_labels = clinical.columns.to_list() if isinstance(clinical, pd.DataFrame) else None
+        save_path = str(path) if path is not None else None
 
-    def visualize_graph(self, path: os.PathLike):
-        self.expl.visualize_graph(str(path))
+        self.expl.visualize_feature_importance(save_path, top_k=top_k, feat_labels=feat_labels)  # type: ignore