Skip to content

Commit 275c104 — Merge pull request #45 from KrishnaswamyLab/dev (graphtools v1.3.1).
Parents: e31a41a and a431bf0.

File tree

6 files changed: +288 additions, −293 deletions.

graphtools/api.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import numpy as np
22
import warnings
3-
import tasklogger
43
from scipy import sparse
54
import pickle
65
import pygsp
6+
import tasklogger
7+
8+
from . import base, graphs
79

8-
from . import base
9-
from . import graphs
10+
_logger = tasklogger.get_tasklogger('graphtools')
1011

1112

1213
def Graph(data,
@@ -173,7 +174,7 @@ def Graph(data,
173174
“Numerical Recipes (3rd edition)”,
174175
Cambridge University Press, 2007, page 795.
175176
"""
176-
tasklogger.set_level(verbose)
177+
_logger.set_level(verbose)
177178
if sample_idx is not None and len(np.unique(sample_idx)) == 1:
178179
warnings.warn("Only one unique sample. "
179180
"Not using MNNGraph")
@@ -239,7 +240,7 @@ def Graph(data,
239240
else:
240241
msg = msg + " and PyGSP inheritance"
241242

242-
tasklogger.log_debug(msg)
243+
_logger.debug(msg)
243244

244245
class_names = [p.__name__.replace("Graph", "") for p in parent_classes]
245246
try:
@@ -257,7 +258,7 @@ def Graph(data,
257258
pass
258259

259260
# build graph and return
260-
tasklogger.log_debug("Initializing {} with arguments {}".format(
261+
_logger.debug("Initializing {} with arguments {}".format(
261262
parent_classes,
262263
", ".join(["{}='{}'".format(key, value)
263264
for key, value in params.items()

graphtools/base.py

Lines changed: 61 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
from scipy import sparse
1212
import warnings
1313
import numbers
14-
import tasklogger
1514
import pickle
1615
import sys
16+
import tasklogger
1717

1818
try:
1919
import pandas as pd
@@ -29,6 +29,8 @@
2929

3030
from . import utils
3131

32+
_logger = tasklogger.get_tasklogger('graphtools')
33+
3234

3335
class Base(object):
3436
"""Class that deals with key-word arguments but is otherwise
@@ -179,7 +181,7 @@ def _parse_n_pca_threshold(self, data, n_pca, rank_threshold):
179181
n_pca = None
180182
elif n_pca is True: # notify that we're going to estimate rank.
181183
n_pca = 'auto'
182-
tasklogger.log_info("Estimating n_pca from matrix rank. "
184+
_logger.info("Estimating n_pca from matrix rank. "
183185
"Supply an integer n_pca "
184186
"for fixed amount.")
185187
if not any([isinstance(n_pca, numbers.Number),
@@ -233,45 +235,44 @@ def _reduce_data(self):
233235
Reduced data matrix
234236
"""
235237
if self.n_pca is not None and (self.n_pca == 'auto' or self.n_pca < self.data.shape[1]):
236-
tasklogger.log_start("PCA")
237-
n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca
238-
if sparse.issparse(self.data):
239-
if isinstance(self.data, sparse.coo_matrix) or \
240-
isinstance(self.data, sparse.lil_matrix) or \
241-
isinstance(self.data, sparse.dok_matrix):
242-
self.data = self.data.tocsr()
243-
self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state)
244-
else:
245-
self.data_pca = PCA(n_pca,
246-
svd_solver='randomized',
247-
random_state=self.random_state)
248-
self.data_pca.fit(self.data)
249-
if self.n_pca == 'auto':
250-
s = self.data_pca.singular_values_
251-
smax = s.max()
252-
if self.rank_threshold == 'auto':
253-
threshold = smax * \
254-
np.finfo(self.data.dtype).eps * max(self.data.shape)
255-
self.rank_threshold = threshold
256-
threshold = self.rank_threshold
257-
gate = np.where(s >= threshold)[0]
258-
self.n_pca = gate.shape[0]
259-
if self.n_pca == 0:
260-
raise ValueError("Supplied threshold {} was greater than "
261-
"maximum singular value {} "
262-
"for the data matrix".format(threshold, smax))
263-
tasklogger.log_info(
264-
"Using rank estimate of {} as n_pca".format(self.n_pca))
265-
# reset the sklearn operator
266-
op = self.data_pca # for line-width brevity..
267-
op.components_ = op.components_[gate, :]
268-
op.explained_variance_ = op.explained_variance_[gate]
269-
op.explained_variance_ratio_ = op.explained_variance_ratio_[
270-
gate]
271-
op.singular_values_ = op.singular_values_[gate]
272-
self.data_pca = op # im not clear if this is needed due to assignment rules
273-
data_nu = self.data_pca.transform(self.data)
274-
tasklogger.log_complete("PCA")
238+
with _logger.task("PCA"):
239+
n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca
240+
if sparse.issparse(self.data):
241+
if isinstance(self.data, sparse.coo_matrix) or \
242+
isinstance(self.data, sparse.lil_matrix) or \
243+
isinstance(self.data, sparse.dok_matrix):
244+
self.data = self.data.tocsr()
245+
self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state)
246+
else:
247+
self.data_pca = PCA(n_pca,
248+
svd_solver='randomized',
249+
random_state=self.random_state)
250+
self.data_pca.fit(self.data)
251+
if self.n_pca == 'auto':
252+
s = self.data_pca.singular_values_
253+
smax = s.max()
254+
if self.rank_threshold == 'auto':
255+
threshold = smax * \
256+
np.finfo(self.data.dtype).eps * max(self.data.shape)
257+
self.rank_threshold = threshold
258+
threshold = self.rank_threshold
259+
gate = np.where(s >= threshold)[0]
260+
self.n_pca = gate.shape[0]
261+
if self.n_pca == 0:
262+
raise ValueError("Supplied threshold {} was greater than "
263+
"maximum singular value {} "
264+
"for the data matrix".format(threshold, smax))
265+
_logger.info(
266+
"Using rank estimate of {} as n_pca".format(self.n_pca))
267+
# reset the sklearn operator
268+
op = self.data_pca # for line-width brevity..
269+
op.components_ = op.components_[gate, :]
270+
op.explained_variance_ = op.explained_variance_[gate]
271+
op.explained_variance_ratio_ = op.explained_variance_ratio_[
272+
gate]
273+
op.singular_values_ = op.singular_values_[gate]
274+
self.data_pca = op # im not clear if this is needed due to assignment rules
275+
data_nu = self.data_pca.transform(self.data)
275276
return data_nu
276277
else:
277278
data_nu = self.data
@@ -472,10 +473,10 @@ def __init__(self,
472473
self.anisotropy = anisotropy
473474

474475
if initialize:
475-
tasklogger.log_debug("Initializing kernel...")
476+
_logger.debug("Initializing kernel...")
476477
self.K
477478
else:
478-
tasklogger.log_debug("Not initializing kernel.")
479+
_logger.debug("Not initializing kernel.")
479480
super().__init__(**kwargs)
480481

481482
def _check_symmetrization(self, kernel_symm, theta):
@@ -524,18 +525,18 @@ def _build_kernel(self):
524525
def symmetrize_kernel(self, K):
525526
# symmetrize
526527
if self.kernel_symm == "+":
527-
tasklogger.log_debug("Using addition symmetrization.")
528+
_logger.debug("Using addition symmetrization.")
528529
K = (K + K.T) / 2
529530
elif self.kernel_symm == "*":
530-
tasklogger.log_debug("Using multiplication symmetrization.")
531+
_logger.debug("Using multiplication symmetrization.")
531532
K = K.multiply(K.T)
532533
elif self.kernel_symm == 'mnn':
533-
tasklogger.log_debug(
534+
_logger.debug(
534535
"Using mnn symmetrization (theta = {}).".format(self.theta))
535536
K = self.theta * utils.elementwise_minimum(K, K.T) + \
536537
(1 - self.theta) * utils.elementwise_maximum(K, K.T)
537538
elif self.kernel_symm is None:
538-
tasklogger.log_debug("Using no symmetrization.")
539+
_logger.debug("Using no symmetrization.")
539540
pass
540541
else:
541542
# this should never happen
@@ -729,12 +730,12 @@ def to_pygsp(self, **kwargs):
729730
def to_igraph(self, attribute="weight", **kwargs):
730731
"""Convert to an igraph Graph
731732
732-
Uses the igraph.Graph.Weighted_Adjacency constructor
733+
Uses the igraph.Graph constructor
733734
734735
Parameters
735736
----------
736737
attribute : str, optional (default: "weight")
737-
kwargs : additional arguments for igraph.Graph.Weighted_Adjacency
738+
kwargs : additional arguments for igraph.Graph
738739
"""
739740
try:
740741
import igraph as ig
@@ -747,8 +748,13 @@ def to_igraph(self, attribute="weight", **kwargs):
747748
# not a pygsp graph
748749
W = self.K.copy()
749750
W = utils.set_diagonal(W, 0)
750-
return ig.Graph.Weighted_Adjacency(utils.to_array(W).tolist(),
751-
attr=attribute, **kwargs)
751+
sources, targets = W.nonzero()
752+
edgelist = list(zip(sources, targets))
753+
g = ig.Graph(W.shape[0], edgelist, **kwargs)
754+
weights = W[W.nonzero()]
755+
weights = utils.to_array(weights)
756+
g.es[attribute] = weights.flatten().tolist()
757+
return g
752758

753759
def to_pickle(self, path):
754760
"""Save the current Graph to a pickle.
@@ -787,10 +793,10 @@ def _check_shortest_path_distance(self, distance):
787793
def _default_shortest_path_distance(self):
788794
if not self.weighted:
789795
distance = 'data'
790-
tasklogger.log_info("Using ambient data distances.")
796+
_logger.info("Using ambient data distances.")
791797
else:
792798
distance = 'affinity'
793-
tasklogger.log_info("Using negative log affinity distances.")
799+
_logger.info("Using negative log affinity distances.")
794800
return distance
795801

796802
def shortest_path(self, method='auto', distance=None):
@@ -954,7 +960,7 @@ def __init__(self, data,
954960
# kwargs are ignored
955961
self.n_jobs = n_jobs
956962
self.verbose = verbose
957-
tasklogger.set_level(verbose)
963+
_logger.set_level(verbose)
958964
super().__init__(data, **kwargs)
959965

960966
def get_params(self):
@@ -1117,6 +1123,6 @@ def set_params(self, **params):
11171123
self.n_jobs = params['n_jobs']
11181124
if 'verbose' in params:
11191125
self.verbose = params['verbose']
1120-
tasklogger.set_level(self.verbose)
1126+
_logger.set_level(self.verbose)
11211127
super().set_params(**params)
11221128
return self

Commit comments: 0