From 350ffa0875f6544696a59f38d5d690053b9d1672 Mon Sep 17 00:00:00 2001 From: Moritz Kern <92092328+Moritz-Alexander-Kern@users.noreply.github.com> Date: Fri, 10 Feb 2023 14:10:02 +0100 Subject: [PATCH 01/10] Enh/mk allow direct seqs to gpfa (#97) * Allow seqs to be passed directly to gpfa fit * Small fix to error checking * Typo * another typo * fixed formatting for pep8 * fixed docstring of transform after introducing error while resolving merge conflicts * added two unit-test * add more unit-tests * remove blank line --------- Co-authored-by: Jonah Pearl --- elephant/gpfa/gpfa.py | 99 +++++++++++++++++++++++++++++--------- elephant/test/test_gpfa.py | 36 ++++++++++++++ 2 files changed, 111 insertions(+), 24 deletions(-) diff --git a/elephant/gpfa/gpfa.py b/elephant/gpfa/gpfa.py index 94d3fb70d..e261054fd 100644 --- a/elephant/gpfa/gpfa.py +++ b/elephant/gpfa/gpfa.py @@ -2,7 +2,7 @@ Gaussian-process factor analysis (GPFA) is a dimensionality reduction method :cite:`gpfa-Yu2008_1881` for neural trajectory visualization of parallel spike trains. GPFA applies factor analysis (FA) to time-binned spike count data to -reduce the dimensionality and at the same time smoothes the resulting +reduce the dimensionality and at the same time smooth the resulting low-dimensional trajectories by fitting a Gaussian process (GP) model to them. The input consists of a set of trials (Y), each containing a list of spike @@ -20,18 +20,18 @@ Internally, the analysis consists of the following steps: -0) bin the spike train data to get a sequence of N dimensional vectors of spike -counts in respective time bins, and choose the reduced dimensionality x_dim +#. bin the spike train data to get a sequence of N dimensional vectors of spike + counts in respective time bins, and choose the reduced dimensionality x_dim. -1) expectation-maximization for fitting of the parameters C, d, R and the -time-scales and variances of the Gaussian process, using all the trials -provided as input (c.f., `gpfa_core.em()`) +#. expectation-maximization for fitting of the parameters C, d, R and the + time-scales and variances of the Gaussian process, using all the trials + provided as input (c.f., `gpfa_core.em()`). -2) projection of single trials in the low dimensional space (c.f., -`gpfa_core.exact_inference_with_ll()`) +#. projection of single trials in the low dimensional space (c.f., + `gpfa_core.exact_inference_with_ll()`). -3) orthonormalization of the matrix C and the corresponding subspace, for -visualization purposes: (c.f., `gpfa_core.orthonormalize()`) +#. orthonormalization of the matrix C and the corresponding subspace, for + visualization purposes: (c.f., `gpfa_core.orthonormalize()`). .. autosummary:: @@ -54,7 +54,7 @@ Run tutorial interactively: .. image:: https://mybinder.org/badge.svg - :target: https://mybinder.org/v2/gh/NeuralEnsemble/elephant/master + :target: https://mybinder.org/v2/gh/NeuralEnsemble/elephant/master ?filepath=doc/tutorials/gpfa.ipynb @@ -273,7 +273,7 @@ def binsize(self): warnings.warn("'binsize' is deprecated; use 'bin_size'") return self.bin_size - def fit(self, spiketrains): + def fit(self, spiketrains, seqs_train=None): """ Fit the model with the given training data. @@ -289,6 +289,16 @@ def fit(self, spiketrains): `spiketrains[k][n]` refer to spike trains of the same neuron for any choices of `l`, `k`, and `n`. + seqs_train: np.recarray + Alternatively, pass a pre-processed seqs_train array. + This is a training data structure, whose n-th element + (corresponding to the n-th experimental trial) has fields + + T : int + number of bins + y : (#units, T) np.ndarray + neural data + Returns ------- self : object @@ -303,8 +313,17 @@ def fit(self, spiketrains): If covariance matrix of input spike data is rank deficient. """ - self._check_training_data(spiketrains) - seqs_train = self._format_training_data(spiketrains) + + if seqs_train is not None and spiketrains is not None: + raise ValueError('Cannot provide both spiketrains and seqs_train!') + elif spiketrains is not None: + self._check_training_data(spiketrains) + seqs_train = self._format_training_data(spiketrains) + elif seqs_train is not None: + seqs_train = self._format_training_data_seqs(seqs_train) + else: + raise ValueError('Must supply either spiketrains or seqs_train!') + # Check if training data covariance is full rank y_all = np.hstack(seqs_train['y']) y_dim = y_all.shape[0] @@ -353,7 +372,15 @@ def _format_training_data(self, spiketrains): seq['y'] = seq['y'][self.has_spikes_bool, :] return seqs - def transform(self, spiketrains, returned_data=['latent_variable_orth']): + def _format_training_data_seqs(self, seqs): + # Remove inactive units based on training set + self.has_spikes_bool = np.hstack(seqs['y']).any(axis=1) + for seq in seqs: + seq['y'] = seq['y'][self.has_spikes_bool, :] + return seqs + + def transform(self, spiketrains, seqs=None, + returned_data=['latent_variable_orth']): """ Obtain trajectories of neural activity in a low-dimensional latent variable space by inferring the posterior mean of the obtained GPFA @@ -426,14 +453,25 @@ def transform(self, spiketrains, returned_data=['latent_variable_orth']): If `returned_data` contains keys different from the ones in `self.valid_data_names`. """ - if len(spiketrains[0]) != len(self.has_spikes_bool): - raise ValueError("'spiketrains' must contain the same number of " - "neurons as the training spiketrain data") + invalid_keys = set(returned_data).difference(self.valid_data_names) if len(invalid_keys) > 0: raise ValueError("'returned_data' can only have the following " "entries: {}".format(self.valid_data_names)) - seqs = gpfa_util.get_seqs(spiketrains, self.bin_size) + + if spiketrains is not None: + if len(spiketrains[0]) != len(self.has_spikes_bool): + raise ValueError("'spiketrains' must contain the same number " + "of neurons as the training spiketrain data") + + seqs = gpfa_util.get_seqs(spiketrains, self.bin_size) + elif seqs is not None: + # check some stuff + if len(seqs['y'][0]) != len(self.has_spikes_bool): + raise ValueError( + "'seq_trains' must contain the same number of neurons as " + "the training spiketrain data") + for seq in seqs: seq['y'] = seq['y'][self.has_spikes_bool, :] seqs, ll = gpfa_core.exact_inference_with_ll(seqs, @@ -447,8 +485,8 @@ def transform(self, spiketrains, returned_data=['latent_variable_orth']): return seqs[returned_data[0]] return {x: seqs[x] for x in returned_data} - def fit_transform(self, spiketrains, returned_data=[ - 'latent_variable_orth']): + def fit_transform(self, spiketrains, seqs_train=None, + returned_data=['latent_variable_orth']): """ Fit the model with `spiketrains` data and apply the dimensionality reduction on `spiketrains`. @@ -458,6 +496,10 @@ def fit_transform(self, spiketrains, returned_data=[ spiketrains : list of list of neo.SpikeTrain Refer to the :func:`GPFA.fit` docstring. + seqs_train : np.recarray + Refer to the :func:`GPFA.fit` docstring. + Default: `None` + returned_data : list of str Refer to the :func:`GPFA.transform` docstring. @@ -469,7 +511,7 @@ def fit_transform(self, spiketrains, returned_data=[ Raises ------ ValueError - Refer to :func:`GPFA.fit` and :func:`GPFA.transform`. + Refer to :func:`GPFA.fit` and :func:`GPFA.transform`. See Also -------- @@ -477,8 +519,17 @@ def fit_transform(self, spiketrains, returned_data=[ GPFA.transform : transform `spiketrains` into trajectories """ - self.fit(spiketrains) - return self.transform(spiketrains, returned_data=returned_data) + if seqs_train is not None and spiketrains is not None: + raise ValueError('Cannot provide both spiketrains and seqs_train!') + elif spiketrains is not None: + self.fit(spiketrains) + return self.transform(spiketrains, returned_data=returned_data) + elif seqs_train is not None: + self.fit(None, seqs_train=seqs_train) + return self.transform(None, seqs=seqs_train, + returned_data=returned_data) + else: + raise ValueError('Must supply either spiketrains or seqs_train!') def score(self, spiketrains): """ diff --git a/elephant/test/test_gpfa.py b/elephant/test/test_gpfa.py index 19d35ae31..1206394c2 100644 --- a/elephant/test/test_gpfa.py +++ b/elephant/test/test_gpfa.py @@ -86,6 +86,10 @@ def gen_test_data(rates, durs, shapes=(1, 1, 1, 1)): for rate in rates] self.data2.append(spike_times) + # generate seqs_train data + self.seqs_train = gpfa_util.get_seqs(self.data1, + bin_size=self.bin_size) + def test_data1(self): gpfa = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) gpfa.fit(self.data1) @@ -216,6 +220,38 @@ def test_logdet(self): logdet_ground_truth = np.log(np.linalg.det(matrix)) assert_array_almost_equal(logdet_fast, logdet_ground_truth) + def test_equality_spiketrains_seqs_train(self): + gpfa_seqs = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) + gpfa_seqs.fit_transform(None, seqs_train=self.seqs_train) + + gpfa_spiketrains = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) + gpfa_spiketrains.fit_transform(self.data1) + + self.assertAlmostEqual( + gpfa_seqs.transform_info['log_likelihood'], + gpfa_spiketrains.transform_info['log_likelihood'], places=5) + + def test_fit_seqs_train(self): + gpfa = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) + gpfa.fit(None, seqs_train=self.seqs_train) + target = [0.07919926, 0.07546947, 0.07397758, 0.0780272, 0.07773862, + 0.07695879, 0.07469927, 0.07867749] + res = gpfa.params_estimated['d'] + assert_array_almost_equal(res, target, decimal=5) + + def test_transform_seqs_train(self): + gpfa = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) + gpfa.fit(None, seqs_train=self.seqs_train) + gpfa.transform(None, seqs=self.seqs_train) + self.assertAlmostEqual(gpfa.transform_info['log_likelihood'], + -8172.004695554373, places=5) + + def test_fit_transform_seqs_train(self): + gpfa = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) + gpfa.fit_transform(None, seqs_train=self.seqs_train) + self.assertAlmostEqual(gpfa.transform_info['log_likelihood'], + -8172.004695554373, places=5) + if __name__ == "__main__": unittest.main(verbosity=2) From c223f707f247f14a344fb29a3cab6125b215a10e Mon Sep 17 00:00:00 2001 From: Moritz-Alexander-Kern Date: Fri, 10 Feb 2023 16:16:01 +0100 Subject: [PATCH 02/10] refactoring: rename spiketrains to trials --- elephant/gpfa/gpfa.py | 52 +++++++++++++++++++------------------- elephant/gpfa/gpfa_util.py | 24 ++++++++---------- elephant/test/test_gpfa.py | 5 ++-- 3 files changed, 39 insertions(+), 42 deletions(-) diff --git a/elephant/gpfa/gpfa.py b/elephant/gpfa/gpfa.py index e261054fd..4553264ad 100644 --- a/elephant/gpfa/gpfa.py +++ b/elephant/gpfa/gpfa.py @@ -273,13 +273,13 @@ def binsize(self): warnings.warn("'binsize' is deprecated; use 'bin_size'") return self.bin_size - def fit(self, spiketrains, seqs_train=None): + def fit(self, trials, seqs_train=None): """ Fit the model with the given training data. Parameters ---------- - spiketrains : list of list of neo.SpikeTrain + trials : list of list of neo.SpikeTrain Spike train data to be fit to latent variables. The outer list corresponds to trials and the inner list corresponds to the neurons recorded in that trial, such that @@ -314,15 +314,15 @@ def fit(self, spiketrains, seqs_train=None): If covariance matrix of input spike data is rank deficient. """ - if seqs_train is not None and spiketrains is not None: - raise ValueError('Cannot provide both spiketrains and seqs_train!') - elif spiketrains is not None: - self._check_training_data(spiketrains) - seqs_train = self._format_training_data(spiketrains) + if seqs_train is not None and trials is not None: + raise ValueError('Cannot provide both trials and seqs_train!') + elif trials is not None: + self._check_training_data(trials) + seqs_train = self._format_training_data(trials) elif seqs_train is not None: seqs_train = self._format_training_data_seqs(seqs_train) else: - raise ValueError('Must supply either spiketrains or seqs_train!') + raise ValueError('Must supply either trials or seqs_train!') # Check if training data covariance is full rank y_all = np.hstack(seqs_train['y']) @@ -358,9 +358,9 @@ def fit(self, spiketrains, seqs_train=None): @staticmethod def _check_training_data(spiketrains): if len(spiketrains) == 0: - raise ValueError("Input spiketrains cannot be empty") + raise ValueError("Input trials cannot be empty") if not isinstance(spiketrains[0][0], neo.SpikeTrain): - raise ValueError("structure of the spiketrains is not correct: " + raise ValueError("structure of the trials is not correct: " "0-axis should be trials, 1-axis neo.SpikeTrain" "and 2-axis spike times") @@ -392,10 +392,10 @@ def transform(self, spiketrains, seqs=None, Spike train data to be transformed to latent variables. The outer list corresponds to trials and the inner list corresponds to the neurons recorded in that trial, such that - `spiketrains[l][n]` is the spike train of neuron `n` in trial `l`. + `trials[l][n]` is the spike train of neuron `n` in trial `l`. Note that the number and order of `neo.SpikeTrain` objects per - trial must be fixed such that `spiketrains[l][n]` and - `spiketrains[k][n]` refer to spike trains of the same neuron + trial must be fixed such that `trials[l][n]` and + `trials[k][n]` refer to spike trains of the same neuron for any choices of `l`, `k`, and `n`. returned_data : list of str The dimensionality reduction transform generates the following @@ -442,12 +442,12 @@ def transform(self, spiketrains, seqs=None, `VsmGP`: (#bins, #bins, #latent_vars) np.ndarray Note that the num. of bins (#bins) can vary across trials, - reflecting the trial durations in the given `spiketrains` data. + reflecting the trial durations in the given `trials` data. Raises ------ ValueError - If the number of neurons in `spiketrains` is different from that + If the number of neurons in `trials` is different from that in the training spiketrain data. If `returned_data` contains keys different from the ones in @@ -461,7 +461,7 @@ def transform(self, spiketrains, seqs=None, if spiketrains is not None: if len(spiketrains[0]) != len(self.has_spikes_bool): - raise ValueError("'spiketrains' must contain the same number " + raise ValueError("'trials' must contain the same number " "of neurons as the training spiketrain data") seqs = gpfa_util.get_seqs(spiketrains, self.bin_size) @@ -488,8 +488,8 @@ def transform(self, spiketrains, seqs=None, def fit_transform(self, spiketrains, seqs_train=None, returned_data=['latent_variable_orth']): """ - Fit the model with `spiketrains` data and apply the dimensionality - reduction on `spiketrains`. + Fit the model with `trials` data and apply the dimensionality + reduction on `trials`. Parameters ---------- @@ -515,12 +515,12 @@ def fit_transform(self, spiketrains, seqs_train=None, See Also -------- - GPFA.fit : fit the model with `spiketrains` - GPFA.transform : transform `spiketrains` into trajectories + GPFA.fit : fit the model with `trials` + GPFA.transform : transform `trials` into trajectories """ if seqs_train is not None and spiketrains is not None: - raise ValueError('Cannot provide both spiketrains and seqs_train!') + raise ValueError('Cannot provide both trials and seqs_train!') elif spiketrains is not None: self.fit(spiketrains) return self.transform(spiketrains, returned_data=returned_data) @@ -529,7 +529,7 @@ def fit_transform(self, spiketrains, seqs_train=None, return self.transform(None, seqs=seqs_train, returned_data=returned_data) else: - raise ValueError('Must supply either spiketrains or seqs_train!') + raise ValueError('Must supply either trials or seqs_train!') def score(self, spiketrains): """ @@ -541,16 +541,16 @@ def score(self, spiketrains): Spike train data to be scored. The outer list corresponds to trials and the inner list corresponds to the neurons recorded in that trial, such that - `spiketrains[l][n]` is the spike train of neuron `n` in trial `l`. + `trials[l][n]` is the spike train of neuron `n` in trial `l`. Note that the number and order of `neo.SpikeTrain` objects per - trial must be fixed such that `spiketrains[l][n]` and - `spiketrains[k][n]` refer to spike trains of the same neuron + trial must be fixed such that `trials[l][n]` and + `trials[k][n]` refer to spike trains of the same neuron for any choice of `l`, `k`, and `n`. Returns ------- log_likelihood : float - Log-likelihood of the given spiketrains under the fitted model. + Log-likelihood of the given trials under the fitted model. """ self.transform(spiketrains) return self.transform_info['log_likelihood'] diff --git a/elephant/gpfa/gpfa_util.py b/elephant/gpfa/gpfa_util.py index 540c13f37..968a06aeb 100644 --- a/elephant/gpfa/gpfa_util.py +++ b/elephant/gpfa/gpfa_util.py @@ -19,13 +19,13 @@ @deprecated_alias(binsize='bin_size') -def get_seqs(data, bin_size, use_sqrt=True): +def get_seqs(trials, bin_size, use_sqrt=True): """ Converts the data into a rec array using internally BinnedSpikeTrain. Parameters ---------- - data : list of list of neo.SpikeTrain + trials : list of list of neo.SpikeTrain The outer list corresponds to trials and the inner list corresponds to the neurons recorded in that trial, such that data[l][n] is the spike train of neuron n in trial l. Note that the number and order of @@ -36,7 +36,7 @@ def get_seqs(data, bin_size, use_sqrt=True): Spike bin width use_sqrt: bool - Boolean specifying whether or not to use square-root transform on + Boolean specifying whether to use square-root transform on spike counts (see original paper for motivation). Default: True @@ -58,17 +58,13 @@ def get_seqs(data, bin_size, use_sqrt=True): if not isinstance(bin_size, pq.Quantity): raise ValueError("'bin_size' must be of type pq.Quantity") - seqs = [] - for dat in data: - sts = dat - binned_spiketrain = BinnedSpikeTrain(sts, bin_size=bin_size) - if use_sqrt: - binned = np.sqrt(binned_spiketrain.to_array()) - else: - binned = binned_spiketrain.to_array() - seqs.append( - (binned_spiketrain.n_bins, binned)) - seqs = np.array(seqs, dtype=[('T', int), ('y', 'O')]) + seqs = ((BinnedSpikeTrain(trial, bin_size=bin_size).n_bins, + np.sqrt(BinnedSpikeTrain(trial, bin_size=bin_size).to_array()) + if use_sqrt + else BinnedSpikeTrain(trial, bin_size=bin_size).to_array()) + for trial in trials) + + seqs = np.array(list(seqs), dtype=[('T', int), ('y', 'O')]) # Remove trials that are shorter than one bin width if len(seqs) > 0: diff --git a/elephant/test/test_gpfa.py b/elephant/test/test_gpfa.py index 1206394c2..fee76fb0b 100644 --- a/elephant/test/test_gpfa.py +++ b/elephant/test/test_gpfa.py @@ -71,6 +71,7 @@ def gen_test_data(rates, durs, shapes=(1, 1, 1, 1)): n8 = neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, t_start=0 * pq.s, t_stop=10 * pq.s) self.data0.append([n1, n2, n3, n4, n5, n6, n7, n8]) + self.x_dim = 4 self.data1 = self.data0[:20] @@ -135,9 +136,9 @@ def test_invalid_input_data(self): _ = GPFA(tau_init=invalid_tau_init) gpfa = GPFA() with self.assertRaises(ValueError): - gpfa.fit(spiketrains=invalid_data) + gpfa.fit(trials=invalid_data) with self.assertRaises(ValueError): - gpfa.fit(spiketrains=[]) + gpfa.fit(trials=[]) def test_data2(self): gpfa = GPFA(bin_size=self.bin_size, x_dim=8, em_max_iters=self.n_iters) From f7cff8095652841ec3bdc266b98fd96379fe8d15 Mon Sep 17 00:00:00 2001 From: Moritz-Alexander-Kern Date: Fri, 10 Feb 2023 16:18:44 +0100 Subject: [PATCH 03/10] removed unused variable from tests --- elephant/test/test_gpfa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elephant/test/test_gpfa.py b/elephant/test/test_gpfa.py index fee76fb0b..fbb910705 100644 --- a/elephant/test/test_gpfa.py +++ b/elephant/test/test_gpfa.py @@ -166,7 +166,7 @@ def test_returned_data(self): self.assertTrue(len(returned_data) == len(seqs)) self.assertTrue(isinstance(seqs, dict)) with self.assertRaises(ValueError): - seqs = gpfa.transform(self.data2, returned_data=['invalid_name']) + gpfa.transform(self.data2, returned_data=['invalid_name']) def test_fit_transform(self): gpfa1 = GPFA(bin_size=self.bin_size, x_dim=self.x_dim, From d108c75bfd60908efbf25cde8289ccc7182076c1 Mon Sep 17 00:00:00 2001 From: Moritz-Alexander-Kern Date: Fri, 10 Feb 2023 16:39:04 +0100 Subject: [PATCH 04/10] replace for-loop with list comprehension --- elephant/gpfa/gpfa_util.py | 4 +++- elephant/test/test_gpfa.py | 46 +++++++++++++++++++------------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/elephant/gpfa/gpfa_util.py b/elephant/gpfa/gpfa_util.py index 968a06aeb..60babb822 100644 --- a/elephant/gpfa/gpfa_util.py +++ b/elephant/gpfa/gpfa_util.py @@ -58,12 +58,14 @@ def get_seqs(trials, bin_size, use_sqrt=True): if not isinstance(bin_size, pq.Quantity): raise ValueError("'bin_size' must be of type pq.Quantity") + # Create a np.recarray + # a generator that generates a tuple (n_bins, neural data) seqs = ((BinnedSpikeTrain(trial, bin_size=bin_size).n_bins, np.sqrt(BinnedSpikeTrain(trial, bin_size=bin_size).to_array()) if use_sqrt else BinnedSpikeTrain(trial, bin_size=bin_size).to_array()) for trial in trials) - + # create np.recarray by specifying dtype with np.array() seqs = np.array(list(seqs), dtype=[('T', int), ('y', 'O')]) # Remove trials that are shorter than one bin width diff --git a/elephant/test/test_gpfa.py b/elephant/test/test_gpfa.py index fbb910705..dcf503c72 100644 --- a/elephant/test/test_gpfa.py +++ b/elephant/test/test_gpfa.py @@ -30,10 +30,9 @@ class GPFATestCase(unittest.TestCase): def setUp(self): def gen_gamma_spike_train(k, theta, t_max): - x = [] - for i in range(int(3 * t_max / (k * theta))): - x.append(np.random.gamma(k, theta)) - s = np.cumsum(x) + x = (np.random.gamma(k, theta) for _ in + range(int(3 * t_max / (k * theta)))) + s = np.cumsum(list(x)) return s[s < t_max] def gen_test_data(rates, durs, shapes=(1, 1, 1, 1)): @@ -52,25 +51,26 @@ def gen_test_data(rates, durs, shapes=(1, 1, 1, 1)): durs = (2.5, 2.5, 2.5, 2.5) np.random.seed(0) n_trials = 100 - self.data0 = [] - for trial in range(n_trials): - n1 = neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s) - n2 = neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s) - n3 = neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s) - n4 = neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s) - n5 = neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s) - n6 = neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s) - n7 = neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s) - n8 = neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s) - self.data0.append([n1, n2, n3, n4, n5, n6, n7, n8]) + + self.data0 = [[ + neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, + t_start=0 * pq.s, t_stop=10 * pq.s), + neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, + t_start=0 * pq.s, t_stop=10 * pq.s), + neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, + t_start=0 * pq.s, t_stop=10 * pq.s), + neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, + t_start=0 * pq.s, t_stop=10 * pq.s), + neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, + t_start=0 * pq.s, t_stop=10 * pq.s), + neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, + t_start=0 * pq.s, t_stop=10 * pq.s), + neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, + t_start=0 * pq.s, t_stop=10 * pq.s), + neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, + t_start=0 * pq.s, t_stop=10 * pq.s)] + for _ in range(n_trials)] + self.x_dim = 4 From 52f98adad70f1b4542d8ba45dd76e59b0dbe3c15 Mon Sep 17 00:00:00 2001 From: Moritz-Alexander-Kern Date: Fri, 10 Feb 2023 18:24:04 +0100 Subject: [PATCH 05/10] remove deprecated homogeneous_poisson_process --- elephant/test/test_gpfa.py | 43 +++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/elephant/test/test_gpfa.py b/elephant/test/test_gpfa.py index dcf503c72..737e24ec5 100644 --- a/elephant/test/test_gpfa.py +++ b/elephant/test/test_gpfa.py @@ -13,7 +13,9 @@ import quantities as pq from numpy.testing import assert_array_equal, assert_array_almost_equal -from elephant.spike_train_generation import homogeneous_poisson_process +from elephant.spike_train_generation import StationaryPoissonProcess + + try: import sklearn @@ -52,25 +54,23 @@ def gen_test_data(rates, durs, shapes=(1, 1, 1, 1)): np.random.seed(0) n_trials = 100 - self.data0 = [[ - neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s), - neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s), - neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s), - neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s), - neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s), - neo.SpikeTrain(gen_test_data(rates_a, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s), - neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s), - neo.SpikeTrain(gen_test_data(rates_b, durs), units=1 * pq.s, - t_start=0 * pq.s, t_stop=10 * pq.s)] - for _ in range(n_trials)] - + # create 100 trials with each trial containing 8 spiketrains each + # 10 seconds long with rates a or b + spiketrain_10_sec = lambda rate_a_or_b: \ + neo.SpikeTrain(gen_test_data(rate_a_or_b, durs), units=1 * pq.s, + t_start=0 * pq.s, t_stop=10 * pq.s) + neurons = lambda : [ + spiketrain_10_sec(rates_a), # n1 + spiketrain_10_sec(rates_a), # n2 + spiketrain_10_sec(rates_b), # n3 + spiketrain_10_sec(rates_b), # n4 + spiketrain_10_sec(rates_a), # n5 + spiketrain_10_sec(rates_a), # n6 + spiketrain_10_sec(rates_b), # n7 + spiketrain_10_sec(rates_b), # n8 + ] + + self.data0 = [neurons() for _ in range(n_trials)] self.x_dim = 4 @@ -83,7 +83,8 @@ def gen_test_data(rates, durs, shapes=(1, 1, 1, 1)): n_channels = 20 for trial in range(n_trials): rates = np.random.randint(low=1, high=100, size=n_channels) - spike_times = [homogeneous_poisson_process(rate=rate * pq.Hz) + spike_times = [StationaryPoissonProcess(rate=rate * pq.Hz, + t_stop=1000*pq.ms).generate_spiketrain() for rate in rates] self.data2.append(spike_times) From b9a9f2dae805fc845e262ef8f47d9ab4cd793f1b Mon Sep 17 00:00:00 2001 From: Moritz-Alexander-Kern Date: Fri, 10 Feb 2023 18:37:59 +0100 Subject: [PATCH 06/10] refactor tests --- elephant/test/test_gpfa.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/elephant/test/test_gpfa.py b/elephant/test/test_gpfa.py index 737e24ec5..e5a5d8c24 100644 --- a/elephant/test/test_gpfa.py +++ b/elephant/test/test_gpfa.py @@ -78,15 +78,13 @@ def gen_test_data(rates, durs, shapes=(1, 1, 1, 1)): # generate data2 np.random.seed(27) - self.data2 = [] n_trials = 10 n_channels = 20 - for trial in range(n_trials): - rates = np.random.randint(low=1, high=100, size=n_channels) - spike_times = [StationaryPoissonProcess(rate=rate * pq.Hz, - t_stop=1000*pq.ms).generate_spiketrain() - for rate in rates] - self.data2.append(spike_times) + rates = lambda : np.random.randint(low=1, high=100, size=n_channels) + + self.data2 =[[StationaryPoissonProcess(rate=rate * pq.Hz, + t_stop=1000*pq.ms).generate_spiketrain() + for rate in rates()]for trial in range(n_trials)] # generate seqs_train data self.seqs_train = gpfa_util.get_seqs(self.data1, From 41d0dc57c4b3ae4b5b968b06f07dc7924e6ac426 Mon Sep 17 00:00:00 2001 From: Moritz-Alexander-Kern Date: Fri, 10 Feb 2023 18:38:40 +0100 Subject: [PATCH 07/10] removed unused variable --- elephant/test/test_gpfa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elephant/test/test_gpfa.py b/elephant/test/test_gpfa.py index e5a5d8c24..08bda95e9 100644 --- a/elephant/test/test_gpfa.py +++ b/elephant/test/test_gpfa.py @@ -84,7 +84,7 @@ def gen_test_data(rates, durs, shapes=(1, 1, 1, 1)): self.data2 =[[StationaryPoissonProcess(rate=rate * pq.Hz, t_stop=1000*pq.ms).generate_spiketrain() - for rate in rates()]for trial in range(n_trials)] + for rate in rates()] for _ in range(n_trials)] # generate seqs_train data self.seqs_train = gpfa_util.get_seqs(self.data1, From 700c961351c1e64a75198597bdf293ec45d29c73 Mon Sep 17 00:00:00 2001 From: Moritz-Alexander-Kern Date: Fri, 10 Feb 2023 19:21:26 +0100 Subject: [PATCH 08/10] add continuous input to fi, transform and fit_transform --- elephant/gpfa/gpfa.py | 79 +++++++++++++++++++++----------------- elephant/gpfa/gpfa_util.py | 12 +++--- elephant/test/test_gpfa.py | 12 +++--- 3 files changed, 56 insertions(+), 47 deletions(-) diff --git a/elephant/gpfa/gpfa.py b/elephant/gpfa/gpfa.py index 4553264ad..3a37da100 100644 --- a/elephant/gpfa/gpfa.py +++ b/elephant/gpfa/gpfa.py @@ -273,24 +273,24 @@ def binsize(self): warnings.warn("'binsize' is deprecated; use 'bin_size'") return self.bin_size - def fit(self, trials, seqs_train=None): + def fit(self, trials): """ Fit the model with the given training data. Parameters ---------- - trials : list of list of neo.SpikeTrain + trials : list of list of neo.SpikeTrain or np.recarray Spike train data to be fit to latent variables. The outer list corresponds to trials and the inner list corresponds to the neurons recorded in that trial, such that - `spiketrains[l][n]` is the spike train of neuron `n` in trial `l`. + `trials[l][n]` is the spike train of neuron `n` in trial `l`. Note that the number and order of `neo.SpikeTrain` objects per - trial must be fixed such that `spiketrains[l][n]` and - `spiketrains[k][n]` refer to spike trains of the same neuron + trial must be fixed such that `trials[l][n]` and + `trials[k][n]` refer to spike trains of the same neuron for any choices of `l`, `k`, and `n`. - seqs_train: np.recarray - Alternatively, pass a pre-processed seqs_train array. + Continuous data + Alternatively, pass a pre-processed np.recarray. This is a training data structure, whose n-th element (corresponding to the n-th experimental trial) has fields @@ -307,22 +307,21 @@ def fit(self, trials, seqs_train=None): Raises ------ ValueError - If `spiketrains` is an empty list. + If `trials` is an empty list. - If `spiketrains[0][0]` is not a `neo.SpikeTrain`. + If `trials[0][0]` is not a `neo.SpikeTrain`. If covariance matrix of input spike data is rank deficient. """ - if seqs_train is not None and trials is not None: - raise ValueError('Cannot provide both trials and seqs_train!') - elif trials is not None: + if isinstance(trials, list): self._check_training_data(trials) seqs_train = self._format_training_data(trials) - elif seqs_train is not None: - seqs_train = self._format_training_data_seqs(seqs_train) + elif isinstance(trials, np.ndarray): + seqs_train = self._format_training_data_seqs(trials) else: - raise ValueError('Must supply either trials or seqs_train!') + raise ValueError('Must supply either trials as ' + 'np.recarray or List of Lists!') # Check if training data covariance is full rank y_all = np.hstack(seqs_train['y']) @@ -379,8 +378,7 @@ def _format_training_data_seqs(self, seqs): seq['y'] = seq['y'][self.has_spikes_bool, :] return seqs - def transform(self, spiketrains, seqs=None, - returned_data=['latent_variable_orth']): + def transform(self, trials, returned_data=['latent_variable_orth']): """ Obtain trajectories of neural activity in a low-dimensional latent variable space by inferring the posterior mean of the obtained GPFA @@ -388,7 +386,7 @@ def transform(self, spiketrains, seqs=None, Parameters ---------- - spiketrains : list of list of neo.SpikeTrain + trials : list of list of neo.SpikeTrain Spike train data to be transformed to latent variables. The outer list corresponds to trials and the inner list corresponds to the neurons recorded in that trial, such that @@ -397,6 +395,17 @@ def transform(self, spiketrains, seqs=None, trial must be fixed such that `trials[l][n]` and `trials[k][n]` refer to spike trains of the same neuron for any choices of `l`, `k`, and `n`. + + Continuous data + Alternatively, pass a pre-processed np.recarray. + This is a training data structure, whose n-th element + (corresponding to the n-th experimental trial) has fields + + T : int + number of bins + y : (#units, T) np.ndarray + neural data + returned_data : list of str The dimensionality reduction transform generates the following resultant data: @@ -459,18 +468,22 @@ def transform(self, spiketrains, seqs=None, raise ValueError("'returned_data' can only have the following " "entries: {}".format(self.valid_data_names)) - if spiketrains is not None: - if len(spiketrains[0]) != len(self.has_spikes_bool): + if isinstance(trials,list): + if len(trials[0]) != len(self.has_spikes_bool): raise ValueError("'trials' must contain the same number " "of neurons as the training spiketrain data") + seqs = gpfa_util.get_seqs(trials, self.bin_size) - seqs = gpfa_util.get_seqs(spiketrains, self.bin_size) - elif seqs is not None: + elif isinstance(trials,np.ndarray): # check some stuff - if len(seqs['y'][0]) != len(self.has_spikes_bool): + if len(trials['y'][0]) != len(self.has_spikes_bool): raise ValueError( "'seq_trains' must contain the same number of neurons as " "the training spiketrain data") + seqs=trials + else: + raise ValueError('Must supply either trials as ' + 'np.recarray or List of Lists!') for seq in seqs: seq['y'] = seq['y'][self.has_spikes_bool, :] @@ -485,7 +498,7 @@ def transform(self, spiketrains, seqs=None, return seqs[returned_data[0]] return {x: seqs[x] for x in returned_data} - def fit_transform(self, spiketrains, seqs_train=None, + def fit_transform(self, trials, seqs_train=None, returned_data=['latent_variable_orth']): """ Fit the model with `trials` data and apply the dimensionality @@ -493,7 +506,7 @@ def fit_transform(self, spiketrains, seqs_train=None, Parameters ---------- - spiketrains : list of list of neo.SpikeTrain + trials : list of list of neo.SpikeTrain Refer to the :func:`GPFA.fit` docstring. seqs_train : np.recarray @@ -519,17 +532,13 @@ def fit_transform(self, spiketrains, seqs_train=None, GPFA.transform : transform `trials` into trajectories """ - if seqs_train is not None and spiketrains is not None: - raise ValueError('Cannot provide both trials and seqs_train!') - elif spiketrains is not None: - self.fit(spiketrains) - return self.transform(spiketrains, returned_data=returned_data) - elif seqs_train is not None: - self.fit(None, seqs_train=seqs_train) - return self.transform(None, seqs=seqs_train, - returned_data=returned_data) + + if isinstance(trials,(list,np.ndarray)): + self.fit(trials) + return self.transform(trials, returned_data=returned_data) else: - raise ValueError('Must supply either trials or seqs_train!') + raise ValueError('Must supply either trials as ' + 'np.recarray or List of Lists!') def score(self, spiketrains): """ diff --git a/elephant/gpfa/gpfa_util.py b/elephant/gpfa/gpfa_util.py index 60babb822..e91b32204 100644 --- a/elephant/gpfa/gpfa_util.py +++ b/elephant/gpfa/gpfa_util.py @@ -86,12 +86,12 @@ def cut_trials(seq_in, seg_length=20): Parameters ---------- seq_in : np.recarray - data structure, whose nth entry (corresponding to the nth experimental + trials structure, whose nth entry (corresponding to the nth experimental trial) has fields T : int number of timesteps in trial y : (yDim, T) np.ndarray - neural data + neural trials seg_length : int length of segments to extract, in number of timesteps. If infinite, @@ -101,12 +101,12 @@ def cut_trials(seq_in, seg_length=20): Returns ------- seqOut : np.recarray - data structure, whose nth entry (corresponding to the nth experimental + trials structure, whose nth entry (corresponding to the nth experimental trial) has fields T : int number of timesteps in segment y : (yDim, T) np.ndarray - neural data + neural trials Raises ------ @@ -498,7 +498,7 @@ def orthonormalize(x, l): """ Orthonormalize the columns of the loading matrix and apply the corresponding linear transform to the latent variables. - In the following description, yDim and xDim refer to data dimensionality + In the following description, yDim and xDim refer to trials dimensionality and latent dimensionality, respectively. Parameters @@ -534,7 +534,7 @@ def orthonormalize(x, l): def segment_by_trial(seqs, x, fn): """ - Segment and store data by trial. + Segment and store trials by trial. Parameters ---------- diff --git a/elephant/test/test_gpfa.py b/elephant/test/test_gpfa.py index 08bda95e9..2d9c8e8af 100644 --- a/elephant/test/test_gpfa.py +++ b/elephant/test/test_gpfa.py @@ -222,7 +222,7 @@ def test_logdet(self): def test_equality_spiketrains_seqs_train(self): gpfa_seqs = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) - gpfa_seqs.fit_transform(None, seqs_train=self.seqs_train) + gpfa_seqs.fit_transform(self.seqs_train) gpfa_spiketrains = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) gpfa_spiketrains.fit_transform(self.data1) @@ -233,22 +233,22 @@ def test_equality_spiketrains_seqs_train(self): def test_fit_seqs_train(self): gpfa = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) - gpfa.fit(None, seqs_train=self.seqs_train) - target = [0.07919926, 0.07546947, 0.07397758, 0.0780272, 0.07773862, + gpfa.fit(self.seqs_train) + target = [0.07919926, 0.07546947, 0.07397758, 0.0780272, 0.07773862, 0.07695879, 0.07469927, 0.07867749] res = gpfa.params_estimated['d'] assert_array_almost_equal(res, target, decimal=5) def test_transform_seqs_train(self): gpfa = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) - gpfa.fit(None, seqs_train=self.seqs_train) - gpfa.transform(None, seqs=self.seqs_train) + gpfa.fit(self.seqs_train) + gpfa.transform(self.seqs_train) self.assertAlmostEqual(gpfa.transform_info['log_likelihood'], -8172.004695554373, places=5) def test_fit_transform_seqs_train(self): gpfa = GPFA(x_dim=self.x_dim, em_max_iters=self.n_iters) - gpfa.fit_transform(None, seqs_train=self.seqs_train) + gpfa.fit_transform(self.seqs_train) self.assertAlmostEqual(gpfa.transform_info['log_likelihood'], -8172.004695554373, places=5) From 30d64e68cbd2430b838f5a74655a8cb548c7188c Mon Sep 17 00:00:00 2001 From: Moritz-Alexander-Kern Date: Fri, 10 Feb 2023 19:30:07 +0100 Subject: [PATCH 09/10] changed docstrings --- elephant/gpfa/gpfa.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/elephant/gpfa/gpfa.py b/elephant/gpfa/gpfa.py index 3a37da100..4a1d0f866 100644 --- a/elephant/gpfa/gpfa.py +++ b/elephant/gpfa/gpfa.py @@ -378,7 +378,7 @@ def _format_training_data_seqs(self, seqs): seq['y'] = seq['y'][self.has_spikes_bool, :] return seqs - def transform(self, trials, returned_data=['latent_variable_orth']): + def transform(self, trials, returned_data=('latent_variable_orth',)): """ Obtain trajectories of neural activity in a low-dimensional latent variable space by inferring the posterior mean of the obtained GPFA @@ -386,7 +386,7 @@ def transform(self, trials, returned_data=['latent_variable_orth']): Parameters ---------- - trials : list of list of neo.SpikeTrain + trials : list of list of neo.SpikeTrain or np.recarray Spike train data to be transformed to latent variables. The outer list corresponds to trials and the inner list corresponds to the neurons recorded in that trial, such that @@ -498,20 +498,15 @@ def transform(self, trials, returned_data=['latent_variable_orth']): return seqs[returned_data[0]] return {x: seqs[x] for x in returned_data} - def fit_transform(self, trials, seqs_train=None, - returned_data=['latent_variable_orth']): + def fit_transform(self, trials, returned_data=('latent_variable_orth',)): """ Fit the model with `trials` data and apply the dimensionality reduction on `trials`. Parameters ---------- - trials : list of list of neo.SpikeTrain - Refer to the :func:`GPFA.fit` docstring. - - seqs_train : np.recarray + trials : list of list of neo.SpikeTrain or np.recarray Refer to the :func:`GPFA.fit` docstring. - Default: `None` returned_data : list of str Refer to the :func:`GPFA.transform` docstring. @@ -540,13 +535,13 @@ def fit_transform(self, trials, seqs_train=None, raise ValueError('Must supply either trials as ' 'np.recarray or List of Lists!') - def score(self, spiketrains): + def score(self, trials): """ Returns the log-likelihood of the given data under the fitted model Parameters ---------- - spiketrains : list of list of neo.SpikeTrain + trials : list of list of neo.SpikeTrain Spike train data to be scored. The outer list corresponds to trials and the inner list corresponds to the neurons recorded in that trial, such that @@ -561,5 +556,5 @@ def score(self, spiketrains): log_likelihood : float Log-likelihood of the given trials under the fitted model. """ - self.transform(spiketrains) + self.transform(trials) return self.transform_info['log_likelihood'] From 5b966ab3c2c94fae80f2b01cb3ef9580768c03c1 Mon Sep 17 00:00:00 2001 From: Moritz-Alexander-Kern Date: Fri, 10 Feb 2023 19:36:28 +0100 Subject: [PATCH 10/10] changed docstrings --- elephant/gpfa/gpfa.py | 1 - 1 file changed, 1 deletion(-) diff --git a/elephant/gpfa/gpfa.py b/elephant/gpfa/gpfa.py index 4a1d0f866..de983cbe0 100644 --- a/elephant/gpfa/gpfa.py +++ b/elephant/gpfa/gpfa.py @@ -473,7 +473,6 @@ def transform(self, trials, returned_data=('latent_variable_orth',)): raise ValueError("'trials' must contain the same number " "of neurons as the training spiketrain data") seqs = gpfa_util.get_seqs(trials, self.bin_size) - elif isinstance(trials,np.ndarray): # check some stuff if len(trials['y'][0]) != len(self.has_spikes_bool):