2 changes: 1 addition & 1 deletion CompStats/__init__.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = '0.1.6'
__version__ = '0.1.7'
from CompStats.bootstrap import StatisticSamples
from CompStats.measurements import CI, SE, difference_p_value
from CompStats.performance import performance, difference, all_differences, plot_performance, plot_difference
4 changes: 4 additions & 0 deletions CompStats/bootstrap.py
@@ -142,6 +142,10 @@ def inner(N):
return inner(N)
except AttributeError:
return inner(N)

def keys(self):
"""calls keys"""
return self.calls.keys()

def __getitem__(self, key):
return self.calls[key]
91 changes: 84 additions & 7 deletions CompStats/interface.py
@@ -18,8 +18,10 @@
import numpy as np
from CompStats.bootstrap import StatisticSamples
from CompStats.utils import progress_bar
from CompStats import measurements
from CompStats.measurements import SE
from CompStats.performance import plot_performance, plot_difference
from CompStats.utils import dataframe


class Perf(object):
@@ -239,6 +241,13 @@ def best(self):
"""System with best performance"""
if hasattr(self, '_best') and self._best is not None:
return self._best
if not isinstance(self.statistic, dict):
key, value = list(self.statistic_samples.calls.items())[0]
if value.ndim == 1:
self._best = key
else:
self._best = np.array([key] * value.shape[1])
return self._best
BiB = True if self.statistic_samples.BiB else False
keys = np.array(list(self.statistic.keys()))
data = np.asanyarray([self.statistic[k]
@@ -322,7 +331,14 @@ def se(self):
return list(output.values())[0]
return output

def plot(self, **kwargs):
def plot(self, value_name:str=None,
var_name:str='Performance',
alg_legend:str='Algorithm',
perf_names:list=None,
CI:float=0.05,
kind:str='point', linestyle:str='none',
col_wrap:int=3, capsize:float=0.2,
**kwargs):
"""plot with seaborn

>>> from sklearn.svm import LinearSVC
@@ -341,13 +357,37 @@ def plot(self, **kwargs):
forest=ens.predict(X_val))
>>> perf.plot()
"""
import seaborn as sns
if self.score_func is not None:
value_name = 'Score'
else:
value_name = 'Error'
_ = dict(value_name=value_name)
_.update(kwargs)
return plot_performance(self.statistic_samples, **_)
df = self.dataframe(value_name=value_name, var_name=var_name,
alg_legend=alg_legend, perf_names=perf_names)
if var_name not in df.columns:
var_name = None
col_wrap = None
ci = lambda x: measurements.CI(x, alpha=CI)
f_grid = sns.catplot(df, x=value_name, errorbar=ci,
y=alg_legend, col=var_name,
kind=kind, linestyle=linestyle,
col_wrap=col_wrap, capsize=capsize, **kwargs)
return f_grid


def dataframe(self, value_name:str='Score',
var_name:str='Performance',
alg_legend:str='Algorithm',
                  perf_names:list=None):
        """Long-format DataFrame with the bootstrap samples"""
if perf_names is None and isinstance(self.best, np.ndarray):
func_name = self.statistic_func.__name__
perf_names = [f'{func_name}({i})'
for i, k in enumerate(self.best)]
return dataframe(self, value_name=value_name,
var_name=var_name,
alg_legend=alg_legend,
perf_names=perf_names)

@property
def n_jobs(self):
@@ -561,7 +601,28 @@ def p_value(self, right:bool=True):
values.sort(key=lambda x: self.sorting_func(x[1]))
return dict(values)

def plot(self, **kwargs):
def dataframe(self, value_name:str='Score',
var_name:str='Best',
alg_legend:str='Algorithm',
                  perf_names:list=None):
        """Long-format DataFrame with the bootstrap samples of the differences"""
if perf_names is None and isinstance(self.best, np.ndarray):
perf_names = [f'{alg}({k})'
for k, alg in enumerate(self.best)]
return dataframe(self, value_name=value_name,
var_name=var_name,
alg_legend=alg_legend,
perf_names=perf_names)

def plot(self, value_name:str='Difference',
var_name:str='Best',
alg_legend:str='Algorithm',
perf_names:list=None,
CI:float=0.05,
kind:str='point', linestyle:str='none',
col_wrap:int=3, capsize:float=0.2,
set_refline:bool=True,
**kwargs):
"""Plot

>>> from sklearn.svm import LinearSVC
@@ -580,5 +641,21 @@ def plot(self, **kwargs):
>>> diff = perf.difference()
>>> diff.plot()
"""

return plot_difference(self.statistic_samples, **kwargs)
import seaborn as sns
df = self.dataframe(value_name=value_name,
var_name=var_name,
alg_legend=alg_legend, perf_names=perf_names)
title = var_name
if var_name not in df.columns:
var_name = None
col_wrap = None
ci = lambda x: measurements.CI(x, alpha=CI)
f_grid = sns.catplot(df, x=value_name, errorbar=ci,
y=alg_legend, col=var_name,
kind=kind, linestyle=linestyle,
col_wrap=col_wrap, capsize=capsize, **kwargs)
if set_refline:
f_grid.refline(x=0)
if isinstance(self.best, str):
f_grid.facet_axis(0, 0).set_title(f'{title} = {self.best}')
return f_grid
75 changes: 74 additions & 1 deletion CompStats/tests/test_interface.py
@@ -23,6 +23,79 @@
from CompStats.tests.test_performance import DATA


def test_Difference_dataframe():
"""Test Difference dataframe"""
from CompStats.metrics import f1_score

X, y = load_digits(return_X_y=True)
_ = train_test_split(X, y, test_size=0.3)
X_train, X_val, y_train, y_val = _
ens = RandomForestClassifier().fit(X_train, y_train)
nb = GaussianNB().fit(X_train, y_train)
svm = LinearSVC().fit(X_train, y_train)
score = f1_score(y_val, ens.predict(X_val),
average=None,
num_samples=50)
score(nb.predict(X_val))
score(svm.predict(X_val))
diff = score.difference()
df = diff.dataframe()
assert 'Best' in df.columns
score = f1_score(y_val, ens.predict(X_val),
average='macro',
num_samples=50)
score(nb.predict(X_val))
score(svm.predict(X_val))
diff = score.difference()
df = diff.dataframe()
assert 'Best' not in df.columns


def test_Perf_dataframe():
"""Test Perf dataframe"""
from CompStats.metrics import f1_score

X, y = load_digits(return_X_y=True)
_ = train_test_split(X, y, test_size=0.3)
X_train, X_val, y_train, y_val = _
ens = RandomForestClassifier().fit(X_train, y_train)
nb = GaussianNB().fit(X_train, y_train)
svm = LinearSVC().fit(X_train, y_train)
score = f1_score(y_val, ens.predict(X_val),
average=None,
num_samples=50)
df = score.dataframe()
score(nb.predict(X_val))
score(svm.predict(X_val))
df = score.dataframe()
assert 'Performance' in df.columns
score = f1_score(y_val, ens.predict(X_val),
average='macro',
num_samples=50)
score(nb.predict(X_val))
score(svm.predict(X_val))
df = score.dataframe()
assert 'Performance' not in df.columns


def test_Perf_plot_multi():
"""Test Perf plot multiple"""
from CompStats.metrics import f1_score

X, y = load_digits(return_X_y=True)
_ = train_test_split(X, y, test_size=0.3)
X_train, X_val, y_train, y_val = _
ens = RandomForestClassifier().fit(X_train, y_train)
nb = GaussianNB().fit(X_train, y_train)
svm = LinearSVC().fit(X_train, y_train)
score = f1_score(y_val, ens.predict(X_val),
average=None,
num_samples=50)
score(nb.predict(X_val))
score(svm.predict(X_val))
f_grid = score.plot()
assert f_grid is not None

def test_Perf_statistic_one():
"""Test Perf statistic one alg"""
from CompStats.metrics import f1_score
@@ -208,7 +281,7 @@ def test_Difference_plot():
diff.plot()


def test_Perf_dataframe():
def test_Perf_input_dataframe():
"""Test Perf with dataframe"""
from CompStats.interface import Perf

27 changes: 26 additions & 1 deletion CompStats/utils.py
@@ -54,4 +54,29 @@ def inner(*args, **kwargs):
return func(*args, **kwargs)

return inner
return perf_docs
return perf_docs


def dataframe(instance, value_name:str='Score',
var_name:str='Performance',
alg_legend:str='Algorithm',
perf_names:list=None):
"""Dataframe"""
import pandas as pd
if isinstance(instance.best, str):
df = pd.DataFrame(dict(instance.statistic_samples.calls.items()))
return df.melt(var_name=alg_legend,
value_name=value_name)
df = pd.DataFrame()
    if not isinstance(instance.statistic, dict):
        keys = instance.statistic_samples.keys()
    else:
        keys = instance.statistic
    # Melt each algorithm's bootstrap samples and stack them into one frame.
    for key in keys:
data = instance.statistic_samples[key]
_df = pd.DataFrame(data,
columns=perf_names).melt(value_name=value_name,
var_name=var_name)
_df[alg_legend] = key
df = pd.concat((df, _df))
return df
21 changes: 9 additions & 12 deletions README.rst
@@ -49,15 +49,12 @@ Once the predictions are available, it is time to measure the algorithm's performance

>>> score = f1_score(y_val, hy, average='macro')
>>> score
<Perf(score_func=f1_score)>
Statistic with its standard error (se)
statistic (se)
0.9332 (0.0113) <= alg-1
<Perf(score_func=f1_score, statistic=0.9435, se=0.0099)>

The previous code shows the macro-f1 score and, in parentheses, its standard error. The actual performance value is available through the `statistic` attribute.

>>> score.statistic
{'alg-1': 0.9332035615949114}
0.9434834454375508

Continuing with the example, let us assume that one wants to test another classifier on the same problem, in this case, a random forest, as can be seen in the following two lines. The second line predicts the validation set and adds the predictions to the analysis.

@@ -66,8 +63,8 @@
<Perf(score_func=f1_score)>
Statistic with its standard error (se)
statistic (se)
0.9756 (0.0061) <= Random Forest
0.9332 (0.0113) <= alg-1
0.9655 (0.0077) <= Random Forest
0.9435 (0.0099) <= alg-1

Let us incorporate another prediction, now with the Naive Bayes classifier, as seen below.

@@ -76,18 +73,18 @@
<Perf(score_func=f1_score)>
Statistic with its standard error (se)
statistic (se)
0.9756 (0.0061) <= Random Forest
0.9332 (0.0113) <= alg-1
0.8198 (0.0144) <= Naive Bayes
0.9655 (0.0077) <= Random Forest
0.9435 (0.0099) <= alg-1
0.8549 (0.0153) <= Naive Bayes

The final step is to compare the performance of the three classifiers, which can be done with the `difference` method, as seen next.

>>> diff = score.difference()
>>> diff
<Difference>
difference p-values w.r.t Random Forest
0.0000 <= alg-1
difference p-values w.r.t Random Forest
0.0000 <= Naive Bayes
0.0120 <= alg-1

The class `Difference` has a `plot` method that depicts the differences with respect to the best-performing system.

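For instance, assuming the `diff` object created above, the figure can be drawn and saved under an illustrative file name; `plot` returns a `seaborn` `FacetGrid`, so its `savefig` method is available.

>>> grid = diff.plot()
>>> grid.savefig('difference.png')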