
Commit e2139a0

Merge pull request #28 from INGEOTEC/develop
Version - 0.1.14
2 parents 8bd7cec + 8db30c7 commit e2139a0

File tree

.devcontainer/devcontainer.json
.gitignore
CompStats/__init__.py
CompStats/interface.py
CompStats/metrics.py
CompStats/tests/test_interface.py
pyproject.toml
quarto/CompStats.qmd
requirements.txt

9 files changed: +101 -30 lines

.devcontainer/devcontainer.json

Lines changed: 3 additions & 6 deletions
@@ -1,10 +1,7 @@
-{"image": "mcr.microsoft.com/devcontainers/base:ubuntu",
+{"image": "mcr.microsoft.com/devcontainers/python:3.13",
  "features": {
     "ghcr.io/rocker-org/devcontainer-features/quarto-cli":
-    {"installChromium": true, "installTinyTex": true},
-    "ghcr.io/rocker-org/devcontainer-features/apt-packages:1":
-    {"packages": "ca-certificates,fonts-liberation,libasound2,libatk-bridge2.0-0,libatk1.0-0,libc6,libcairo2,libcups2,libdbus-1-3,libexpat1,libfontconfig1,libgbm1,libgcc1,libglib2.0-0,libgtk-3-0,libnspr4,libnss3,libpango-1.0-0,libpangocairo-1.0-0,libstdc++6,libx11-6,libx11-xcb1,libxcb1,libxcomposite1,libxcursor1,libxdamage1,libxext6,libxfixes3,libxi6,libxrandr2,libxrender1,libxss1,libxtst6,lsb-release,wget,xdg-utils"},
-    "ghcr.io/rocker-org/devcontainer-features/miniforge:2": {}
+    {"installChromium": true, "installTinyTex": true}
  },
- "postCreateCommand": "conda env create --file environment.yml"
+ "postCreateCommand": "python -m pip install -r requirements.txt"
  }

.gitignore

Lines changed: 2 additions & 1 deletion
@@ -160,4 +160,5 @@ cython_debug/
 #.idea/
 
 .vscode/settings.json
-quarto/CompStats_files/
+quarto/CompStats_files/
+quarto/

CompStats/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = '0.1.13'
+__version__ = '0.1.14'
 from CompStats.bootstrap import StatisticSamples
 from CompStats.measurements import CI, SE, difference_p_value
 from CompStats.performance import performance, difference, all_differences, plot_performance, plot_difference
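
For downstream users, the visible effect of this file is the version string and the re-exported API. A quick check (a sketch; the expected value is taken from the diff above):

```python
import CompStats

# After this merge, the installed package reports the new release.
print(CompStats.__version__)  # -> '0.1.14'
```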

CompStats/interface.py

Lines changed: 54 additions & 7 deletions
@@ -19,7 +19,7 @@
 from CompStats.bootstrap import StatisticSamples
 from CompStats.utils import progress_bar
 from CompStats import measurements
-from CompStats.measurements import SE
+from CompStats.measurements import SE, CI
 from CompStats.utils import dataframe
 
 
@@ -270,7 +270,7 @@ def best(self):
         best = data.argmin()
         self._best = keys[best]
         return self._best
-    
+
     @best.setter
     def best(self, value):
         self._best = value
@@ -279,7 +279,7 @@ def best(self, value):
     def sorting_func(self):
         """Rank systems when multiple performances are used"""
         return self._sorting_func
-    
+
     @sorting_func.setter
     def sorting_func(self, value):
         self._sorting_func = value
@@ -315,7 +315,7 @@ def statistic(self):
         else:
             self._statistic = dict(data)
         return self._statistic
-    
+
     @statistic.setter
     def statistic(self, value):
         """statistic setter"""
@@ -346,6 +346,30 @@ def se(self):
             return list(output.values())[0]
         return output
 
+    @property
+    def ci(self):
+        """Confidence interval
+
+        >>> from sklearn.svm import LinearSVC
+        >>> from sklearn.datasets import load_iris
+        >>> from sklearn.model_selection import train_test_split
+        >>> from CompStats.interface import Perf
+        >>> X, y = load_iris(return_X_y=True)
+        >>> _ = train_test_split(X, y, test_size=0.3)
+        >>> X_train, X_val, y_train, y_val = _
+        >>> m = LinearSVC().fit(X_train, y_train)
+        >>> hy = m.predict(X_val)
+        >>> perf = Perf(y_val, hy, name='LinearSVC')
+        >>> perf.ci
+        (np.float64(0.9333333333333332), np.float64(1.0))
+        """
+
+        output = CI(self.statistic_samples)
+        if len(output) == 1:
+            return list(output.values())[0]
+        return output
+
     def plot(self, value_name:str=None,
              var_name:str='Performance',
              alg_legend:str='Algorithm',
@@ -359,6 +383,7 @@ def plot(self, value_name:str=None,
              winner_legend:str='Best',
             tie_legend:str='Equivalent',
             loser_legend:str='Different',
+            palette:object=None,
             **kwargs):
         """plot with seaborn
 
@@ -403,10 +428,17 @@
         ci = lambda x: measurements.CI(x, alpha=CI)
         if comparison:
             kwargs.update(dict(hue=comp_legend))
+            if palette is None:
+                pal = sns.color_palette("Paired")
+                palette = {winner_legend: pal[1],
+                           tie_legend: pal[3],
+                           loser_legend: pal[5]}
         f_grid = sns.catplot(df, x=value_name, errorbar=ci,
                              y=alg_legend, col=var_name,
                              kind=kind, linestyle=linestyle,
-                             col_wrap=col_wrap, capsize=capsize, **kwargs)
+                             col_wrap=col_wrap, capsize=capsize,
+                             palette=palette,
+                             **kwargs)
         return f_grid
 
     def dataframe(self, comparison:bool=False,
@@ -420,7 +452,22 @@ def dataframe(self, comparison:bool=False,
                   tie_legend:str='Equivalent',
                   loser_legend:str='Different',
                   perf_names:str=None):
-        """Dataframe"""
+        """Dataframe
+
+        >>> from sklearn.svm import LinearSVC
+        >>> from sklearn.ensemble import RandomForestClassifier
+        >>> from sklearn.datasets import load_iris
+        >>> from sklearn.model_selection import train_test_split
+        >>> from CompStats.interface import Perf
+        >>> X, y = load_iris(return_X_y=True)
+        >>> _ = train_test_split(X, y, test_size=0.3)
+        >>> X_train, X_val, y_train, y_val = _
+        >>> m = LinearSVC().fit(X_train, y_train)
+        >>> hy = m.predict(X_val)
+        >>> ens = RandomForestClassifier().fit(X_train, y_train)
+        >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
+        >>> df = perf.dataframe()
+        """
         if perf_names is None and isinstance(self.best, np.ndarray):
             func_name = self.statistic_func.__name__
             perf_names = [f'{func_name}({i})'
@@ -624,7 +671,7 @@ def _delta_best(self):
             return self.statistic[self.best]
         keys = np.unique(self.best)
         statistic = np.array([self.statistic[k]
-                             for k in keys])
+                              for k in keys])
         m = {v: k for k, v in enumerate(keys)}
         best = np.array([m[x] for x in self.best])
         return statistic[best, np.arange(best.shape[0])]

(Note: the `ci` doctest in the merged code also created an unused `RandomForestClassifier` instance without importing it, which would make the doctest fail with a NameError; that stray line is dropped above so the example runs as written.)
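
In short, the interface gains a `ci` property alongside `se`, and `plot` accepts a `palette`. A minimal sketch of the new property, reusing the iris setup from the docstrings (the interval varies with the unseeded random split):

```python
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from CompStats.interface import Perf

X, y = load_iris(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)
svc = LinearSVC().fit(X_train, y_train)
ens = RandomForestClassifier().fit(X_train, y_train)

# One system: ci is a (lower, upper) tuple, mirroring how se returns a float.
perf = Perf(y_val, svc.predict(X_val), name='LinearSVC')
print(perf.ci)

# Several systems: ci is a dict keyed by system name, one interval each.
perf = Perf(y_val, svc.predict(X_val), forest=ens.predict(X_val))
print(perf.ci)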

CompStats/metrics.py

Lines changed: 3 additions & 3 deletions
@@ -307,7 +307,7 @@ def macro_f1(y_true, *y_pred, labels=None,
              sample_weight=None, zero_division='warn',
              num_samples: int=500, n_jobs: int=-1, use_tqdm=True,
              **kwargs):
-    """:py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.f1_score` (as :py:attr:`score_func`) with the parameteres needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.macro_f1`
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.f1_score` (as :py:attr:`score_func`) with the parameters needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.f1_score`
 
     :param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
     :type y_true: numpy.ndarray or pandas.DataFrame
@@ -332,7 +332,7 @@ def macro_recall(y_true, *y_pred, labels=None,
                  sample_weight=None, zero_division='warn',
                  num_samples: int=500, n_jobs: int=-1, use_tqdm=True,
                  **kwargs):
-    """:py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.recall_score` (as :py:attr:`score_func`) with the parameteres needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.macro_recall`
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.recall_score` (as :py:attr:`score_func`) with the parameters needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.recall_score`
 
     :param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
     :type y_true: numpy.ndarray or pandas.DataFrame
@@ -357,7 +357,7 @@ def macro_precision(y_true, *y_pred, labels=None,
                     sample_weight=None, zero_division='warn',
                     num_samples: int=500, n_jobs: int=-1, use_tqdm=True,
                     **kwargs):
-    """:py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.precision_score` (as :py:attr:`score_func`) with the parameteres needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.macro_precision`
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.precision_score` (as :py:attr:`score_func`) with the parameters needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.precision_score`
 
     :param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
     :type y_true: numpy.ndarray or pandas.DataFrame
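
The fix matters because `sklearn.metrics.macro_f1`, `macro_recall`, and `macro_precision` do not exist; the corrected targets are the real scorers each wrapper binds with the macro averaging already configured. A short sketch of one wrapper in use, assuming the digits setup used elsewhere in the repository:

```python
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from CompStats.metrics import macro_f1

X, y = load_digits(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)
nb = GaussianNB().fit(X_train, y_train)

# macro_f1 wraps sklearn.metrics.f1_score and returns a Perf instance,
# so the bootstrap statistics from interface.py are available directly.
score = macro_f1(y_val, nb.predict(X_val))
print(score.se, score.ci)
```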

CompStats/tests/test_interface.py

Lines changed: 6 additions & 2 deletions
@@ -81,7 +81,7 @@ def test_Perf_dataframe():
     from CompStats.metrics import f1_score
 
     X, y = load_digits(return_X_y=True)
-    _ = train_test_split(X, y, test_size=0.3)
+    _ = train_test_split(X, y, test_size=0.3, random_state=0)
     X_train, X_val, y_train, y_val = _
     ens = RandomForestClassifier().fit(X_train, y_train)
     nb = GaussianNB().fit(X_train, y_train)
@@ -121,6 +121,7 @@ def test_Perf_plot_multi():
     f_grid = score.plot()
     assert f_grid is not None
 
+
 def test_Perf_statistic_one():
     """Test Perf statistic one alg"""
     from CompStats.metrics import f1_score
@@ -142,6 +143,9 @@ def test_Perf_statistic_one():
     assert isinstance(score.statistic, float)
     assert isinstance(str(score), str)
     assert isinstance(score.se, float)
+    assert isinstance(score.ci, tuple)
+    assert len(score.ci) == 2
+
 
 def test_Perf_best():
     """Test Perf best"""
@@ -191,7 +195,7 @@ def test_difference_best():
     score(svm.predict(X_val), name='svm')
     diff = score.difference()
     assert isinstance(diff.best, str)
-    
+
 
 def test_difference_str__():
     """Test f1_score"""

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -2,6 +2,7 @@
 name = 'CompStats'
 description = 'CompStats implements an evaluation methodology for statistically analyzing competition results and competition'
 readme = "README.rst"
+license = "Apache-2.0"
 dependencies = [
     'numpy',
     'scikit-learn>=1.3.0',
@@ -17,7 +18,6 @@ classifiers = [
     "Intended Audience :: Developers",
     "Intended Audience :: Information Technology",
     "Intended Audience :: Science/Research",
-    "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
     "Programming Language :: Python",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",

quarto/CompStats.qmd

Lines changed: 23 additions & 9 deletions
@@ -18,7 +18,7 @@ execute:
 Collaborative competitions have gained popularity in the scientific and technological fields. These competitions involve defining tasks, selecting evaluation scores, and devising result verification methods. In the standard scenario, participants receive a training set and are expected to provide a solution for a held-out dataset kept by organizers. An essential challenge for organizers arises when comparing algorithms' performance, assessing multiple participants, and ranking them. Statistical tools are often used for this purpose; however, traditional statistical methods often fail to capture decisive differences between systems' performance. CompStats implements an evaluation methodology for statistically analyzing competition results and competitions. CompStats offers several advantages, including off-the-shelf comparisons with correction mechanisms and the inclusion of confidence intervals.
 :::
 
-::: {.card title='Installing using conda'}
+::: {.card title='Installing using conda' .flow}
 
 `CompStats` can be installed using the conda package manager with the following instruction.
 
@@ -27,7 +27,7 @@
 ```
 :::
 
-::: {.card title='Installing using pip'}
+::: {.card title='Installing using pip' .flow}
 A more general approach to installing `CompStats` is through the use of the command pip, as illustrated in the following instruction.
 
 ```{sh}
@@ -41,8 +41,12 @@ pip install CompStats
 
 To illustrate the use of `CompStats`, the following snippets show an example. The instructions load the necessary libraries, including the one to obtain the problem (e.g., digits), four different classifiers, and the last line is the score used to measure the performance and compare the algorithms.
 
+Below the imports is the code that loads the digits problem and splits the dataset into training and validation sets.
+
+::: {.card title="Dataset and libraries" .flow}
 ```{python}
 #| echo: true
+#| code-fold: true
 
 from sklearn.svm import LinearSVC
 from sklearn.naive_bayes import GaussianNB
@@ -52,42 +56,51 @@ from sklearn.datasets import load_digits
 from sklearn.model_selection import train_test_split
 from sklearn.base import clone
 from CompStats.metrics import f1_score
+
+X, y = load_digits(return_X_y=True)
+_ = train_test_split(X, y, test_size=0.3)
+X_train, X_val, y_train, y_val = _
 ```
+:::
 
-The first step is to load the digits problem and split the dataset into training and validation sets. The second step is to estimate the parameters of a linear Support Vector Machine and predict the validation set's classes. The predictions are stored in the variable `hy`.
+The first line estimates the parameters of a linear Support Vector Machine, and the second predicts the validation set's classes. The predictions are stored in the variable `hy`.
 
+::: {.card title="Linear SVM" .flow}
 ```{python}
 #| echo: true
 
 m = LinearSVC().fit(X_train, y_train)
 hy = m.predict(X_val)
 ```
+:::
 
 Once the predictions are available, it is time to measure the algorithm's performance, as seen in the following code. It is essential to note that the API used in `sklearn.metrics` is followed; the difference is that the function returns an instance with different methods that can be used to estimate different performance statistics and compare algorithms.
 
-## Column
-
+::: {.card title="Score" .flow}
 ```{python}
 #| echo: true
 
 score = f1_score(y_val, hy, average='macro')
 score
 ```
+:::
+
+## Column
 
 Continuing with the example, let us assume that one wants to test another classifier on the same problem, in this case, a random forest, as can be seen in the following two lines. The second line predicts the validation set and adds the predictions to the analysis.
 
+::: {.card title="Random Forest" .flow}
 ```{python}
 #| echo: true
 
 ens = RandomForestClassifier().fit(X_train, y_train)
 score(ens.predict(X_val), name='Random Forest')
 ```
+:::
 
 Let us incorporate more predictions, now with a Naive Bayes classifier and Histogram Gradient Boosting, as seen below.
 
+::: {.card title="Rest of the classifiers" .flow}
 ```{python}
 #| echo: true
 
@@ -96,4 +109,5 @@ _ = score(nb.predict(X_val), name='Naive Bayes')
 hist = HistGradientBoostingClassifier().fit(X_train, y_train)
 _ = score(hist.predict(X_val), name='Hist. Grad. Boost. Tree')
 score.plot()
-```
+```
+:::
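
Read end to end, the reorganized page runs the following sequence; here it is as one plain script (a sketch: card markup dropped and the page's unused `clone` import omitted; results vary run to run because the split is not seeded):

```python
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from CompStats.metrics import f1_score

# Dataset and libraries: load digits and split into training/validation.
X, y = load_digits(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)

# Linear SVM: fit and predict the validation set.
m = LinearSVC().fit(X_train, y_train)
hy = m.predict(X_val)

# Score: same API as sklearn.metrics, but returns a Perf instance.
score = f1_score(y_val, hy, average='macro')

# Random Forest: add a second system to the analysis.
ens = RandomForestClassifier().fit(X_train, y_train)
score(ens.predict(X_val), name='Random Forest')

# Rest of the classifiers, then the comparison plot.
nb = GaussianNB().fit(X_train, y_train)
_ = score(nb.predict(X_val), name='Naive Bayes')
hist = HistGradientBoostingClassifier().fit(X_train, y_train)
_ = score(hist.predict(X_val), name='Hist. Grad. Boost. Tree')
score.plot()
```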

requirements.txt

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+numpy
+scikit-learn
+seaborn
+jupyter
+pyyaml
+sphinx
+pytest
+statsmodels
