Skip to content

Commit 3b756df

Browse files
ENH: Add apply_scaling function to select scaler by name
1 parent f787529 commit 3b756df

File tree

2 files changed

+74
-2
lines changed

2 files changed

+74
-2
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""Preprocessing module."""
22

3-
from .scalers import minmax_scale, standardize
3+
from .scalers import apply_scaling, minmax_scale, standardize
44

5-
__all__ = ["minmax_scale", "standardize"]
5+
__all__ = ["apply_scaling", "minmax_scale", "standardize"]

orca_python/preprocessing/scalers.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,78 @@ def _validate_and_align(X_train, X_test):
3737
return X_train, X_test
3838

3939

40+
def apply_scaling(X_train, X_test=None, method=None, return_transformer=False):
    """Apply normalization or standardization to the input data.

    The preprocessing is fit on the training data and then applied to both
    training and test data (if provided). This function only selects the
    scaler by name; the actual work is delegated to ``minmax_scale`` or
    ``standardize``.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Feature matrix used specifically for model training.

    X_test : array-like of shape (m_samples, n_features), optional
        Feature matrix used for model evaluation and prediction.

    method : {"norm", "std", None}, optional
        Name of the scaler to apply. String values are matched
        case-insensitively (e.g. ``"STD"`` is accepted).

        - "norm": Min-Max scaling to [0, 1]
        - "std" : Standardization (mean=0, std=1)
        - None  : No preprocessing; ``X_train`` and ``X_test`` are returned
          exactly as passed in, without validation or conversion.

    return_transformer : bool, default=False
        If True, also return the fitted scaling object.

    Returns
    -------
    (X_train_scaled, X_test_scaled) or (X_train_scaled, X_test_scaled, scaler)
        Scaled arrays; X_test_scaled is None if X_test is None.
        If return_transformer=True and method=None, scaler is None.

    Raises
    ------
    ValueError
        If ``method`` is neither a string nor None, or if it is a string
        that does not name a known scaling method.

    Examples
    --------
    >>> import numpy as np
    >>> from orca_python.preprocessing.scalers import apply_scaling
    >>> X_train = np.array([[1.0, 2.0], [2.0, 4.0], [3.0, 6.0]])
    >>> X_test = np.array([[2.5, 5.0]])
    >>> X_train_scaled, X_test_scaled = apply_scaling(X_train, X_test, method="norm")
    >>> X_train_scaled
    array([[0. , 0. ],
           [0.5, 0.5],
           [1. , 1. ]])
    >>> X_test_scaled
    array([[0.75, 0.75]])
    >>> X_train_scaled, X_test_scaled = apply_scaling(X_train, X_test, method="std")
    >>> X_train_scaled.round(3)
    array([[-1.225, -1.225],
           [ 0.   ,  0.   ],
           [ 1.225,  1.225]])
    >>> X_test_scaled.round(3)
    array([[0.612, 0.612]])

    """
    # Pass-through: no scaler is fitted, so the inputs are handed back as-is
    # and the "transformer" slot (when requested) is None.
    if method is None:
        if return_transformer:
            return X_train, X_test, None
        return X_train, X_test

    # Reject non-string selectors up front so that ``.lower()`` below cannot
    # fail with an unrelated AttributeError.
    if not isinstance(method, str):
        raise ValueError("Scaling method must be a string or None.")

    # Method names are compared case-insensitively.
    key = method.lower()
    if key == "norm":
        return minmax_scale(X_train, X_test, return_transformer)
    if key == "std":
        return standardize(X_train, X_test, return_transformer)

    # Report the value exactly as the caller supplied it (original casing).
    raise ValueError(
        f"Unknown scaling method '{method}'. Valid options: 'norm', 'std', None."
    )
110+
111+
40112
def minmax_scale(X_train, X_test=None, return_transformer=False):
41113
"""Scale features to a fixed range between 0 and 1.
42114

0 commit comments

Comments
 (0)