Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 20 additions & 10 deletions src/modelskill/comparison/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,28 @@ def _add_spatial_grid_to_df(
bins_y = bins
else:
# bins from binsize
x_ptp = np.ptp(df.x.values) # type: ignore
y_ptp = np.ptp(df.y.values) # type: ignore
nx = int(np.ceil(x_ptp / binsize))
ny = int(np.ceil(y_ptp / binsize))
# Ensure bins cover the full data range from min to max
x_min, x_max = df.x.min(), df.x.max()
y_min, y_max = df.y.min(), df.y.max()

# Align bin edges to multiples of binsize relative to rounded mean
# This maintains consistency while ensuring full data coverage
x_mean = np.round(df.x.mean())
y_mean = np.round(df.y.mean())
bins_x = np.arange(
x_mean - nx / 2 * binsize, x_mean + (nx / 2 + 1) * binsize, binsize
)
bins_y = np.arange(
y_mean - ny / 2 * binsize, y_mean + (ny / 2 + 1) * binsize, binsize
)

# Calculate starting edge: largest multiple of binsize (relative to mean)
# that is less than or equal to the minimum data value
x_start = x_mean + np.floor((x_min - x_mean) / binsize) * binsize
y_start = y_mean + np.floor((y_min - y_mean) / binsize) * binsize

# Calculate ending edge: smallest multiple of binsize (relative to mean)
# that is greater than or equal to the maximum data value
x_end = x_mean + np.ceil((x_max - x_mean) / binsize) * binsize
y_end = y_mean + np.ceil((y_max - y_mean) / binsize) * binsize

# Create bin edges from start to end with specified binsize
bins_x = np.arange(x_start, x_end + binsize / 2, binsize)
bins_y = np.arange(y_start, y_end + binsize / 2, binsize)
# cut and get bin centre
df["xBin"] = pd.cut(df.x, bins=bins_x)
df["xBin"] = df["xBin"].apply(lambda x: x.mid)
Expand Down
8 changes: 7 additions & 1 deletion tests/test_trackcompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def test_gridded_skill_bins(comparer):

# binsize (overwrites bins)
ds = comparer.gridded_skill(metrics=["bias"], binsize=2.5, bins=100)
assert len(ds.x) == 4
assert len(ds.x) == 5 # One more bin needed to cover full data range
assert len(ds.y) == 3
assert ds.x[0] == -0.75

Expand All @@ -299,6 +299,12 @@ def test_gridded_skill_misc(comparer):
assert df.loc[df.n < 20, ["bias", "rmse"]].isna().all().all()


def test_gridded_skill_binsize_no_data_loss(comparer):
    """Check that gridding by ``binsize`` keeps every data point.

    The ``n`` values of the gridded skill result are summed and the total
    must equal ``comparer.n_points``.
    """
    gridded = comparer.gridded_skill(metrics=["bias"], binsize=2.5)
    total_binned = int(gridded.n.data.sum().values)
    assert total_binned == comparer.n_points


def test_hist(comparer):
cmp = comparer

Expand Down