Skip to content

Commit 8acd36e

Browse files
committed
Use nextafter to fuzz around floating point break points
1 parent 9933ef8 commit 8acd36e

File tree

3 files changed

+38
-47
lines changed

3 files changed

+38
-47
lines changed

plotnine/_utils/__init__.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,3 +1207,24 @@ def has_alpha_channel(c: str | tuple) -> bool:
12071207
return c.startswith("#") and len(c) == 9
12081208
else:
12091209
return color_utils.is_color_tuple(c) and len(c) == 4
1210+
1211+
1212+
def nextafter_range(rng: tuple[float, float]) -> tuple[float, float]:
    """
    Expand floating-point range by a step to adjacent representable numbers

    The expansion is the smallest one possible: each end of the range
    is replaced by its immediate floating point neighbour, so the
    original end points fall strictly inside the result.

    Parameters
    ----------
    rng :
        A tuple (min, max) representing the range to expand.

    Returns
    -------
    :
        A new tuple (lower, upper) where,
            - lower is moved 1 float toward -∞
            - upper is moved 1 float toward +∞

    Notes
    -----
    An end point that is already infinite in the direction it is being
    moved has no further neighbour and is returned unchanged.
    """
    # Imported at function scope, as in the original definition.
    from math import inf, nextafter

    return (nextafter(rng[0], -inf), nextafter(rng[1], inf))

plotnine/stats/binning.py

Lines changed: 13 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,12 @@ def breaks_from_binwidth(
8282
if center is not None:
8383
boundary = center - boundary
8484

85-
epsilon = np.finfo(float).eps
8685
shift = np.floor((x_range[0] - boundary) / binwidth)
8786
origin = boundary + shift * binwidth
88-
# The (1-epsilon) factor prevents numerical roundoff in the
87+
# The nextafter reduction prevents numerical roundoff in the
8988
# binwidth from creating an extra break beyond the one that
9089
# includes x_range[1].
91-
max_x = x_range[1] + binwidth * (1 - epsilon)
90+
max_x = np.nextafter(x_range[1] + binwidth, -np.inf)
9291
breaks = np.arange(origin, max_x, binwidth)
9392
return breaks
9493

@@ -303,48 +302,21 @@ def fuzzybreaks(
303302

304303
# To minimise precision errors, we do not pass the boundary and
305304
# binwidth into np.arange as params. The resulting breaks
306-
# can then be adjusted with finer(epsilon based rather than
307-
# some arbitrary small number) precision.
305+
# can then be adjusted to the next floating point number.
308306
breaks = np.arange(boundary, srange[1] + binwidth, binwidth)
309307
return _adjust_breaks(breaks, right)
310308

311309

312310
def _adjust_breaks(breaks: FloatArray, right: bool) -> FloatArray:
    """
    Adjust breaks to include/exclude every right break

    Every break is moved to an adjacent representable floating point
    number. The closing break (the first one when ``right=True``, the
    last one when ``right=False``) is moved in the opposite direction
    to all the others.

    If right=True, the breaks create intervals closed on the right
    i.e. [_] (_] (_] (_]
    If right=False, the breaks create intervals closed on the left
    i.e. [_) [_) [_) [_]

    Parameters
    ----------
    breaks :
        Break points to adjust. The input is not modified.
    right :
        Whether the intervals are closed on the right.

    Returns
    -------
    :
        A new array of the adjusted breaks. An empty input gives an
        empty result.
    """
    # limit: the direction in which the ordinary breaks are nudged
    # idx: position of the closing break, nudged the other way
    limit, idx = (np.inf, 0) if right else (-np.inf, -1)
    fuzzy = np.nextafter(breaks, limit)

    # With no breaks there is no closing break to adjust; indexing
    # an empty array would raise an IndexError.
    if fuzzy.size:
        fuzzy[idx] = np.nextafter(breaks[idx], -limit)
    return fuzzy

plotnine/stats/stat_sina.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
import pandas as pd
55

6-
from .._utils import array_kind, jitter, resolution
6+
from .._utils import array_kind, jitter, nextafter_range, resolution
77
from ..doctools import document
88
from ..exceptions import PlotnineError
99
from ..mapping.aes import has_groups
@@ -222,13 +222,11 @@ def compute_group(self, data, scales):
222222
data["density"] = densf(y)
223223
data["scaled"] = data["density"] / dens["density"].max()
224224
else:
225-
y_dim = scales.y.dimension()
226-
fuzz = 1e-8
227-
y_dim_fuzzed = (y_dim[0] - fuzz, y_dim[1] + fuzz)
225+
expanded_y_range = nextafter_range(scales.y.dimension())
228226
if binwidth is not None:
229-
bins = breaks_from_binwidth(y_dim_fuzzed, binwidth)
227+
bins = breaks_from_binwidth(expanded_y_range, binwidth)
230228
else:
231-
bins = breaks_from_bins(y_dim_fuzzed, self.params["bins"])
229+
bins = breaks_from_bins(expanded_y_range, self.params["bins"])
232230

233231
# bin based estimation
234232
bin_index = pd.cut(y, bins, include_lowest=True, labels=False) # pyright: ignore[reportCallIssue,reportArgumentType]

0 commit comments

Comments
 (0)