Skip to content

Commit de63af6

Browse files
committed
Allow initial sizing of BinaryFuseBuilder
Add `MakeBinaryFuseBuilder`, which pre-initializes a binary fuse builder to a given initial size, guaranteeing no allocations for builds up to that size. This avoids reallocations as the buffers grow. We also improve the `reuseBuffer` function to use an `append` pattern (relying on the runtime's internal formulas for slice growth) and isolate the unsafe hack to the single place where it is still needed (the fingerprints slice).
1 parent 73d34b8 commit de63af6

File tree

3 files changed

+104
-45
lines changed

3 files changed

+104
-45
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ When building many filters, memory can be reused (reducing allocation and GC
8686
overhead) with a `BinaryFuseBuilder`:
8787
```Go
8888
var builder xorfilter.BinaryFuseBuilder
89+
builder = xorfilter.MakeBinaryFuseBuilder[uint16](initialSize) // Optional
8990
for {
90-
filter8, _ := BuildBinaryFuse[uint8](&builder, keys)
91-
filter16, _ := BuildBinaryFuse[uint16](&builder, keys)
92-
...
91+
filter16, _ := BuildBinaryFuse[uint16](&builder, keys)
92+
...
9393
}
9494
```
9595

binaryfusefilter.go

Lines changed: 59 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,44 @@ func NewBinaryFuse[T Unsigned](keys []uint64) (*BinaryFuse[T], error) {
3838

3939
// BinaryFuseBuilder can be used to reuse memory allocations across multiple
4040
// BinaryFuse builds.
41+
//
42+
// An empty BinaryFuseBuilder can be used, and its internal memory will grow as
43+
// needed over time. MakeBinaryFuseBuilder can also be used to pre-initialize
44+
// for a certain size.
4145
type BinaryFuseBuilder struct {
42-
alone reusableBuffer
43-
t2hash reusableBuffer
44-
reverseOrder reusableBuffer
45-
t2count reusableBuffer
46-
reverseH reusableBuffer
47-
startPos reusableBuffer
48-
fingerprints reusableBuffer
46+
alone []uint32
47+
t2hash []uint64
48+
reverseOrder []uint64
49+
t2count []uint8
50+
reverseH []uint8
51+
startPos []uint32
52+
fingerprints []uint32
53+
}
54+
55+
// MakeBinaryFuseBuilder creates a BinaryFuseBuilder with enough preallocated
56+
// memory to allow building of binary fuse filters with fingerprint type T
57+
// and up to initialSize keys without allocations.
58+
//
59+
// Note that the builder can be used with a smaller fingerprint type without
60+
// reallocations. If it is used with a larger fingerprint type, there will be
61+
// one reallocation for the fingerprints slice.
62+
func MakeBinaryFuseBuilder[T Unsigned](initialSize int) BinaryFuseBuilder {
63+
var b BinaryFuseBuilder
64+
var filter BinaryFuse[T]
65+
size := uint32(initialSize)
66+
filter.initializeParameters(&b, size)
67+
capacity := uint32(len(filter.Fingerprints))
68+
reuseBuffer(&b.alone, capacity)
69+
reuseBuffer(&b.t2count, capacity)
70+
reuseBuffer(&b.reverseH, size)
71+
72+
reuseBuffer(&b.t2hash, capacity)
73+
reuseBuffer(&b.reverseOrder, size+1)
74+
// The startPos array needs to be large enough for smaller sizes which use a
75+
// smaller segment length. Also, we dynamically try a smaller segment length
76+
// in some cases.
77+
reuseBuffer(&b.startPos, 2<<bits.Len32(filter.SegmentCount+1))
78+
return b
4979
}
5080

5181
// BuildBinaryFuse creates a binary fuse filter with provided keys, reusing
@@ -71,15 +101,15 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF
71101
filter.Seed = splitmix64(&rngcounter)
72102
capacity := uint32(len(filter.Fingerprints))
73103

74-
alone := reuseBuffer[uint32](&b.alone, int(capacity))
104+
alone := reuseBuffer(&b.alone, capacity)
75105
// the lowest 2 bits are the h index (0, 1, or 2)
76106
// so we only have 6 bits for counting;
77107
// but that's sufficient
78-
t2count := reuseBuffer[uint8](&b.t2count, int(capacity))
79-
reverseH := reuseBuffer[uint8](&b.reverseH, int(size))
108+
t2count := reuseBuffer(&b.t2count, capacity)
109+
reverseH := reuseBuffer(&b.reverseH, size)
80110

81-
t2hash := reuseBuffer[uint64](&b.t2hash, int(capacity))
82-
reverseOrder := reuseBuffer[uint64](&b.reverseOrder, int(size+1))
111+
t2hash := reuseBuffer(&b.t2hash, capacity)
112+
reverseOrder := reuseBuffer(&b.reverseOrder, size+1)
83113
reverseOrder[size] = 1
84114

85115
// the array h0, h1, h2, h0, h1, h2
@@ -118,10 +148,10 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF
118148
for (1 << blockBits) < filter.SegmentCount {
119149
blockBits += 1
120150
}
121-
startPos := reuseBuffer[uint](&b.startPos, 1<<blockBits)
151+
startPos := reuseBuffer(&b.startPos, 1<<blockBits)
122152
for i := range startPos {
123153
// important: we do not want i * size to overflow!!!
124-
startPos[i] = uint((uint64(i) * uint64(size)) >> blockBits)
154+
startPos[i] = uint32((uint64(i) * uint64(size)) >> blockBits)
125155
}
126156
for _, key := range keys {
127157
hash := mixsplit(key, filter.Seed)
@@ -293,7 +323,14 @@ func (filter *BinaryFuse[T]) initializeParameters(b *BinaryFuseBuilder, size uin
293323
}
294324
filter.SegmentCount = totalSegmentCount - (arity - 1)
295325
filter.SegmentCountLength = filter.SegmentCount * filter.SegmentLength
296-
filter.Fingerprints = reuseBuffer[T](&b.fingerprints, int(totalSegmentCount*filter.SegmentLength))
326+
327+
// Allocate fingerprints slice.
328+
numFingerprints := totalSegmentCount * filter.SegmentLength
329+
// Our backing buffer is a []uint32. Figure out how many uint32s we need
330+
// to back a []T of the requested size.
331+
bufSize := (numFingerprints*uint32(unsafe.Sizeof(T(0))) + 3) / 4
332+
buf := reuseBuffer(&b.fingerprints, bufSize)
333+
filter.Fingerprints = unsafe.Slice((*T)(unsafe.Pointer(unsafe.SliceData(buf))), numFingerprints)
297334
}
298335

299336
func (filter *BinaryFuse[T]) mod3(x uint8) uint8 {
@@ -348,29 +385,11 @@ func calculateSizeFactor(arity uint32, size uint32) float64 {
348385
}
349386
}
350387

351-
// reusableBuffer allows reuse of a backing buffer to avoid allocations for
352-
// slices of integers.
353-
type reusableBuffer struct {
354-
buf []uint64
355-
}
356-
357-
type integer interface {
358-
~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64
359-
}
360-
361-
// reuseBuffer returns an empty slice of the given size, reusing the last buffer
362-
// if possible.
363-
func reuseBuffer[T integer](b *reusableBuffer, size int) []T {
364-
const sizeOfUint64 = 8
365-
// Our backing buffer is a []uint64. Figure out how many uint64s we need
366-
// to back a []T of the requested size.
367-
bufSize := int((uintptr(size)*unsafe.Sizeof(T(0)) + sizeOfUint64 - 1) / sizeOfUint64)
368-
if cap(b.buf) >= bufSize {
369-
clear(b.buf[:bufSize])
370-
} else {
371-
// We need to allocate a new buffer. Increase by at least 25% to amortize
372-
// allocations; this is what append() does for large enough slices.
373-
b.buf = make([]uint64, max(bufSize, cap(b.buf)+cap(b.buf)/4))
374-
}
375-
return unsafe.Slice((*T)(unsafe.Pointer(unsafe.SliceData(b.buf))), size)
388+
// reuseBuffer returns a zeroed slice of the given size, reusing the previous
389+
// one if possible.
390+
func reuseBuffer[T uint8 | uint32 | uint64](buf *[]T, size uint32) []T {
391+
// The compiler recognizes this pattern and doesn't allocate a temporary
392+
// slice. This pattern is used in slices.Grow().
393+
*buf = append((*buf)[:0], make([]T, size)...)
394+
return *buf
376395
}

binaryfusefilter_test.go

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -350,10 +350,22 @@ func TestBinaryFuseN_Issue35(t *testing.T) {
350350
}
351351
}
352352

353+
// TestBinaryFuseBuilder verifies that repeated builds with the same builder
354+
// create the exact same filter as using NewBinaryFuse.
353355
func TestBinaryFuseBuilder(t *testing.T) {
354-
// Verify that repeated builds with the same builder create the exact same
355-
// filter as using NewBinaryFuse.
356356
var bld BinaryFuseBuilder
357+
// Test with and without pre-allocation.
358+
if rand.IntN(2) == 0 {
359+
maxSize := 1 + rand.IntN(1<<rand.IntN(20))
360+
switch rand.IntN(3) {
361+
case 0:
362+
bld = MakeBinaryFuseBuilder[uint8](maxSize)
363+
case 1:
364+
bld = MakeBinaryFuseBuilder[uint16](maxSize)
365+
case 2:
366+
bld = MakeBinaryFuseBuilder[uint32](maxSize)
367+
}
368+
}
357369
for i := 0; i < 100; i++ {
358370
n := 1 + rand.IntN(1<<rand.IntN(20))
359371
keys := make([]uint64, n)
@@ -381,6 +393,34 @@ func crossCheckFuseBuilder[T Unsigned](t *testing.T, bld *BinaryFuseBuilder, key
381393
require.Equal(t, *expected, filter)
382394
}
383395

396+
// TestMakeBinaryFuseBuilder verifies that using MakeBinaryFuseBuilder prevents
397+
// all allocations.
398+
func TestMakeBinaryFuseBuilder(t *testing.T) {
399+
maxSize := 1000 + rand.IntN(100_000)
400+
keys := make([]uint64, maxSize)
401+
for j := range keys {
402+
keys[j] = rand.Uint64()
403+
}
404+
bld := MakeBinaryFuseBuilder[uint16](maxSize)
405+
numAllocs := testing.AllocsPerRun(100, func() {
406+
for range 10 {
407+
var size int
408+
if rand.IntN(100) == 1 {
409+
size = maxSize
410+
} else {
411+
size = 1 + rand.IntN(maxSize)
412+
}
413+
if rand.IntN(2) == 0 {
414+
// Smaller fingerprints can reuse the same preallocated space.
415+
_, _ = BuildBinaryFuse[uint8](&bld, keys[:size])
416+
} else {
417+
_, _ = BuildBinaryFuse[uint16](&bld, keys[:size])
418+
}
419+
}
420+
})
421+
require.Zero(t, numAllocs)
422+
}
423+
384424
// segmentLengthSizes represents the range of sizes [startSize, endSize] that
385425
// all get the same segmentLength.
386426
type segmentLengthSizes struct {

0 commit comments

Comments
 (0)