@@ -38,14 +38,44 @@ func NewBinaryFuse[T Unsigned](keys []uint64) (*BinaryFuse[T], error) {
3838
// BinaryFuseBuilder holds scratch slices so that memory allocations can be
// reused across multiple BinaryFuse builds.
//
// The zero value is ready to use; its slices grow on demand as builds request
// more space. MakeBinaryFuseBuilder can be used instead to pre-size the
// slices for a given number of keys.
type BinaryFuseBuilder struct {
	// alone is requested with one entry per fingerprint slot.
	alone []uint32
	// t2hash is requested with one entry per fingerprint slot.
	t2hash []uint64
	// reverseOrder is requested with one entry per key plus one extra entry.
	reverseOrder []uint64
	// t2count is requested with one entry per fingerprint slot. The lowest
	// 2 bits of an entry are the h index (0, 1, or 2), which leaves 6 bits
	// for counting.
	t2count []uint8
	// reverseH is requested with one entry per key.
	reverseH []uint8
	// startPos is requested with a power-of-two number of entries derived
	// from the segment count.
	startPos []uint32
	// fingerprints is the raw uint32 storage that is reinterpreted as the
	// filter's []T fingerprint slice.
	fingerprints []uint32
}
54+
55+ // MakeBinaryFuseBuilder creates a BinaryFuseBuilder with enough preallocated
56+ // memory to allow building of binary fuse filters with fingerprint type T
57+ // without allocations.
58+ //
59+ // Note that the builder can be used with a smaller fingerprint type without
60+ // reallocations. If it is used with a larger fingerprint type, there will be
61+ // one reallocation for the fingerprints slice.
62+ func MakeBinaryFuseBuilder [T Unsigned ](initialSize int ) BinaryFuseBuilder {
63+ var b BinaryFuseBuilder
64+ var filter BinaryFuse [T ]
65+ size := uint32 (initialSize )
66+ filter .initializeParameters (& b , size )
67+ capacity := uint32 (len (filter .Fingerprints ))
68+ reuseBuffer (& b .alone , capacity )
69+ reuseBuffer (& b .t2count , capacity )
70+ reuseBuffer (& b .reverseH , size )
71+
72+ reuseBuffer (& b .t2hash , capacity )
73+ reuseBuffer (& b .reverseOrder , size + 1 )
74+ // The startPos array needs to be large enough for smaller sizes which use a
75+ // smaller segment length. Also, we dynamically try a smaller segment length
76+ // in some cases.
77+ reuseBuffer (& b .startPos , 2 << bits .Len32 (filter .SegmentCount + 1 ))
78+ return b
4979}
5080
5181// BuildBinaryFuse creates a binary fuse filter with provided keys, reusing
@@ -71,15 +101,15 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF
71101 filter .Seed = splitmix64 (& rngcounter )
72102 capacity := uint32 (len (filter .Fingerprints ))
73103
74- alone := reuseBuffer [ uint32 ] (& b .alone , int ( capacity ) )
104+ alone := reuseBuffer (& b .alone , capacity )
75105 // the lowest 2 bits are the h index (0, 1, or 2)
76106 // so we only have 6 bits for counting;
77107 // but that's sufficient
78- t2count := reuseBuffer [ uint8 ] (& b .t2count , int ( capacity ) )
79- reverseH := reuseBuffer [ uint8 ] (& b .reverseH , int ( size ) )
108+ t2count := reuseBuffer (& b .t2count , capacity )
109+ reverseH := reuseBuffer (& b .reverseH , size )
80110
81- t2hash := reuseBuffer [ uint64 ] (& b .t2hash , int ( capacity ) )
82- reverseOrder := reuseBuffer [ uint64 ] (& b .reverseOrder , int ( size + 1 ) )
111+ t2hash := reuseBuffer (& b .t2hash , capacity )
112+ reverseOrder := reuseBuffer (& b .reverseOrder , size + 1 )
83113 reverseOrder [size ] = 1
84114
85115 // the array h0, h1, h2, h0, h1, h2
@@ -118,10 +148,10 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF
118148 for (1 << blockBits ) < filter .SegmentCount {
119149 blockBits += 1
120150 }
121- startPos := reuseBuffer [ uint ] (& b .startPos , 1 << blockBits )
151+ startPos := reuseBuffer (& b .startPos , 1 << blockBits )
122152 for i := range startPos {
123153 // important: we do not want i * size to overflow!!!
124- startPos [i ] = uint ((uint64 (i ) * uint64 (size )) >> blockBits )
154+ startPos [i ] = uint32 ((uint64 (i ) * uint64 (size )) >> blockBits )
125155 }
126156 for _ , key := range keys {
127157 hash := mixsplit (key , filter .Seed )
@@ -293,7 +323,14 @@ func (filter *BinaryFuse[T]) initializeParameters(b *BinaryFuseBuilder, size uin
293323 }
294324 filter .SegmentCount = totalSegmentCount - (arity - 1 )
295325 filter .SegmentCountLength = filter .SegmentCount * filter .SegmentLength
296- filter .Fingerprints = reuseBuffer [T ](& b .fingerprints , int (totalSegmentCount * filter .SegmentLength ))
326+
327+ // Allocate fingerprints slice.
328+ numFingerprints := totalSegmentCount * filter .SegmentLength
329+ // Our backing buffer is a []uint32. Figure out how many uint32s we need
330+ // to back a []T of the requested size.
331+ bufSize := (numFingerprints * uint32 (unsafe .Sizeof (T (0 ))) + 3 ) / 4
332+ buf := reuseBuffer (& b .fingerprints , bufSize )
333+ filter .Fingerprints = unsafe .Slice ((* T )(unsafe .Pointer (unsafe .SliceData (buf ))), numFingerprints )
297334}
298335
299336func (filter * BinaryFuse [T ]) mod3 (x uint8 ) uint8 {
@@ -348,29 +385,11 @@ func calculateSizeFactor(arity uint32, size uint32) float64 {
348385 }
349386}
350387
351- // reusableBuffer allows reuse of a backing buffer to avoid allocations for
352- // slices of integers.
353- type reusableBuffer struct {
354- buf []uint64
355- }
356-
357- type integer interface {
358- ~ int | ~ int8 | ~ int16 | ~ int32 | ~ int64 | ~ uint | ~ uint8 | ~ uint16 | ~ uint32 | ~ uint64
359- }
360-
361- // reuseBuffer returns an empty slice of the given size, reusing the last buffer
362- // if possible.
363- func reuseBuffer [T integer ](b * reusableBuffer , size int ) []T {
364- const sizeOfUint64 = 8
365- // Our backing buffer is a []uint64. Figure out how many uint64s we need
366- // to back a []T of the requested size.
367- bufSize := int ((uintptr (size )* unsafe .Sizeof (T (0 )) + sizeOfUint64 - 1 ) / sizeOfUint64 )
368- if cap (b .buf ) >= bufSize {
369- clear (b .buf [:bufSize ])
370- } else {
371- // We need to allocate a new buffer. Increase by at least 25% to amortize
372- // allocations; this is what append() does for large enough slices.
373- b .buf = make ([]uint64 , max (bufSize , cap (b .buf )+ cap (b .buf )/ 4 ))
374- }
375- return unsafe .Slice ((* T )(unsafe .Pointer (unsafe .SliceData (b .buf ))), size )
// reuseBuffer resizes *buf to exactly size zero-valued elements, stores the
// result back into *buf and returns it. The existing backing array is kept
// whenever its capacity is sufficient; otherwise append allocates a larger one.
func reuseBuffer[T uint8 | uint32 | uint64](buf *[]T, size uint32) []T {
	// Truncating and then appending a fresh make() is the pattern used in
	// slices.Grow(); the compiler recognizes it and doesn't allocate a
	// temporary slice.
	resized := append((*buf)[:0], make([]T, size)...)
	*buf = resized
	return resized
}
0 commit comments