infino 0.1.0

A fast retrieval engine that stores data on object storage and runs SQL, full-text search, and vector search over it from a single system — search-on-Parquet.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Infino Authors

//! FTS term-presence bloom filter.
//!
//! One bloom per (superfile, FTS column). Built once at commit time
//! by feeding the superfile's FST term iterator through a
//! [`BloomBuilder`]; queried at skip-prune time via
//! [`Bloom::contains`] to decide whether a superfile could contain
//! at least one of a query's terms. Returns `false` definitively
//! (skip the superfile) or `true` with the bloom's false-positive
//! rate (scan the superfile).
//!
//! # Algorithm: block bloom + XXH3-64 + portable SIMD bit-test
//!
//! - **Block bloom** (Putze et al., 2007): the bit array is split
//!   into fixed-size *blocks* (one cache line, 64 B = 512 bits).
//!   Each key is mapped to one block; both `insert` and `contains`
//!   touch only that block. Cache-line-bounded probes avoid the
//!   scattered random reads of a non-block bloom.
//! - **XXH3-64** as the hash backbone: ~3× faster than SipHash-1-3
//!   in the early-exit miss path that dominates skip-prune
//!   workloads. We don't need HashDoS resistance — terms come
//!   from a closed corpus, not adversarial input.
//! - **Portable SIMD via `wide::u64x4`**: `contains` builds a
//!   K-bit test mask once, then checks `(block & mask) == mask`
//!   in two SIMD AND-NOT-OR-reduce operations covering all 8
//!   block words. On x86_64 with AVX2 this lowers to two 256-bit
//!   `vpand` / `vptest` instructions; on aarch64 NEON it lowers
//!   to four 128-bit `bic`/`orr` instructions; elsewhere it
//!   falls back to scalar.
//! - **Kirsch-Mitzenmacher splitting with golden-ratio
//!   avalanche**: one 64-bit hash is split into a block index
//!   (high 32 bits, masked to `n_blocks`) and K position seeds
//!   derived from a multiplicative remix
//!   (`h * 0x9E37_79B9_7F4A_7C15`). The avalanche decorrelates
//!   the position seeds from the block index, which is what
//!   keeps the in-block bit pattern from clustering.
//!
//! # Sizing (default)
//!
//! - 64 KiB total → 1024 blocks of 64 B each (`n_blocks` is always
//!   a power of two so the modulo is a bit-AND).
//! - K = 4 hash functions per block.
//! - At ~100 K distinct terms (a typical Zipfian 1 M-doc superfile),
//!   that's ~100 items / block, ~7% FPR — meaningful skip without
//!   blowing manifest RAM.
//!
//! # Layout
//!
//! Bytes are stored as `Arc<[u64]>` of length `n_blocks * 8`
//! (each block is 8 × `u64` = 64 B). Storing as `u64` rather than
//! `u8` lets the inner loop test bits with one shift-and-mask per
//! probe, no byte-aligned addressing math. The Arc wrapper is so a
//! `FtsSummaryAgg` clone (and through it, a manifest clone) doesn't
//! duplicate the bloom payload.

use std::sync::Arc;

use wide::u64x4;
use xxhash_rust::xxh3::xxh3_64;

/// Block size in bytes — one cache line on x86_64 / aarch64.
pub const BLOCK_BYTES: usize = 64;
/// Block size in bits.
pub const BLOCK_BITS: usize = BLOCK_BYTES * 8; // 512
/// `u64` words per block.
const BLOCK_WORDS: usize = BLOCK_BYTES / 8; // 8
/// Hash functions per block (Kirsch-Mitzenmacher derived from one
/// 64-bit XXH3 result).
pub const K: usize = 4;

/// Default bloom size: 64 KiB / 1024 blocks. Sized so a typical
/// Zipfian 1 M-doc superfile with ~100 K distinct terms hits ~7%
/// FPR.
pub const DEFAULT_N_BLOCKS: usize = 1024;
/// Default bloom byte size (64 KiB).
pub const DEFAULT_BLOOM_BYTES: usize = DEFAULT_N_BLOCKS * BLOCK_BYTES;

/// 64-bit golden-ratio constant (⌊2^64 / φ⌋, odd). Used as a
/// multiplicative-avalanche mixer to decorrelate the in-block bit
/// positions from the block index, both derived from the same hash.
const GOLDEN_RATIO_U64: u64 = 0x9E37_79B9_7F4A_7C15;

/// Term-presence bloom filter for a single FTS column in a single
/// superfile.
///
/// Cheap to clone (`Arc::clone` on the underlying word buffer); a
/// `FtsSummaryAgg` clone shares this Arc with all manifest copies in
/// the supertable's snapshot history.
#[derive(Clone, Debug)]
pub struct Bloom {
    words: Arc<[u64]>,
    n_blocks_mask: u32,
}

impl Bloom {
    /// Reconstruct from a previously-emitted byte buffer. The
    /// buffer length must be exactly `n_blocks * BLOCK_BYTES` for
    /// some power-of-two `n_blocks`; passing anything else returns
    /// `None`.
    ///
    /// Useful for serialization round-trips when the manifest is
    /// loaded from a persistent backend.
    pub fn from_bytes(bytes: &[u8]) -> Option<Self> {
        if !bytes.len().is_multiple_of(BLOCK_BYTES) {
            return None;
        }
        let n_blocks = bytes.len() / BLOCK_BYTES;
        if n_blocks == 0 || !n_blocks.is_power_of_two() {
            return None;
        }
        let mut words = vec![0u64; n_blocks * BLOCK_WORDS];
        for (i, w) in words.iter_mut().enumerate() {
            let off = i * 8;
            let mut buf = [0u8; 8];
            buf.copy_from_slice(&bytes[off..off + 8]);
            *w = u64::from_le_bytes(buf);
        }
        Some(Self {
            words: words.into(),
            n_blocks_mask: (n_blocks - 1) as u32,
        })
    }

    /// Serialize the bloom's bytes (little-endian `u64` words). The
    /// inverse of [`Bloom::from_bytes`].
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut out = Vec::with_capacity(self.words.len() * 8);
        for w in self.words.iter() {
            out.extend_from_slice(&w.to_le_bytes());
        }
        out
    }

    /// Test whether `key` may have been inserted. `false` is
    /// definitive (the superfile doesn't contain `key`); `true` may
    /// be a false positive at the bloom's configured FPR.
    #[inline]
    pub fn contains(&self, key: &[u8]) -> bool {
        let h = xxh3_64(key);
        let (block_idx, mask) = block_and_mask(h, self.n_blocks_mask);
        let block_offset = block_idx * BLOCK_WORDS;
        let block: &[u64; BLOCK_WORDS] = (&self.words[block_offset..block_offset + BLOCK_WORDS])
            .try_into()
            .expect("BLOCK_WORDS-sized slice");
        contains_block(block, &mask)
    }

    /// Number of blocks. Always a power of two.
    pub fn n_blocks(&self) -> usize {
        (self.n_blocks_mask as usize) + 1
    }

    /// Total byte length.
    pub fn len(&self) -> usize {
        self.n_blocks() * BLOCK_BYTES
    }

    /// Whether the underlying word array is empty.
    pub fn is_empty(&self) -> bool {
        self.words.is_empty()
    }
}

/// Builder for a [`Bloom`]. Constructed with [`BloomBuilder::new`]
/// (default size) or [`BloomBuilder::with_n_blocks`] (custom);
/// fed with [`BloomBuilder::insert`]; finalized with
/// [`BloomBuilder::finish`].
///
/// One builder per (superfile, FTS column) at commit time.
pub struct BloomBuilder {
    words: Vec<u64>,
    n_blocks_mask: u32,
}

impl BloomBuilder {
    /// Builder sized at the default 64 KiB / 1024 blocks.
    pub fn new() -> Self {
        Self::with_n_blocks(DEFAULT_N_BLOCKS)
    }

    /// Builder with a caller-specified block count. Must be a
    /// power of two and ≥ 1; panics in debug otherwise (caller bug).
    pub fn with_n_blocks(n_blocks: usize) -> Self {
        debug_assert!(
            n_blocks.is_power_of_two() && n_blocks >= 1,
            "n_blocks must be a power of two ≥ 1; got {n_blocks}",
        );
        Self {
            words: vec![0u64; n_blocks * BLOCK_WORDS],
            n_blocks_mask: (n_blocks - 1) as u32,
        }
    }

    /// Insert `key` into the bloom. Idempotent (re-inserting the
    /// same key sets the same bits).
    pub fn insert(&mut self, key: &[u8]) {
        let h = xxh3_64(key);
        let (block_idx, mask) = block_and_mask(h, self.n_blocks_mask);
        let block_offset = block_idx * BLOCK_WORDS;
        // OR each mask word into the corresponding block word.
        // Insert is uncommon enough (per-superfile-build, not per-
        // query) that we don't bother SIMDing this path.
        let block = &mut self.words[block_offset..block_offset + BLOCK_WORDS];
        for (b, m) in block.iter_mut().zip(mask.iter()) {
            *b |= *m;
        }
    }

    /// Finalize into a shareable [`Bloom`].
    pub fn finish(self) -> Bloom {
        Bloom {
            words: self.words.into(),
            n_blocks_mask: self.n_blocks_mask,
        }
    }

    /// Number of blocks. Always a power of two.
    pub fn n_blocks(&self) -> usize {
        (self.n_blocks_mask as usize) + 1
    }
}

impl Default for BloomBuilder {
    fn default() -> Self {
        Self::new()
    }
}

/// Block-level "all K positions present" check. `true` iff every bit
/// set in `mask` is also set in `block` (equivalently, `mask & !block == 0`
/// over the 512-bit lane).
///
/// Single-tier `wide::u64x4` AND-NOT-OR-reduce kernel. Lowers to two
/// 256-bit `vpandn` / `vpor` pairs on AVX2 x86_64 (the
/// `target-cpu=x86-64-v3` baseline), four 128-bit NEON ops on aarch64,
/// scalar elsewhere.
///
/// We deliberately do *not* dispatch to an AVX-512 kernel here even
/// on hosts that support it. The block is exactly 64 B = one ZMM
/// register, so AVX-512 only saves a single 256-bit iteration; at the
/// ~1 ns per-call regime this kernel runs in, that saving is
/// dominated by the per-instruction frequency-licensing cost on
/// Sapphire Rapids / Ice Lake (a 5% regression vs `wide` on the
/// `avx512_microbench_contains_block` benchmark before it was
/// removed). The AVX-512 win factor scales with bytes-per-call; this
/// kernel doesn't have enough of them to overcome the license cost
/// in isolation.
#[inline]
fn contains_block(block: &[u64; BLOCK_WORDS], mask: &[u64; BLOCK_WORDS]) -> bool {
    let block_lo = u64x4::new([block[0], block[1], block[2], block[3]]);
    let block_hi = u64x4::new([block[4], block[5], block[6], block[7]]);
    let mask_lo = u64x4::new([mask[0], mask[1], mask[2], mask[3]]);
    let mask_hi = u64x4::new([mask[4], mask[5], mask[6], mask[7]]);
    let r_lo = !block_lo & mask_lo;
    let r_hi = !block_hi & mask_hi;
    let combined = r_lo | r_hi;
    let parts = combined.to_array();
    (parts[0] | parts[1] | parts[2] | parts[3]) == 0
}

/// Derive (block_index, K-bit mask spread across BLOCK_WORDS u64s)
/// from a 64-bit hash.
///
/// Splitting strategy (avoids the K-position arithmetic-progression
/// correlation that's the standard failure mode of naive
/// Kirsch-Mitzenmacher):
///
/// - **`h1`** (the XXH3-64 result): high 32 bits drive the block
///   index. Low 32 bits aren't used here — we want the block
///   index decorrelated from the position seeds, since the K
///   positions form an arithmetic progression in their seeds and
///   any overlap between the index bits and the seed bits funnels
///   into clustered position patterns within a block.
/// - **`h2`** (golden-ratio-multiplicative remix of h1): provides
///   the position seeds. The multiplication
///   `h1 * 0x9E37_79B9_7F4A_7C15` is an avalanche function —
///   every output bit depends on every input bit — so h2's bits
///   are effectively independent of the high 32 bits of h1 that
///   drove block selection. h2's low 32 bits give h_low; h2's
///   high 32 bits give h_high; positions = h_low + i·h_high mod
///   BLOCK_BITS for i in 0..K.
///
/// Returns `(block_index, mask)` where `mask` has exactly K bits
/// set across its 8 u64s, matching the K positions.
#[inline]
fn block_and_mask(h: u64, n_blocks_mask: u32) -> (usize, [u64; BLOCK_WORDS]) {
    let block_idx = ((h >> 32) as u32 & n_blocks_mask) as usize;
    // Golden-ratio multiplicative avalanche to decorrelate
    // position seeds from the block index above.
    let h2 = h.wrapping_mul(GOLDEN_RATIO_U64);
    let h_low = h2 as u32;
    let h_high = (h2 >> 32) as u32;
    let block_bits_mask = (BLOCK_BITS as u32) - 1; // 511

    let mut mask = [0u64; BLOCK_WORDS];
    // Unrolled K=4 to give the compiler the cleanest trace into
    // a SIMD-friendly mask construction.
    let p0 = (h_low & block_bits_mask) as usize;
    let p1 = (h_low.wrapping_add(h_high) & block_bits_mask) as usize;
    let p2 = (h_low.wrapping_add(h_high.wrapping_mul(2)) & block_bits_mask) as usize;
    let p3 = (h_low.wrapping_add(h_high.wrapping_mul(3)) & block_bits_mask) as usize;
    mask[p0 / 64] |= 1u64 << (p0 % 64);
    mask[p1 / 64] |= 1u64 << (p1 % 64);
    mask[p2 / 64] |= 1u64 << (p2 % 64);
    mask[p3 / 64] |= 1u64 << (p3 % 64);

    (block_idx, mask)
}

#[cfg(test)]
mod tests {
    use std::{collections::HashMap, str::from_utf8};

    use super::*;

    /// Build a bloom from a small, deterministic set of keys.
    fn build_with(keys: &[&[u8]]) -> Bloom {
        let mut b = BloomBuilder::new();
        for k in keys {
            b.insert(k);
        }
        b.finish()
    }

    // ---- presence / absence correctness -----------------------------

    #[test]
    fn empty_bloom_contains_nothing() {
        let b = BloomBuilder::new().finish();
        assert!(!b.contains(b"alpha"));
        assert!(!b.contains(b"beta"));
        assert!(!b.contains(b""));
    }

    #[test]
    fn inserted_keys_are_definitely_present() {
        let keys: &[&[u8]] = &[
            b"alpha", b"beta", b"gamma", b"delta", b"epsilon", b"zeta", b"eta", b"theta",
        ];
        let b = build_with(keys);
        for k in keys {
            assert!(
                b.contains(k),
                "inserted key {:?} must be reported present",
                from_utf8(k).unwrap_or("<non-utf8>"),
            );
        }
    }

    #[test]
    fn insert_is_idempotent() {
        let mut b = BloomBuilder::new();
        b.insert(b"hello");
        let bytes_a = b.words.clone();
        b.insert(b"hello");
        let bytes_b = b.words.clone();
        assert_eq!(bytes_a, bytes_b);
    }

    // ---- false-positive rate ---------------------------------------

    /// At ~100K inserted keys / 64 KiB bloom, FPR should be ~7%.
    /// Pin a generous upper bound; a measured 0% or 50% would
    /// indicate a real bug.
    #[test]
    fn fpr_is_within_target_band_at_100k_keys() {
        let mut b = BloomBuilder::new();
        let n_inserted = 100_000usize;
        for i in 0..n_inserted {
            b.insert(format!("term{i}").as_bytes());
        }
        let bloom = b.finish();

        // Probe 100K keys NOT in the inserted set.
        let mut false_positives = 0usize;
        let n_probes = 100_000usize;
        for i in 0..n_probes {
            let probe = format!("absent_term_{i}");
            if bloom.contains(probe.as_bytes()) {
                false_positives += 1;
            }
        }
        let fpr = false_positives as f64 / n_probes as f64;
        // Theoretical: ~7%. Allow [3%, 12%] band.
        assert!(
            (0.03..=0.12).contains(&fpr),
            "FPR {} outside expected band [0.03, 0.12]",
            fpr,
        );
    }

    /// Inserted keys must still all return true at the same scale.
    /// Combined with the FPR test, this verifies "no false
    /// negatives" (a load-bearing bloom invariant).
    #[test]
    fn no_false_negatives_at_100k_keys() {
        let mut b = BloomBuilder::new();
        let n = 100_000usize;
        for i in 0..n {
            b.insert(format!("term{i}").as_bytes());
        }
        let bloom = b.finish();
        for i in 0..n {
            let key = format!("term{i}");
            assert!(
                bloom.contains(key.as_bytes()),
                "false negative on inserted key {key}",
            );
        }
    }

    // ---- sizing -----------------------------------------------------

    #[test]
    fn default_size_is_64kib_1024_blocks() {
        let b = BloomBuilder::new().finish();
        assert_eq!(b.len(), DEFAULT_BLOOM_BYTES);
        assert_eq!(b.n_blocks(), DEFAULT_N_BLOCKS);
    }

    #[test]
    fn custom_size_round_trips_through_builder() {
        for n_blocks in [1usize, 2, 4, 8, 256, 4096] {
            let b = BloomBuilder::with_n_blocks(n_blocks).finish();
            assert_eq!(b.n_blocks(), n_blocks);
            assert_eq!(b.len(), n_blocks * BLOCK_BYTES);
        }
    }

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "n_blocks must be a power of two")]
    fn non_power_of_two_n_blocks_panics_in_debug() {
        let _ = BloomBuilder::with_n_blocks(3);
    }

    // ---- byte serialization ----------------------------------------

    #[test]
    fn to_bytes_from_bytes_round_trip_preserves_membership() {
        let keys: &[&[u8]] = &[b"alpha", b"beta", b"gamma", b"delta", b"epsilon"];
        let b1 = build_with(keys);
        let bytes = b1.to_bytes();

        let b2 = Bloom::from_bytes(&bytes).expect("valid bytes");
        assert_eq!(b2.n_blocks(), b1.n_blocks());

        for k in keys {
            assert!(b2.contains(k));
        }
        // Definite-absent keys (chosen to give scatter); a few might
        // collide as false positives, but not all of them.
        let probes: &[&[u8]] = &[b"never1", b"never2", b"never3", b"never4"];
        let n_collisions = probes.iter().filter(|p| b2.contains(p)).count();
        assert!(
            n_collisions < probes.len(),
            "all probes false-positive — bloom appears saturated",
        );
    }

    #[test]
    fn from_bytes_rejects_misaligned_buffer() {
        // Length not a multiple of BLOCK_BYTES.
        assert!(Bloom::from_bytes(&[0u8; BLOCK_BYTES + 1]).is_none());
        // Length is a multiple but block count isn't a power of two.
        assert!(Bloom::from_bytes(&[0u8; BLOCK_BYTES * 3]).is_none());
        // Empty.
        assert!(Bloom::from_bytes(&[]).is_none());
    }

    // ---- hash distribution -----------------------------------------

    /// Block index distribution should be reasonably uniform
    /// across the block array. With 100K random keys and 1024
    /// blocks the χ² test would be ~1023 d.f.; we just check
    /// that the max-block-load isn't pathologically skewed.
    #[test]
    fn block_load_is_roughly_uniform() {
        let n_blocks = DEFAULT_N_BLOCKS;
        let mut load = vec![0usize; n_blocks];
        for i in 0..100_000usize {
            let h = xxh3_64(format!("term{i}").as_bytes());
            let (block_idx, _) = block_and_mask(h, (n_blocks - 1) as u32);
            load[block_idx] += 1;
        }
        let max = *load.iter().max().expect("non-empty");
        let mean = load.iter().sum::<usize>() as f64 / n_blocks as f64;
        // 100K / 1024 ≈ 97.7 mean. Max should be within ~3× of
        // mean for a healthy hash. (A bad hash that funneled
        // everything into a few blocks would have max in the
        // thousands.)
        assert!(
            (max as f64) < 3.0 * mean,
            "max-block-load {max} more than 3× mean {mean}",
        );
    }

    /// Two different keys should very rarely produce the same
    /// (block_idx, mask). 1000 random keys; collisions should
    /// be a tiny fraction.
    #[test]
    fn different_keys_rarely_collide_on_same_signature() {
        let mut sigs: HashMap<(usize, [u64; BLOCK_WORDS]), Vec<String>> = HashMap::new();
        for i in 0..1000usize {
            let key = format!("a-{i}");
            let h = xxh3_64(key.as_bytes());
            let sig = block_and_mask(h, (DEFAULT_N_BLOCKS - 1) as u32);
            sigs.entry(sig).or_default().push(key);
        }
        let collisions = sigs.values().filter(|v| v.len() > 1).count();
        // For 1000 keys against 1024 blocks × C(512, 4) ≈ 2.8e9
        // signature space, the birthday-bound expected collision
        // count is essentially zero. Allow up to a few for safety.
        assert!(
            collisions < 5,
            "{collisions} signature collisions in 1000 keys"
        );
    }

    // ---- Arc sharing -----------------------------------------------

    #[test]
    fn clone_shares_words_via_arc() {
        let b1 = build_with(&[b"alpha", b"beta"]);
        let b2 = b1.clone();
        assert_eq!(b1.words.as_ptr(), b2.words.as_ptr());
    }

    #[test]
    fn debug_format_doesnt_explode() {
        let b = build_with(&[b"x"]);
        let s = format!("{:?}", b);
        assert!(s.contains("Bloom"));
    }

    /// Builder geometry (`n_blocks` on the *builder*, plus `Default`)
    /// and the finished bloom's `len` / `is_empty` agree across the
    /// build → finish boundary.
    #[test]
    fn builder_and_bloom_geometry_is_consistent() {
        const CUSTOM_BLOCKS: usize = 4;
        let mut b = BloomBuilder::with_n_blocks(CUSTOM_BLOCKS);
        assert_eq!(
            b.n_blocks(),
            CUSTOM_BLOCKS,
            "builder reports its block count"
        );
        b.insert(b"alpha");
        let bloom = b.finish();
        assert_eq!(bloom.n_blocks(), CUSTOM_BLOCKS);
        assert_eq!(bloom.len(), CUSTOM_BLOCKS * BLOCK_BYTES);
        assert!(!bloom.is_empty(), "a sized bloom's word array is non-empty");

        // `Default` builds the standard geometry, same as `new()`.
        assert_eq!(
            BloomBuilder::default().n_blocks(),
            BloomBuilder::new().n_blocks()
        );
    }
}