// aleph_filter/aleph_filter.rs
1//!
2//! The Aleph Filter is a quotient filter variant that supports:
3//! - O(1) expected insertion and lookup
4//! - Tunable false positive rate (FPR)
5//! - Automatic expansion with fingerprint bit sacrifice
6//! - Deletion support (via tombstones for void entries)
7
8use std::collections::HashMap;
9use crate::hash::{hash_key, split_hash, fingerprint_bits_for_fpr};
10use crate::metadata::SlotMetadata;
11use crate::slot::Slot;
12
/// The main Aleph Filter data structure.
///
/// Stores fingerprints in a compact quotient-filter format with dynamic expansion.
/// Each element occupies one logical "slot" with data stored in `slots` and metadata in `metadata`.
#[derive(Clone)]
pub struct AlephFilter {
    /// Fingerprint + metadata storage. Length = num_slots + num_extension_slots.
    /// Stores packed fingerprints for compact memory usage.
    slots: Vec<Slot>,

    /// Per-slot metadata flags (occupied, continuation, shifted).
    /// Tracks structure of runs and clusters.
    metadata: Vec<SlotMetadata>,

    /// Number of logical slots (always power of 2).
    /// The "quotient" component of the quotient filter.
    num_slots: usize,

    /// Extra overflow slots at the end (linear, no wraparound).
    /// Prevents constant expansion and supports insertion collisions.
    num_extension_slots: usize,

    /// log2(num_slots) - used to extract quotient from hash.
    quotient_bits: u32,

    /// Fingerprint bits at creation time. The *effective* width is
    /// `base_fp_bits - num_expansions` (see `fp_bits`), shrinking by 1 per
    /// expansion to free a bit for the growing quotient.
    base_fp_bits: u32,

    /// Number of times the filter has expanded.
    /// Each expansion doubles slots and sacrifices 1 fingerprint bit per entry.
    num_expansions: usize,

    /// Running item count. NOTE(review): `insert` increments this on every
    /// call (duplicates included), so this counts operations rather than
    /// distinct keys.
    num_items: usize,

    /// Reserved for future use (e.g., storing full hashes for deletions).
    /// Currently only ever cleared (on expansion); never populated.
    mother_hashes: HashMap<usize, u64>,

    /// Load factor threshold. When `num_items / num_slots >= max_load_factor`, expand.
    max_load_factor: f64,
}
55
56// PUBLIC API
57
58impl AlephFilter {
59    /// Creates a new Aleph Filter sized for `expected_items` with target `fpr`.
60    /// 
61    /// Automatically computes internal parameters (slots, fingerprint width) to meet the FPR.
62    /// 
63    /// # Arguments
64    /// 
65    /// * `expected_items` - Expected number of items to insert (must be > 0)
66    /// * `fpr` - Target false positive rate, e.g., 0.01 for 1% (must be in (0, 1))
67    /// 
68    /// # Returns
69    /// 
70    /// A new empty AlephFilter instance
71    /// 
72    /// # Panics
73    /// 
74    /// If `expected_items <= 0` or `fpr` not in (0, 1).
75    pub fn new(expected_items: usize, fpr: f64) -> Self {
76        assert!(expected_items > 0);
77        assert!(fpr > 0.0 && fpr < 1.0);
78        let raw = (expected_items as f64 / 0.8).ceil() as usize;
79        let num_slots = raw.next_power_of_two().max(8);
80        let q = num_slots.trailing_zeros();
81        let fp = fingerprint_bits_for_fpr(fpr);
82        let ext = (q as usize) * 2;  // Match Java: power_of_two_size * 2
83        return Self {
84            slots: vec![Slot::empty(); num_slots + ext],
85            metadata: vec![SlotMetadata::new(); num_slots + ext],
86            num_slots, num_extension_slots: ext,
87            quotient_bits: q, base_fp_bits: fp,
88            num_expansions: 0, num_items: 0,
89            mother_hashes: HashMap::new(),
90            max_load_factor: 0.8,  // Match Java: fullness_threshold = 0.8
91        };
92    }
93
94    /// Creates a new Aleph Filter with manual parameters.
95    /// 
96    /// Use this constructor when you already know the desired slot count and fingerprint width.
97    /// 
98    /// # Arguments
99    /// 
100    /// * `num_slots` - Desired number of logical slots (rounded up to next power of 2)
101    /// * `remainder_bits` - Fingerprint width in bits
102    /// 
103    /// # Returns
104    /// 
105    /// A new empty AlephFilter instance
106    pub fn with_params(num_slots: usize, remainder_bits: u32) -> Self {
107        let num_slots = num_slots.next_power_of_two().max(8);
108        let q = num_slots.trailing_zeros();
109        let ext = (q as usize) * 2;  // Match Java: power_of_two_size * 2
110        return Self {
111            slots: vec![Slot::empty(); num_slots + ext],
112            metadata: vec![SlotMetadata::new(); num_slots + ext],
113            num_slots, num_extension_slots: ext,
114            quotient_bits: q, base_fp_bits: remainder_bits,
115            num_expansions: 0, num_items: 0,
116            mother_hashes: HashMap::new(),
117            max_load_factor: 0.8,  // Match Java: fullness_threshold = 0.8
118        };
119    }
120
121    /// Inserts a key into the filter.
122    /// 
123    /// May trigger an automatic expansion if load factor exceeds threshold.
124    /// Supports duplicate insertions (see contains for lookup semantics).
125    /// 
126    /// # Arguments
127    /// 
128    /// * `key` - Byte slice to insert
129    /// 
130    /// # Complexity
131    /// 
132    /// O(1) amortized expected time
133    pub fn insert(&mut self, key: &[u8]) {
134        if self.load_factor() >= self.max_load_factor {
135            self.expand();
136        }
137        let hash = hash_key(key);
138        let r = self.fp_bits();
139        let (slot_idx, fp) = split_hash(hash, self.quotient_bits, r);
140        let canonical = slot_idx as usize;
141        let slot = if r == 0 {
142            Slot::void_marker() // No fingerprint bits left -> void
143        } else {
144            Slot::new(fp, r as u8)
145        };
146        self.qf_insert_slot(canonical, slot);
147        self.num_items += 1;
148        return;
149    }
150
151    /// Checks if a key might be in the filter.
152    /// 
153    /// Returns `true` if the key is definitely in the filter.
154    /// May return `true` for keys not inserted (false positive at rate `fpr`).
155    /// Never returns `true` for keys definitely not inserted (no false negatives).
156    /// 
157    /// # Arguments
158    /// 
159    /// * `key` - Byte slice to query
160    /// 
161    /// # Returns
162    /// 
163    /// `true` if key is in filter (or hash collision), `false` if definitely not in filter
164    /// 
165    /// # Complexity
166    /// 
167    /// O(1) expected time
168    pub fn contains(&self, key: &[u8]) -> bool {
169        let hash = hash_key(key);
170        let r = self.fp_bits();
171        let (slot_idx, fp) = split_hash(hash, self.quotient_bits, r);
172        let canonical = slot_idx as usize;
173        return self.qf_search(fp, canonical);
174    }
175
176    /// Removes a key from the filter, if present.
177    /// 
178    /// Deleting void entries (exhausted fingerprints) marks them with a tombstone.
179    /// Deleting regular entries compacts the run by shifting elements left.
180    /// 
181    /// # Arguments
182    /// 
183    /// * `key` - Byte slice to delete
184    /// 
185    /// # Returns
186    /// 
187    /// `true` if a matching entry was found and deleted, `false` if not found
188    /// 
189    /// # Complexity
190    /// 
191    /// O(1) expected time
192    pub fn delete(&mut self, key: &[u8]) -> bool {
193        let hash = hash_key(key);
194        let r = self.fp_bits();
195        let (slot_idx, fp) = split_hash(hash, self.quotient_bits, r);
196        let canonical = slot_idx as usize;
197        if self.qf_delete(fp, canonical) {
198            self.num_items = self.num_items.saturating_sub(1);
199            return true;
200        } else {
201            return false;
202        }
203    }
204
205    /// Returns the number of items inserted, accounting for duplicates.
206    /// 
207    /// # Returns
208    /// 
209    /// Count of insertion operations
210    #[inline] pub fn len(&self) -> usize { return self.num_items; }
211    
212    /// Returns `true` if the filter contains no items.
213    /// 
214    /// # Returns
215    /// 
216    /// `true` if empty, `false` otherwise
217    #[inline] pub fn is_empty(&self) -> bool { return self.num_items == 0; }
218    
219    /// Returns the current load factor (items / slots).
220    /// 
221    /// # Returns
222    /// 
223    /// Ratio in [0, 1+) (can exceed 1 due to extension slots)
224    #[inline] pub fn load_factor(&self) -> f64 { return self.num_items as f64 / self.num_slots as f64; }
225    
226    /// Returns the current logical slot count.
227    /// 
228    /// # Returns
229    /// 
230    /// Number of quotient-addressable slots
231    #[inline] pub fn capacity(&self) -> usize { return self.num_slots; }
232    
233    /// Returns the number of times the filter has expanded.
234    /// 
235    /// Each expansion doubles slots and sacrifices 1 fingerprint bit per entry.
236    /// 
237    /// # Returns
238    /// 
239    /// Expansion count
240    #[inline] pub fn num_expansions(&self) -> usize { return self.num_expansions; }
241    
242    /// Returns the current number of quotient bits (log2 of logical slots).
243    /// 
244    /// # Returns
245    /// 
246    /// Quotient bits
247    #[inline] pub fn quotient_bits(&self) -> u32 { return self.quotient_bits; }
248}
249
250// INTERNAL HELPERS
251
252impl AlephFilter {
253    /// Returns total addressable slots (logical + extension).
254    /// 
255    /// Used internally to guard against out-of-bounds access.
256    /// 
257    /// # Returns
258    /// 
259    /// Total array size
260    #[inline]
261    fn total_slots(&self) -> usize { return self.num_slots + self.num_extension_slots; }
262
263    /// Returns the current fingerprint width in bits.
264    /// 
265    /// Decreases by 1 with each expansion to make room for additional quotient bits.
266    /// Uses saturating subtraction to prevent underflow.
267    /// 
268    /// # Returns
269    /// 
270    /// Fingerprint bits available (>= 0)
271    #[inline]
272    pub fn fp_bits(&self) -> u32 { return self.base_fp_bits.saturating_sub(self.num_expansions as u32); }
273
274    /// Checks if a slot is completely empty (no data and no metadata flags set).
275    /// 
276    /// A slot is empty only when all three metadata flags (occupied, continuation, shifted)
277    /// are off AND the slot data is zero.
278    /// 
279    /// # Arguments
280    /// 
281    /// * `i` - Slot index
282    /// 
283    /// # Returns
284    /// 
285    /// `true` if slot is fully empty
286    #[inline]
287    fn is_slot_empty(&self, i: usize) -> bool {
288        return !self.metadata[i].is_occupied()
289            && !self.metadata[i].is_continuation()
290            && !self.metadata[i].is_shifted();
291    }
292
293    /// Atomically swaps a slot, returning the old value.
294    /// 
295    /// Used by insertion logic to push elements right.
296    /// 
297    /// # Arguments
298    /// 
299    /// * `i` - Slot index
300    /// * `new_slot` - Value to place in slot
301    /// 
302    /// # Returns
303    /// 
304    /// Previous value at slot `i`
305    fn swap_slot(&mut self, i: usize, new_slot: Slot) -> Slot {
306        let old = self.slots[i];
307        self.slots[i] = new_slot;
308        return old;
309    }
310}
311
312// QUOTIENT FILTER CORE 
313
314impl AlephFilter {
    /// Finds the start position of the run for a given canonical slot.
    ///
    /// The algorithm:
    /// 1. Walk backward from canonical toward the cluster start, counting
    ///    occupied slots while the shifted flag is set.
    /// 2. Walk forward from the cluster start, counting run starts
    ///    (non-continuation slots).
    /// 3. Return the position of the Nth run start, where N = count from phase 1
    ///    (runs in a cluster appear in canonical-slot order).
    ///
    /// This determines which run a canonical slot belongs to, even if shifted.
    ///
    /// # Arguments
    ///
    /// * `canonical` - Quotient (canonical slot index)
    ///
    /// # Returns
    ///
    /// Index of the run's first slot
    ///
    fn find_run_start(&self, canonical: usize) -> usize {
        let mut pos = canonical;
        // Number of run starts to pass in phase 2. Starts at 1 so the
        // cluster-start slot's own run (not counted by the loop below,
        // since its shifted flag is clear) is accounted for.
        let mut skip: i64 = 1;

        // Phase 1: walk backward counting occupied slots
        while self.metadata[pos].is_shifted() {
            if self.metadata[pos].is_occupied() {
                skip += 1;
            }
            // Slot 0 can't be shifted in a consistent state (no wraparound);
            // break defensively rather than underflow.
            if pos == 0 { break; }
            pos -= 1;
        }

        // Phase 2: walk forward counting run starts (non-continuation)
        loop {
            if !self.metadata[pos].is_continuation() {
                skip -= 1;
                if skip == 0 {
                    return pos;
                }
            }
            pos += 1;
            // Defensive fallback for malformed metadata: return the canonical
            // index instead of walking out of bounds.
            if pos >= self.total_slots() { return canonical; }
        }
    }
357
358    /// Finds a suitable starting position for a new run.
359    /// 
360    /// Starting from a run start, skips past the run and any continuation slots.
361    /// Returns the first empty or non-continuation position where a new run can begin.
362    /// 
363    /// # Arguments
364    /// 
365    /// * `index` - Starting position (typically a run start)
366    /// 
367    /// # Returns
368    /// 
369    /// Position suitablefor inserting a new run
370    /// 
371    fn find_new_run_location(&self, index: usize) -> usize {
372        let mut pos = index;
373        if pos < self.total_slots() && !self.is_slot_empty(pos) {
374            pos += 1;
375        }
376        while pos < self.total_slots() && self.metadata[pos].is_continuation() {
377            pos += 1;
378        }
379        return pos;
380    }
381
382
383    /// Inserts a slot at its canonical position, dispatching to appropriate insertion logic.
384    /// 
385    /// If no run exists at canonical, creates a new run.
386    /// If a run exists, inserts into the existing run.
387    /// 
388    /// # Arguments
389    /// 
390    /// * `canonical` - Quotient (canonical slot index)
391    /// * `slot` - Slot data to insert (fingerprint + length)
392    /// 
393    /// # Returns
394    /// 
395    /// `true` on success, `false` if array is full
396    fn qf_insert_slot(&mut self, canonical: usize, slot: Slot) -> bool {
397        let does_run_exist = self.metadata[canonical].is_occupied();
398        if !does_run_exist {
399            return self.insert_new_run(canonical, slot);
400        } else {
401            let run_start = self.find_run_start(canonical);
402            return self.insert_into_run(slot, run_start);
403        }
404    }
405
    /// Creates a new run for a canonical slot and inserts the slot.
    ///
    /// Sets the occupied flag, fixes up shifted/continuation metadata, and
    /// performs a push-right cascade if the insertion position is taken.
    ///
    /// # Arguments
    ///
    /// * `canonical` - Quotient (canonical slot index)
    /// * `slot` - Slot data to insert
    ///
    /// # Returns
    ///
    /// `true` on success, `false` if array is full
    ///
    fn insert_new_run(&mut self, canonical: usize, slot: Slot) -> bool {
        // Where this cluster's run for `canonical` would start, then the first
        // position after the existing runs where the new run can begin.
        let run_start = self.find_run_start(canonical);
        let insert_pos = self.find_new_run_location(run_start);

        let slot_empty = self.is_slot_empty(insert_pos);

        self.metadata[canonical].set_occupied(true);
        // The new entry is shifted iff it could not land on its canonical slot.
        if insert_pos != canonical {
            self.metadata[insert_pos].set_shifted(true);
        }
        // A run's first slot is never a continuation. find_new_run_location
        // never returns a continuation slot, so no pre-existing flag is lost.
        self.metadata[insert_pos].set_continuation(false);

        if slot_empty {
            self.slots[insert_pos] = slot;
            return true;
        }

        // Push everything right via swap chain
        let mut current_slot = slot;
        let mut pos = insert_pos;
        let mut temp_cont = false;
        loop {
            // Ran off the end of the array (extension slots included): full.
            if pos >= self.total_slots() { return false; }
            let was_empty = self.is_slot_empty(pos);
            current_slot = self.swap_slot(pos, current_slot);

            // Every displaced entry past the insertion point is now shifted.
            if pos > insert_pos {
                self.metadata[pos].set_shifted(true);
            }
            // Carry each slot's old continuation flag one position right, so
            // run boundaries move together with the displaced data.
            if pos > insert_pos {
                let cur_cont = self.metadata[pos].is_continuation();
                self.metadata[pos].set_continuation(temp_cont);
                temp_cont = cur_cont;
            }
            pos += 1;
            if was_empty { break; }
        }
        return true;
    }
459
    /// Inserts a slot into an existing run, preserving run structure.
    ///
    /// Scans forward from the run start; at the first slot past the run's end
    /// it places the new entry (as a continuation) and then ripples any
    /// displaced entries — together with their continuation flags — one
    /// position to the right. The new entry is appended at the run's end
    /// rather than kept in any order; lookups scan the whole run, so order
    /// does not matter.
    ///
    /// # Arguments
    ///
    /// * `slot` - Slot data to insert
    /// * `run_start` - Index of run's first slot
    ///
    /// # Returns
    ///
    /// `true` on success, `false` if array is full
    ///
    fn insert_into_run(&mut self, slot: Slot, run_start: usize) -> bool {
        let mut current_slot = slot;
        let mut pos = run_start;
        let mut finished_first_run = false;
        let mut temp_cont = false;

        loop {
            // Out of space (even in the extension area): report failure.
            if pos >= self.total_slots() { return false; }
            let was_empty = self.is_slot_empty(pos);

            // Every slot we touch past the run start becomes shifted.
            if pos > run_start {
                self.metadata[pos].set_shifted(true);
            }

            if pos > run_start && !finished_first_run && !self.metadata[pos].is_continuation() {
                // First non-continuation slot after the run start = the run's
                // end: drop the new entry here, marked as a continuation.
                // (This slot was non-continuation, matching temp_cont's
                // initial value of false for the ripple below.)
                finished_first_run = true;
                self.metadata[pos].set_continuation(true);
                current_slot = self.swap_slot(pos, current_slot);
            } else if finished_first_run {
                // Ripple displaced entries right, carrying each old
                // continuation flag along with its data.
                let cur_cont = self.metadata[pos].is_continuation();
                self.metadata[pos].set_continuation(temp_cont);
                temp_cont = cur_cont;
                current_slot = self.swap_slot(pos, current_slot);
            }

            pos += 1;
            if was_empty { break; }
        }
        return true;
    }
504
505    /// Searches for a fingerprint in the run for a canonical slot.
506    /// 
507    /// Returns `false` early if the canonical slot is not occupied.
508    /// Otherwise, locates the run and scans it for a match.
509    /// 
510    /// # Arguments
511    /// 
512    /// * `fp` - Fingerprint to search for
513    /// * `canonical` - Quotient (canonical slot index)
514    /// 
515    /// # Returns
516    /// 
517    /// `true` if fingerprint is found in the run, `false` otherwise
518    /// 
519    fn qf_search(&self, fp: u64, canonical: usize) -> bool {
520        if !self.metadata[canonical].is_occupied() {
521            return false;
522        }
523        let run_start = self.find_run_start(canonical);
524        return self.find_in_run(run_start, fp);
525    }
526
527    /// Scans a run linearly for a matching fingerprint.
528    /// 
529    /// Iterates from start, checking each slot against the target fingerprint.
530    /// Skips tombstones and stops at the end of the run (when continuation flag is off).
531    /// 
532    /// # Arguments
533    /// 
534    /// * `start` - Starting index of run
535    /// * `fp` - Fingerprint to match
536    /// 
537    /// # Returns
538    /// 
539    /// `true` if a matching fingerprint is found, `false` otherwise
540    /// 
541    fn find_in_run(&self, start: usize, fp: u64) -> bool {
542        let mut pos = start;
543        let r = self.fp_bits();
544        loop {
545            if self.slots[pos].is_tombstone() {
546                // skip tombstones
547            } else if self.slots[pos].matches(fp, r as u8) {
548                return true;
549            }
550            pos += 1;
551            if pos >= self.total_slots() || !self.metadata[pos].is_continuation() {
552                break;
553            }
554        }
555        false
556    }
557
558    /// Finds the start of the cluster containing the given index.
559    ///
560    /// Walks backward until finding a slot that is not shifted.
561    /// Matches Java's `find_cluster_start`.
562    fn find_cluster_start(&self, index: usize) -> usize {
563        let mut pos = index;
564        while pos > 0 && self.metadata[pos].is_shifted() {
565            pos -= 1;
566        }
567        return pos;
568    }
569
570    /// Finds the end of the run starting at `index`.
571    ///
572    /// Walks forward while continuation flag is set.
573    /// Matches Java's `find_run_end`.
574    fn find_run_end(&self, index: usize) -> usize {
575        let mut pos = index;
576        while pos + 1 < self.total_slots() && self.metadata[pos + 1].is_continuation() {
577            pos += 1;
578        }
579        return pos;
580    }
581
    /// Deletes a fingerprint from the run for a canonical slot.
    ///
    /// Uses the Java paper's full cluster-aware deletion:
    /// 1. Find the matching fingerprint in the run (last match wins)
    /// 2. Shift entries within the run backward to fill the gap
    /// 3. For each subsequent shifted run in the cluster, shift it back by one slot
    /// 4. Clear metadata at the vacated position
    /// 5. Clear occupied flag if the run is now empty
    ///
    /// This is more correct than run-local deletion because it properly
    /// compacts the entire cluster, maintaining metadata consistency.
    ///
    /// # Arguments
    ///
    /// * `fp` - Fingerprint to delete
    /// * `canonical` - Quotient (canonical slot index)
    ///
    /// # Returns
    ///
    /// `true` if a matching entry was deleted (or tombstoned), `false` otherwise
    fn qf_delete(&mut self, fp: u64, canonical: usize) -> bool {
        // Extension slots are not valid canonical addresses.
        if canonical >= self.num_slots {
            return false;
        }
        // No run exists for this quotient at all.
        if !self.metadata[canonical].is_occupied() {
            return false;
        }
        let run_start = self.find_run_start(canonical);
        let r = self.fp_bits();

        // Find the matching fingerprint (search for last match, like Java's decide_which_fingerprint_to_delete)
        let mut pos = run_start;
        let mut found: Option<usize> = None;
        loop {
            if !self.slots[pos].is_tombstone() && self.slots[pos].matches(fp, r as u8) {
                found = Some(pos);
            }
            pos += 1;
            if pos >= self.total_slots() || !self.metadata[pos].is_continuation() {
                break;
            }
        }

        let del_pos = match found {
            Some(p) => p,
            None => return false,
        };

        // Void entries get tombstoned (matching Java's behavior for void deletes)
        // — they cannot be compacted away because the fingerprint is exhausted.
        if self.slots[del_pos].is_void() {
            self.slots[del_pos] = Slot::tombstone();
            return true;
        }

        let mut run_end = self.find_run_end(del_pos);

        // Check if this run has only one entry (will need to clear occupied flag)
        let turn_off_occupied = run_start == run_end;

        // Shift entries within this run backward to fill the gap
        for i in del_pos..run_end {
            self.slots[i] = self.slots[i + 1];
        }

        // Count continuation and non-occupied flags from cluster start to run_end
        // This tells us how far entries are shifted from their canonical positions
        let cluster_start = self.find_cluster_start(canonical);
        let mut num_shifted_count: i64 = 0;
        let mut num_non_occupied: i64 = 0;
        for i in cluster_start..=run_end {
            if self.metadata[i].is_continuation() {
                num_shifted_count += 1;
            }
            if !self.metadata[i].is_occupied() {
                num_non_occupied += 1;
            }
        }

        // Clear the vacated slot at run_end
        self.slots[run_end] = Slot::empty();
        self.metadata[run_end].set_shifted(false);
        self.metadata[run_end].set_continuation(false);

        // Now shift all subsequent runs in the cluster backward by one slot
        loop {
            // Check if there's a next run that needs shifting; if not, finish
            // up by clearing the occupied flag when the run became empty.
            if run_end + 1 >= self.total_slots()
                || self.is_slot_empty(run_end + 1)
                || !self.metadata[run_end + 1].is_shifted()
            {
                if turn_off_occupied {
                    self.metadata[canonical].set_occupied(false);
                }
                return true;
            }

            // Find the next run
            let next_run_start = run_end + 1;
            run_end = self.find_run_end(next_run_start);

            // Check if the slot before this run is now back at its canonical
            // position. NOTE(review): the `num_shifted_count - num_non_occupied
            // == 1` criterion mirrors the Java implementation — confirm edge
            // cases against upstream if this path is ever suspected.
            if self.metadata[next_run_start - 1].is_occupied()
                && num_shifted_count - num_non_occupied == 1
            {
                self.metadata[next_run_start - 1].set_shifted(false);
            } else {
                self.metadata[next_run_start - 1].set_shifted(true);
            }

            // Shift each entry in this run back by one, carrying continuation
            // flags along with the data.
            for i in next_run_start..=run_end {
                self.slots[i - 1] = self.slots[i];
                if self.metadata[i].is_continuation() {
                    self.metadata[i - 1].set_continuation(true);
                }
                if !self.metadata[i].is_occupied() {
                    num_non_occupied += 1;
                }
            }
            num_shifted_count += (run_end - next_run_start) as i64;

            // Clear the vacated slot
            self.slots[run_end] = Slot::empty();
            self.metadata[run_end].set_shifted(false);
            self.metadata[run_end].set_continuation(false);
        }
    }
700}
701
702// EXPANSION 
703impl AlephFilter {
    /// Doubles the filter size and re-hashes all entries.
    ///
    /// During expansion:
    /// 1. Collects all entries using `iterate_entries()`
    /// 2. Doubles logical slots and increments quotient bits
    /// 3. Sacrifices 1 fingerprint bit per entry (`fp_bits` decreases)
    /// 4. Re-inserts entries using the stolen pivot bit to pick the new canonical slot
    /// 5. Consolidates void markers to prevent exponential void growth
    ///
    /// This supports indefinite insertion up to memory limits.
    ///
    /// # Complexity
    ///
    /// O(n) where n is current item count
    ///
    fn expand(&mut self) {
        // Snapshot all (canonical, fp, fp_len, is_void, old_pos) entries
        // before the tables are rebuilt.
        let entries = self.iterate_entries();
        let old_q_bits = self.quotient_bits;

        // Double the filter
        self.num_slots *= 2;
        self.quotient_bits += 1;
        self.num_expansions += 1;
        self.num_extension_slots += 2;  // Match Java: num_extension_slots += 2
        self.slots = vec![Slot::empty(); self.total_slots()];
        self.metadata = vec![SlotMetadata::new(); self.total_slots()];
        // num_items is rebuilt below as entries are re-inserted.
        self.num_items = 0;
        self.mother_hashes.clear();

        // Reinsert each entry

        // Track slots that already have a void to prevent exponential void growth
        let mut void_slots: std::collections::HashSet<usize> = std::collections::HashSet::new();

        for (bucket, fingerprint, fp_len, is_void, _old_pos) in entries {
            if is_void {
                // Void: no pivot bit remains, so both expanded addresses
                // (low copy and high copy with the new quotient bit set) are
                // possible homes — insert into both, deduplicated per slot.
                let s0 = bucket;
                let s1 = bucket | (1 << old_q_bits);
                if s0 < self.num_slots && void_slots.insert(s0) {
                    self.qf_insert_slot(s0, Slot::void_marker());
                    self.num_items += 1;
                }
                if s1 < self.num_slots && void_slots.insert(s1) {
                    self.qf_insert_slot(s1, Slot::void_marker());
                    self.num_items += 1;
                }
            } else if fp_len > 0 {
                // Steal lowest fp bit and use it to extend the slot address
                let pivot = fingerprint & 1;
                let new_bucket = bucket | ((pivot as usize) << old_q_bits);
                let new_fp = fingerprint >> 1;
                let new_len = fp_len - 1;

                if new_bucket < self.num_slots {
                    // A fingerprint that just ran out of bits becomes a void marker.
                    if new_len == 0 {
                        self.qf_insert_slot(new_bucket, Slot::void_marker());
                    } else {
                        self.qf_insert_slot(new_bucket, Slot::new(new_fp, new_len));
                    }
                    self.num_items += 1;
                }
            }
        }
        return;
    }
771
    /// Extracts all entries as tuples for expansion.
    ///
    /// Returns a list of `(canonical, fingerprint, fingerprint_len, is_void, position)` tuples.
    /// Scans the slot array left to right, decoding the (occupied,
    /// continuation, shifted) flag combinations to keep track of which
    /// canonical bucket owns the run currently being scanned:
    /// - `queue` holds the canonical indices of runs still pending in the
    ///   current cluster
    /// - `current_bucket` is the canonical index owning the current run
    ///
    /// # Returns
    ///
    /// Vec of (canonical_slot, fp_bits, fp_length, is_void, storage_position)
    ///
    fn iterate_entries(&self) -> Vec<(usize, u64, u8, bool, usize)> {
        let mut result = Vec::new();
        // NOTE(review): `remove(0)` below is O(len), but the queue only holds
        // the pending runs of one cluster, so it stays short in practice.
        let mut queue: Vec<usize> = Vec::new();
        let mut current_bucket: usize = 0;

        let mut index: usize = 0;
        while index < self.total_slots() {
            // Skip slots with neither metadata flags nor slot data.
            if self.is_slot_empty(index) && self.slots[index].is_empty() {
                index += 1;
                continue;
            }

            let occ = self.metadata[index].is_occupied();
            let cont = self.metadata[index].is_continuation();
            let shift = self.metadata[index].is_shifted();

            if occ && !cont && !shift {
                // Cluster start: this slot hosts its own run; reset tracking.
                queue.clear();
                queue.push(index);
                current_bucket = index;
            } else if occ && cont && shift {
                // Canonical slot whose run starts further right: remember it.
                queue.push(index);
            } else if !occ && !cont && shift {
                // A new run begins here: drop the finished run's owner; the
                // next queued canonical owns this run. Falls back to `index`
                // defensively if the queue is (unexpectedly) empty.
                if !queue.is_empty() { queue.remove(0); }
                current_bucket = queue.first().copied().unwrap_or(index);
            } else if !occ && cont && shift {
                // continuation — bucket unchanged
            } else if occ && !cont && shift {
                // Simultaneously a canonical slot (enqueue it) and the start
                // of the next pending run (advance to the next owner).
                queue.push(index);
                if !queue.is_empty() { queue.remove(0); }
                current_bucket = queue.first().copied().unwrap_or(index);
            }

            // Record non-empty, non-tombstone data slots
            if !self.slots[index].is_empty() && !self.slots[index].is_tombstone() {
                result.push((
                    current_bucket,
                    self.slots[index].fingerprint(),
                    self.slots[index].length(),
                    self.slots[index].is_void(),
                    index,
                ));
            }

            index += 1;
        }
        return result;
    }
835}
836
837// DEBUG
838
839impl std::fmt::Debug for AlephFilter {
840    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
841        f.debug_struct("AlephFilter")
842            .field("num_items", &self.num_items)
843            .field("num_slots", &self.num_slots)
844            .field("num_expansions", &self.num_expansions)
845            .field("load_factor", &self.load_factor())
846            .finish()
847    }
848}
849