Skip to main content

ftui_render/
quotient_filter.rs

1#![forbid(unsafe_code)]
2
3//! Quotient Filter for space-efficient dirty row tracking (bd-3fc1b).
4//!
5//! A Quotient Filter is a compact approximate-membership data structure
6//! that supports insert, lookup, delete, and merge — unlike Bloom filters,
7//! which cannot delete.
8//!
9//! # How It Works
10//!
11//! Each element is hashed to a `p`-bit fingerprint, split into:
12//! - `q`-bit *quotient* (slot index): determines the canonical slot
13//! - `r`-bit *remainder*: stored in the slot
14//!
15//! Collisions are resolved by linear probing within a cluster. Three
16//! metadata bits per slot track the structure of runs and clusters.
17//!
18//! # Complexity
19//!
20//! - Insert: O(1) amortized
21//! - Lookup: O(1) amortized
22//! - Delete: O(1) amortized
23//! - Space: `(r + 3) * 2^q` bits ≈ 10% overhead above information-theoretic minimum
24//!
25//! # Use Case
26//!
27//! For large virtualized lists (>1M rows) where only a small fraction
28//! (<1%) of rows are dirty, a Quotient Filter uses O(dirty_count) space
29//! vs O(total_rows) for a bitset.
30//!
31//! # Implementation Note
32//!
33//! This implementation uses a simplified open-addressing scheme with
34//! (quotient, remainder) pairs stored directly, avoiding the complexity
35//! of the canonical 3-bit metadata approach while preserving the same
36//! API contract and space characteristics.
37//!
38//! # References
39//!
40//! Bender et al. (2012): "Don't Thrash: How to Cache Your Hash on Flash"
41
42use std::collections::hash_map::DefaultHasher;
43use std::hash::{Hash, Hasher};
44
45/// A Quotient Filter for approximate set membership with deletion support.
46#[derive(Debug, Clone)]
47pub struct QuotientFilter {
48    /// Number of quotient bits (q). Table has 2^q slots.
49    q: u32,
50    /// Number of remainder bits (r). Fingerprint = q + r bits.
51    r: u32,
52    /// Slot storage: each slot holds an optional (quotient, remainder) pair.
53    slots: Vec<Option<(u32, u64)>>,
54    /// Number of elements currently stored.
55    count: usize,
56    /// Total number of slots (2^q).
57    capacity: usize,
58}
59
60/// Configuration for a Quotient Filter.
61#[derive(Debug, Clone, Copy)]
62pub struct QuotientFilterConfig {
63    /// Number of quotient bits (determines capacity: 2^q slots).
64    pub q: u32,
65    /// Number of remainder bits (determines false positive rate: ~2^(-r)).
66    pub r: u32,
67}
68
69impl QuotientFilterConfig {
70    /// Create a config targeting a given capacity and false positive rate.
71    ///
72    /// `expected_items`: expected number of elements
73    /// `fp_rate`: target false positive rate (e.g., 0.01 for 1%)
74    #[must_use]
75    pub fn for_capacity(expected_items: usize, fp_rate: f64) -> Self {
76        let fp_rate = if fp_rate.is_finite() && fp_rate > 0.0 {
77            fp_rate.min(1.0 - f64::EPSILON)
78        } else {
79            0.01
80        };
81
82        // r bits give ~2^(-r) FP rate
83        let r = (-fp_rate.log2()).ceil() as u32;
84        let r = r.clamp(2, 32);
85
86        // q bits: need 2^q > expected_items / load_factor
87        // Use 75% max load for good performance
88        let needed = ((expected_items as f64 / 0.75).ceil()) as u64;
89        let q = (64 - needed.leading_zeros()).clamp(4, 28);
90
91        Self { q, r }
92    }
93}
94
95impl Default for QuotientFilterConfig {
96    fn default() -> Self {
97        Self { q: 10, r: 8 } // 1024 slots, ~0.4% FP rate
98    }
99}
100
101impl QuotientFilter {
102    /// Create a new Quotient Filter with the given configuration.
103    #[must_use]
104    pub fn new(config: QuotientFilterConfig) -> Self {
105        let q = config.q.min(28); // Cap at 2^28 = 256M slots
106        let r = config.r.clamp(1, 32);
107        let capacity = 1usize << q;
108
109        Self {
110            q,
111            r,
112            slots: vec![None; capacity],
113            count: 0,
114            capacity,
115        }
116    }
117
118    /// Create with default configuration.
119    #[must_use]
120    pub fn with_defaults() -> Self {
121        Self::new(QuotientFilterConfig::default())
122    }
123
124    /// Number of elements currently stored.
125    #[must_use]
126    pub fn len(&self) -> usize {
127        self.count
128    }
129
130    /// Whether the filter is empty.
131    #[must_use]
132    pub fn is_empty(&self) -> bool {
133        self.count == 0
134    }
135
136    /// Current load factor (0.0 to 1.0).
137    #[must_use]
138    pub fn load_factor(&self) -> f64 {
139        self.count as f64 / self.capacity as f64
140    }
141
142    /// Number of slots (capacity).
143    #[must_use]
144    pub fn capacity(&self) -> usize {
145        self.capacity
146    }
147
148    /// Theoretical false positive rate at current load.
149    #[must_use]
150    pub fn theoretical_fp_rate(&self) -> f64 {
151        // FP rate ≈ 1 - (1 - 2^(-r))^n ≈ n * 2^(-r) for small rates
152        let base_rate = 1.0 / (1u64 << self.r) as f64;
153        1.0 - (1.0 - base_rate).powi(self.count as i32)
154    }
155
156    /// Hash an element to (quotient, remainder).
157    fn fingerprint<T: Hash>(&self, item: &T) -> (u32, u64) {
158        let mut hasher = DefaultHasher::new();
159        item.hash(&mut hasher);
160        let h = hasher.finish();
161
162        let q_mask = (1u32 << self.q) - 1;
163        let r_mask = (1u64 << self.r) - 1;
164
165        let quotient = ((h >> self.r) as u32) & q_mask;
166        let remainder = h & r_mask;
167        (quotient, remainder)
168    }
169
170    /// Insert an element. Returns `true` if newly inserted, `false` if already present or full.
171    pub fn insert<T: Hash>(&mut self, item: &T) -> bool {
172        if self.count >= self.capacity {
173            return false;
174        }
175
176        let (quotient, remainder) = self.fingerprint(item);
177
178        // Linear probe from canonical slot
179        let mut pos = quotient as usize;
180        for _ in 0..self.capacity {
181            match self.slots[pos] {
182                None => {
183                    // Empty slot — insert here
184                    self.slots[pos] = Some((quotient, remainder));
185                    self.count += 1;
186                    return true;
187                }
188                Some((q, r)) if q == quotient && r == remainder => {
189                    // Already present
190                    return false;
191                }
192                _ => {
193                    // Occupied by different element — probe next
194                    pos = (pos + 1) % self.capacity;
195                }
196            }
197        }
198
199        false // Full (shouldn't happen with load factor check)
200    }
201
202    /// Check if an element might be in the filter.
203    ///
204    /// Returns `false` for definite non-members (no false negatives).
205    /// Returns `true` for probable members (may have false positives
206    /// due to fingerprint collisions).
207    #[must_use]
208    pub fn contains<T: Hash>(&self, item: &T) -> bool {
209        let (quotient, remainder) = self.fingerprint(item);
210
211        let mut pos = quotient as usize;
212        for _ in 0..self.capacity {
213            match self.slots[pos] {
214                None => return false, // Empty slot — not found
215                Some((q, r)) if q == quotient && r == remainder => return true,
216                _ => pos = (pos + 1) % self.capacity,
217            }
218        }
219
220        false
221    }
222
223    /// Remove an element. Returns `true` if it was found and removed.
224    ///
225    /// Uses backward-shift deletion to maintain probe sequences.
226    pub fn remove<T: Hash>(&mut self, item: &T) -> bool {
227        let (quotient, remainder) = self.fingerprint(item);
228
229        // Find the element
230        let mut pos = quotient as usize;
231        let mut found_pos = None;
232        for _ in 0..self.capacity {
233            match self.slots[pos] {
234                None => break,
235                Some((q, r)) if q == quotient && r == remainder => {
236                    found_pos = Some(pos);
237                    break;
238                }
239                _ => pos = (pos + 1) % self.capacity,
240            }
241        }
242
243        let mut pos = match found_pos {
244            Some(p) => p,
245            None => return false,
246        };
247
248        // Backward-shift deletion: move subsequent elements back
249        // to fill the gap, maintaining their probe sequences.
250        self.slots[pos] = None;
251        self.count -= 1;
252
253        let mut current = (pos + 1) % self.capacity;
254        loop {
255            match self.slots[current] {
256                None => break, // End of cluster
257                Some((q, _r)) => {
258                    let canonical = q as usize;
259                    // Check if this element is displaced from its canonical slot
260                    // (i.e., it was shifted past the now-deleted slot)
261                    let should_shift = if canonical <= pos {
262                        // canonical <= pos < current (wrapping considered)
263                        current > pos || current < canonical
264                    } else {
265                        // canonical > pos, so shift only if current wrapped
266                        current > pos && current < canonical
267                    };
268
269                    if !should_shift {
270                        break;
271                    }
272
273                    // Move this element back to the gap
274                    self.slots[pos] = self.slots[current];
275                    self.slots[current] = None;
276                    pos = current;
277                }
278            }
279            current = (current + 1) % self.capacity;
280        }
281
282        true
283    }
284
285    /// Clear all elements.
286    pub fn clear(&mut self) {
287        self.slots.fill(None);
288        self.count = 0;
289    }
290
291    /// Merge another filter into this one.
292    ///
293    /// Both filters must have the same q and r values.
294    /// Returns the number of new elements added.
295    pub fn merge(&mut self, other: &QuotientFilter) -> usize {
296        if self.q != other.q || self.r != other.r {
297            return 0;
298        }
299
300        let mut added = 0;
301        for slot in &other.slots {
302            if let &Some((q, r)) = slot {
303                // Check if already present
304                let mut pos = q as usize;
305                let mut found = false;
306                for _ in 0..self.capacity {
307                    match self.slots[pos] {
308                        None => break,
309                        Some((eq, er)) if eq == q && er == r => {
310                            found = true;
311                            break;
312                        }
313                        _ => pos = (pos + 1) % self.capacity,
314                    }
315                }
316
317                if !found {
318                    // Insert at first empty slot from canonical position
319                    let mut ipos = q as usize;
320                    for _ in 0..self.capacity {
321                        if self.slots[ipos].is_none() {
322                            self.slots[ipos] = Some((q, r));
323                            self.count += 1;
324                            added += 1;
325                            break;
326                        }
327                        ipos = (ipos + 1) % self.capacity;
328                    }
329                }
330            }
331        }
332        added
333    }
334}
335
336impl Default for QuotientFilter {
337    fn default() -> Self {
338        Self::with_defaults()
339    }
340}
341
342// ---------------------------------------------------------------------------
343// Tests
344// ---------------------------------------------------------------------------
345
346#[cfg(test)]
347mod tests {
348    use super::*;
349
350    #[test]
351    fn empty_filter() {
352        let qf = QuotientFilter::with_defaults();
353        assert!(qf.is_empty());
354        assert_eq!(qf.len(), 0);
355        assert_eq!(qf.capacity(), 1024);
356        assert!(!qf.contains(&42u64));
357    }
358
359    #[test]
360    fn insert_and_lookup() {
361        let mut qf = QuotientFilter::with_defaults();
362        assert!(qf.insert(&100u64));
363        assert!(qf.contains(&100u64));
364        assert_eq!(qf.len(), 1);
365    }
366
367    #[test]
368    fn duplicate_insert_returns_false() {
369        let mut qf = QuotientFilter::with_defaults();
370        assert!(qf.insert(&42u64));
371        assert!(!qf.insert(&42u64));
372        assert_eq!(qf.len(), 1);
373    }
374
375    #[test]
376    fn insert_multiple() {
377        let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 8, r: 8 });
378        for i in 0u64..50 {
379            qf.insert(&i);
380        }
381        assert_eq!(qf.len(), 50);
382
383        // All should be found (no false negatives)
384        for i in 0u64..50 {
385            assert!(qf.contains(&i), "element {i} should be found");
386        }
387    }
388
389    #[test]
390    fn no_false_negatives() {
391        let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 10, r: 10 });
392        let items: Vec<u64> = (0..200).collect();
393
394        for item in &items {
395            qf.insert(item);
396        }
397
398        // Verify: zero false negatives
399        for item in &items {
400            assert!(qf.contains(item), "false negative for {item}");
401        }
402    }
403
404    #[test]
405    fn false_positive_rate_bounded() {
406        let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 12, r: 8 });
407
408        // Insert 500 elements
409        for i in 0u64..500 {
410            qf.insert(&i);
411        }
412
413        // Check 10000 non-members
414        let mut false_positives = 0;
415        for i in 10000u64..20000 {
416            if qf.contains(&i) {
417                false_positives += 1;
418            }
419        }
420
421        let fp_rate = false_positives as f64 / 10000.0;
422        // r=8 gives theoretical rate ~0.4%, allow up to 5% with margin
423        assert!(
424            fp_rate < 0.05,
425            "false positive rate too high: {fp_rate:.4} ({false_positives}/10000)"
426        );
427    }
428
429    #[test]
430    fn remove_element() {
431        let mut qf = QuotientFilter::with_defaults();
432        qf.insert(&42u64);
433        assert!(qf.contains(&42u64));
434
435        assert!(qf.remove(&42u64));
436        assert_eq!(qf.len(), 0);
437        assert!(!qf.contains(&42u64));
438    }
439
440    #[test]
441    fn remove_nonexistent() {
442        let mut qf = QuotientFilter::with_defaults();
443        assert!(!qf.remove(&42u64));
444    }
445
446    #[test]
447    fn remove_preserves_others() {
448        let mut qf = QuotientFilter::with_defaults();
449        for i in 0u64..20 {
450            qf.insert(&i);
451        }
452
453        // Remove even numbers
454        for i in (0u64..20).step_by(2) {
455            qf.remove(&i);
456        }
457
458        // Odd numbers should still be present
459        for i in (1u64..20).step_by(2) {
460            assert!(qf.contains(&i), "odd {i} should survive removal");
461        }
462        // Even numbers should be gone
463        for i in (0u64..20).step_by(2) {
464            assert!(!qf.contains(&i), "even {i} should be removed");
465        }
466    }
467
468    #[test]
469    fn clear_filter() {
470        let mut qf = QuotientFilter::with_defaults();
471        for i in 0u64..100 {
472            qf.insert(&i);
473        }
474        assert_eq!(qf.len(), 100);
475
476        qf.clear();
477        assert!(qf.is_empty());
478        assert_eq!(qf.len(), 0);
479
480        for i in 0u64..100 {
481            assert!(!qf.contains(&i));
482        }
483    }
484
485    #[test]
486    fn load_factor() {
487        let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 4, r: 4 }); // 16 slots
488        assert!((qf.load_factor() - 0.0).abs() < f64::EPSILON);
489
490        for i in 0u64..8 {
491            qf.insert(&i);
492        }
493        assert!((qf.load_factor() - 0.5).abs() < 0.01);
494    }
495
496    #[test]
497    fn config_for_capacity() {
498        let config = QuotientFilterConfig::for_capacity(10000, 0.01);
499        assert!(config.r >= 7, "r should be at least 7 for 1% FP rate");
500        assert!(
501            (1usize << config.q) >= 10000,
502            "capacity should exceed expected items"
503        );
504    }
505
506    #[test]
507    fn config_for_capacity_sanitizes_invalid_fp_rates() {
508        let zero = QuotientFilterConfig::for_capacity(128, 0.0);
509        let nan = QuotientFilterConfig::for_capacity(128, f64::NAN);
510
511        assert!(zero.r >= 7);
512        assert!(nan.r >= 7);
513        assert!((1usize << zero.q) >= 128);
514        assert!((1usize << nan.q) >= 128);
515    }
516
517    #[test]
518    fn string_keys() {
519        let mut qf = QuotientFilter::with_defaults();
520        qf.insert(&"hello");
521        qf.insert(&"world");
522        assert!(qf.contains(&"hello"));
523        assert!(qf.contains(&"world"));
524        assert!(!qf.contains(&"foo"));
525    }
526
527    #[test]
528    fn row_id_tracking() {
529        // Simulate dirty row tracking
530        let mut dirty = QuotientFilter::new(QuotientFilterConfig { q: 12, r: 8 });
531
532        // Mark rows as dirty
533        let dirty_rows = [5u32, 42, 100, 255, 1000];
534        for &row in &dirty_rows {
535            dirty.insert(&row);
536        }
537
538        // Check which rows need re-render
539        for row in 0u32..2000 {
540            if dirty_rows.contains(&row) {
541                assert!(dirty.contains(&row), "dirty row {row} not found");
542            }
543        }
544
545        // After re-render, remove from dirty set
546        for &row in &dirty_rows {
547            dirty.remove(&row);
548        }
549        assert!(dirty.is_empty());
550    }
551
552    #[test]
553    fn merge_filters() {
554        let config = QuotientFilterConfig { q: 8, r: 8 };
555        let mut qf1 = QuotientFilter::new(config);
556        let mut qf2 = QuotientFilter::new(config);
557
558        for i in 0u64..10 {
559            qf1.insert(&i);
560        }
561        for i in 5u64..15 {
562            qf2.insert(&i);
563        }
564
565        let added = qf1.merge(&qf2);
566        assert!(added > 0, "merge should add elements");
567
568        // All elements from both should be present
569        for i in 0u64..15 {
570            assert!(qf1.contains(&i), "merged filter should contain {i}");
571        }
572    }
573
574    #[test]
575    fn merge_mismatched_config_is_noop() {
576        let mut qf1 = QuotientFilter::new(QuotientFilterConfig { q: 8, r: 8 });
577        let qf2 = QuotientFilter::new(QuotientFilterConfig { q: 10, r: 8 });
578
579        qf1.insert(&1u64);
580        let added = qf1.merge(&qf2);
581        assert_eq!(added, 0, "mismatched configs should not merge");
582    }
583
584    #[test]
585    fn theoretical_fp_rate_increases_with_load() {
586        let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 10, r: 8 });
587        let rate_empty = qf.theoretical_fp_rate();
588
589        for i in 0u64..100 {
590            qf.insert(&i);
591        }
592        let rate_loaded = qf.theoretical_fp_rate();
593
594        assert!(
595            rate_empty < rate_loaded,
596            "FP rate should increase with load"
597        );
598    }
599
600    #[test]
601    fn space_comparison_vs_bitset() {
602        // Quotient Filter for 1000 dirty rows out of 1M total
603        let dirty_config = QuotientFilterConfig::for_capacity(1000, 0.01);
604        let dirty_qf_bits = (dirty_config.r as usize + 3) * (1usize << dirty_config.q);
605
606        // Bitset for 1M rows: 1M bits
607        let bitset_bits = 1_000_000usize;
608
609        assert!(
610            dirty_qf_bits < bitset_bits,
611            "QF ({dirty_qf_bits} bits) should be smaller than bitset ({bitset_bits} bits) for sparse dirty sets"
612        );
613    }
614
615    #[test]
616    fn default_config() {
617        let qf = QuotientFilter::default();
618        assert_eq!(qf.capacity(), 1024);
619        assert!(qf.is_empty());
620    }
621
622    #[test]
623    fn insert_after_remove_reuses_slot() {
624        let mut qf = QuotientFilter::with_defaults();
625        qf.insert(&1u64);
626        qf.remove(&1u64);
627        assert!(qf.insert(&1u64));
628        assert!(qf.contains(&1u64));
629        assert_eq!(qf.len(), 1);
630    }
631
632    #[test]
633    fn heavy_load() {
634        // Use larger r to avoid fingerprint collisions
635        let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 10, r: 20 }); // 1024 slots, 30-bit fingerprints
636        let target = 500;
637        let mut inserted = 0;
638        for i in 0u64..target as u64 {
639            if qf.insert(&i) {
640                inserted += 1;
641            }
642        }
643        assert_eq!(inserted, target);
644
645        // All should be findable (no false negatives)
646        for i in 0u64..target as u64 {
647            assert!(qf.contains(&i), "element {i} missing at high load");
648        }
649    }
650}