1#![forbid(unsafe_code)]
2
3use std::collections::hash_map::DefaultHasher;
43use std::hash::{Hash, Hasher};
44
45#[derive(Debug, Clone)]
47pub struct QuotientFilter {
48 q: u32,
50 r: u32,
52 slots: Vec<Option<(u32, u64)>>,
54 count: usize,
56 capacity: usize,
58}
59
60#[derive(Debug, Clone, Copy)]
62pub struct QuotientFilterConfig {
63 pub q: u32,
65 pub r: u32,
67}
68
69impl QuotientFilterConfig {
70 #[must_use]
75 pub fn for_capacity(expected_items: usize, fp_rate: f64) -> Self {
76 let fp_rate = if fp_rate.is_finite() && fp_rate > 0.0 {
77 fp_rate.min(1.0 - f64::EPSILON)
78 } else {
79 0.01
80 };
81
82 let r = (-fp_rate.log2()).ceil() as u32;
84 let r = r.clamp(2, 32);
85
86 let needed = ((expected_items as f64 / 0.75).ceil()) as u64;
89 let q = (64 - needed.leading_zeros()).clamp(4, 28);
90
91 Self { q, r }
92 }
93}
94
95impl Default for QuotientFilterConfig {
96 fn default() -> Self {
97 Self { q: 10, r: 8 } }
99}
100
101impl QuotientFilter {
102 #[must_use]
104 pub fn new(config: QuotientFilterConfig) -> Self {
105 let q = config.q.min(28); let r = config.r.clamp(1, 32);
107 let capacity = 1usize << q;
108
109 Self {
110 q,
111 r,
112 slots: vec![None; capacity],
113 count: 0,
114 capacity,
115 }
116 }
117
118 #[must_use]
120 pub fn with_defaults() -> Self {
121 Self::new(QuotientFilterConfig::default())
122 }
123
124 #[must_use]
126 pub fn len(&self) -> usize {
127 self.count
128 }
129
130 #[must_use]
132 pub fn is_empty(&self) -> bool {
133 self.count == 0
134 }
135
136 #[must_use]
138 pub fn load_factor(&self) -> f64 {
139 self.count as f64 / self.capacity as f64
140 }
141
142 #[must_use]
144 pub fn capacity(&self) -> usize {
145 self.capacity
146 }
147
148 #[must_use]
150 pub fn theoretical_fp_rate(&self) -> f64 {
151 let base_rate = 1.0 / (1u64 << self.r) as f64;
153 1.0 - (1.0 - base_rate).powi(self.count as i32)
154 }
155
156 fn fingerprint<T: Hash>(&self, item: &T) -> (u32, u64) {
158 let mut hasher = DefaultHasher::new();
159 item.hash(&mut hasher);
160 let h = hasher.finish();
161
162 let q_mask = (1u32 << self.q) - 1;
163 let r_mask = (1u64 << self.r) - 1;
164
165 let quotient = ((h >> self.r) as u32) & q_mask;
166 let remainder = h & r_mask;
167 (quotient, remainder)
168 }
169
170 pub fn insert<T: Hash>(&mut self, item: &T) -> bool {
172 if self.count >= self.capacity {
173 return false;
174 }
175
176 let (quotient, remainder) = self.fingerprint(item);
177
178 let mut pos = quotient as usize;
180 for _ in 0..self.capacity {
181 match self.slots[pos] {
182 None => {
183 self.slots[pos] = Some((quotient, remainder));
185 self.count += 1;
186 return true;
187 }
188 Some((q, r)) if q == quotient && r == remainder => {
189 return false;
191 }
192 _ => {
193 pos = (pos + 1) % self.capacity;
195 }
196 }
197 }
198
199 false }
201
202 #[must_use]
208 pub fn contains<T: Hash>(&self, item: &T) -> bool {
209 let (quotient, remainder) = self.fingerprint(item);
210
211 let mut pos = quotient as usize;
212 for _ in 0..self.capacity {
213 match self.slots[pos] {
214 None => return false, Some((q, r)) if q == quotient && r == remainder => return true,
216 _ => pos = (pos + 1) % self.capacity,
217 }
218 }
219
220 false
221 }
222
223 pub fn remove<T: Hash>(&mut self, item: &T) -> bool {
227 let (quotient, remainder) = self.fingerprint(item);
228
229 let mut pos = quotient as usize;
231 let mut found_pos = None;
232 for _ in 0..self.capacity {
233 match self.slots[pos] {
234 None => break,
235 Some((q, r)) if q == quotient && r == remainder => {
236 found_pos = Some(pos);
237 break;
238 }
239 _ => pos = (pos + 1) % self.capacity,
240 }
241 }
242
243 let mut pos = match found_pos {
244 Some(p) => p,
245 None => return false,
246 };
247
248 self.slots[pos] = None;
251 self.count -= 1;
252
253 let mut current = (pos + 1) % self.capacity;
254 loop {
255 match self.slots[current] {
256 None => break, Some((q, _r)) => {
258 let canonical = q as usize;
259 let should_shift = if canonical <= pos {
262 current > pos || current < canonical
264 } else {
265 current > pos && current < canonical
267 };
268
269 if !should_shift {
270 break;
271 }
272
273 self.slots[pos] = self.slots[current];
275 self.slots[current] = None;
276 pos = current;
277 }
278 }
279 current = (current + 1) % self.capacity;
280 }
281
282 true
283 }
284
285 pub fn clear(&mut self) {
287 self.slots.fill(None);
288 self.count = 0;
289 }
290
291 pub fn merge(&mut self, other: &QuotientFilter) -> usize {
296 if self.q != other.q || self.r != other.r {
297 return 0;
298 }
299
300 let mut added = 0;
301 for slot in &other.slots {
302 if let &Some((q, r)) = slot {
303 let mut pos = q as usize;
305 let mut found = false;
306 for _ in 0..self.capacity {
307 match self.slots[pos] {
308 None => break,
309 Some((eq, er)) if eq == q && er == r => {
310 found = true;
311 break;
312 }
313 _ => pos = (pos + 1) % self.capacity,
314 }
315 }
316
317 if !found {
318 let mut ipos = q as usize;
320 for _ in 0..self.capacity {
321 if self.slots[ipos].is_none() {
322 self.slots[ipos] = Some((q, r));
323 self.count += 1;
324 added += 1;
325 break;
326 }
327 ipos = (ipos + 1) % self.capacity;
328 }
329 }
330 }
331 }
332 added
333 }
334}
335
336impl Default for QuotientFilter {
337 fn default() -> Self {
338 Self::with_defaults()
339 }
340}
341
342#[cfg(test)]
347mod tests {
348 use super::*;
349
350 #[test]
351 fn empty_filter() {
352 let qf = QuotientFilter::with_defaults();
353 assert!(qf.is_empty());
354 assert_eq!(qf.len(), 0);
355 assert_eq!(qf.capacity(), 1024);
356 assert!(!qf.contains(&42u64));
357 }
358
359 #[test]
360 fn insert_and_lookup() {
361 let mut qf = QuotientFilter::with_defaults();
362 assert!(qf.insert(&100u64));
363 assert!(qf.contains(&100u64));
364 assert_eq!(qf.len(), 1);
365 }
366
367 #[test]
368 fn duplicate_insert_returns_false() {
369 let mut qf = QuotientFilter::with_defaults();
370 assert!(qf.insert(&42u64));
371 assert!(!qf.insert(&42u64));
372 assert_eq!(qf.len(), 1);
373 }
374
375 #[test]
376 fn insert_multiple() {
377 let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 8, r: 8 });
378 for i in 0u64..50 {
379 qf.insert(&i);
380 }
381 assert_eq!(qf.len(), 50);
382
383 for i in 0u64..50 {
385 assert!(qf.contains(&i), "element {i} should be found");
386 }
387 }
388
389 #[test]
390 fn no_false_negatives() {
391 let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 10, r: 10 });
392 let items: Vec<u64> = (0..200).collect();
393
394 for item in &items {
395 qf.insert(item);
396 }
397
398 for item in &items {
400 assert!(qf.contains(item), "false negative for {item}");
401 }
402 }
403
404 #[test]
405 fn false_positive_rate_bounded() {
406 let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 12, r: 8 });
407
408 for i in 0u64..500 {
410 qf.insert(&i);
411 }
412
413 let mut false_positives = 0;
415 for i in 10000u64..20000 {
416 if qf.contains(&i) {
417 false_positives += 1;
418 }
419 }
420
421 let fp_rate = false_positives as f64 / 10000.0;
422 assert!(
424 fp_rate < 0.05,
425 "false positive rate too high: {fp_rate:.4} ({false_positives}/10000)"
426 );
427 }
428
429 #[test]
430 fn remove_element() {
431 let mut qf = QuotientFilter::with_defaults();
432 qf.insert(&42u64);
433 assert!(qf.contains(&42u64));
434
435 assert!(qf.remove(&42u64));
436 assert_eq!(qf.len(), 0);
437 assert!(!qf.contains(&42u64));
438 }
439
440 #[test]
441 fn remove_nonexistent() {
442 let mut qf = QuotientFilter::with_defaults();
443 assert!(!qf.remove(&42u64));
444 }
445
446 #[test]
447 fn remove_preserves_others() {
448 let mut qf = QuotientFilter::with_defaults();
449 for i in 0u64..20 {
450 qf.insert(&i);
451 }
452
453 for i in (0u64..20).step_by(2) {
455 qf.remove(&i);
456 }
457
458 for i in (1u64..20).step_by(2) {
460 assert!(qf.contains(&i), "odd {i} should survive removal");
461 }
462 for i in (0u64..20).step_by(2) {
464 assert!(!qf.contains(&i), "even {i} should be removed");
465 }
466 }
467
468 #[test]
469 fn clear_filter() {
470 let mut qf = QuotientFilter::with_defaults();
471 for i in 0u64..100 {
472 qf.insert(&i);
473 }
474 assert_eq!(qf.len(), 100);
475
476 qf.clear();
477 assert!(qf.is_empty());
478 assert_eq!(qf.len(), 0);
479
480 for i in 0u64..100 {
481 assert!(!qf.contains(&i));
482 }
483 }
484
485 #[test]
486 fn load_factor() {
487 let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 4, r: 4 }); assert!((qf.load_factor() - 0.0).abs() < f64::EPSILON);
489
490 for i in 0u64..8 {
491 qf.insert(&i);
492 }
493 assert!((qf.load_factor() - 0.5).abs() < 0.01);
494 }
495
496 #[test]
497 fn config_for_capacity() {
498 let config = QuotientFilterConfig::for_capacity(10000, 0.01);
499 assert!(config.r >= 7, "r should be at least 7 for 1% FP rate");
500 assert!(
501 (1usize << config.q) >= 10000,
502 "capacity should exceed expected items"
503 );
504 }
505
506 #[test]
507 fn config_for_capacity_sanitizes_invalid_fp_rates() {
508 let zero = QuotientFilterConfig::for_capacity(128, 0.0);
509 let nan = QuotientFilterConfig::for_capacity(128, f64::NAN);
510
511 assert!(zero.r >= 7);
512 assert!(nan.r >= 7);
513 assert!((1usize << zero.q) >= 128);
514 assert!((1usize << nan.q) >= 128);
515 }
516
517 #[test]
518 fn string_keys() {
519 let mut qf = QuotientFilter::with_defaults();
520 qf.insert(&"hello");
521 qf.insert(&"world");
522 assert!(qf.contains(&"hello"));
523 assert!(qf.contains(&"world"));
524 assert!(!qf.contains(&"foo"));
525 }
526
527 #[test]
528 fn row_id_tracking() {
529 let mut dirty = QuotientFilter::new(QuotientFilterConfig { q: 12, r: 8 });
531
532 let dirty_rows = [5u32, 42, 100, 255, 1000];
534 for &row in &dirty_rows {
535 dirty.insert(&row);
536 }
537
538 for row in 0u32..2000 {
540 if dirty_rows.contains(&row) {
541 assert!(dirty.contains(&row), "dirty row {row} not found");
542 }
543 }
544
545 for &row in &dirty_rows {
547 dirty.remove(&row);
548 }
549 assert!(dirty.is_empty());
550 }
551
552 #[test]
553 fn merge_filters() {
554 let config = QuotientFilterConfig { q: 8, r: 8 };
555 let mut qf1 = QuotientFilter::new(config);
556 let mut qf2 = QuotientFilter::new(config);
557
558 for i in 0u64..10 {
559 qf1.insert(&i);
560 }
561 for i in 5u64..15 {
562 qf2.insert(&i);
563 }
564
565 let added = qf1.merge(&qf2);
566 assert!(added > 0, "merge should add elements");
567
568 for i in 0u64..15 {
570 assert!(qf1.contains(&i), "merged filter should contain {i}");
571 }
572 }
573
574 #[test]
575 fn merge_mismatched_config_is_noop() {
576 let mut qf1 = QuotientFilter::new(QuotientFilterConfig { q: 8, r: 8 });
577 let qf2 = QuotientFilter::new(QuotientFilterConfig { q: 10, r: 8 });
578
579 qf1.insert(&1u64);
580 let added = qf1.merge(&qf2);
581 assert_eq!(added, 0, "mismatched configs should not merge");
582 }
583
584 #[test]
585 fn theoretical_fp_rate_increases_with_load() {
586 let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 10, r: 8 });
587 let rate_empty = qf.theoretical_fp_rate();
588
589 for i in 0u64..100 {
590 qf.insert(&i);
591 }
592 let rate_loaded = qf.theoretical_fp_rate();
593
594 assert!(
595 rate_empty < rate_loaded,
596 "FP rate should increase with load"
597 );
598 }
599
600 #[test]
601 fn space_comparison_vs_bitset() {
602 let dirty_config = QuotientFilterConfig::for_capacity(1000, 0.01);
604 let dirty_qf_bits = (dirty_config.r as usize + 3) * (1usize << dirty_config.q);
605
606 let bitset_bits = 1_000_000usize;
608
609 assert!(
610 dirty_qf_bits < bitset_bits,
611 "QF ({dirty_qf_bits} bits) should be smaller than bitset ({bitset_bits} bits) for sparse dirty sets"
612 );
613 }
614
615 #[test]
616 fn default_config() {
617 let qf = QuotientFilter::default();
618 assert_eq!(qf.capacity(), 1024);
619 assert!(qf.is_empty());
620 }
621
622 #[test]
623 fn insert_after_remove_reuses_slot() {
624 let mut qf = QuotientFilter::with_defaults();
625 qf.insert(&1u64);
626 qf.remove(&1u64);
627 assert!(qf.insert(&1u64));
628 assert!(qf.contains(&1u64));
629 assert_eq!(qf.len(), 1);
630 }
631
632 #[test]
633 fn heavy_load() {
634 let mut qf = QuotientFilter::new(QuotientFilterConfig { q: 10, r: 20 }); let target = 500;
637 let mut inserted = 0;
638 for i in 0u64..target as u64 {
639 if qf.insert(&i) {
640 inserted += 1;
641 }
642 }
643 assert_eq!(inserted, target);
644
645 for i in 0u64..target as u64 {
647 assert!(qf.contains(&i), "element {i} missing at high load");
648 }
649 }
650}