radicle_node/service/
filter.rs1#![allow(clippy::identity_op)]
2use std::ops::{Deref, DerefMut};
3
4pub use bloomy::BloomFilter;
5
6use crate::identity::RepoId;
7
8pub const FILTER_SIZE_L: usize = 16 * 1024;
11pub const FILTER_SIZE_M: usize = 4 * 1024;
14pub const FILTER_SIZE_S: usize = 1 * 1024;
17
18pub const FILTER_SIZES: [usize; 3] = [FILTER_SIZE_S, FILTER_SIZE_M, FILTER_SIZE_L];
20
21pub const FILTER_FP_RATE: f64 = 0.01;
23pub const FILTER_HASHES: usize = 7;
25
26#[derive(Clone, PartialEq, Eq, Debug)]
31pub struct Filter(BloomFilter<RepoId>);
32
33impl Default for Filter {
34 fn default() -> Self {
35 Self(BloomFilter::from(vec![0xff; FILTER_SIZE_S]))
36 }
37}
38
39impl Filter {
40 pub fn new(ids: impl IntoIterator<Item = RepoId>) -> Self {
44 let iterator = ids.into_iter();
45 let (min, _) = iterator.size_hint();
46 let size = bloomy::bloom::optimal_bits(min, FILTER_FP_RATE) / 8;
47 let size = if size > FILTER_SIZE_M {
48 FILTER_SIZE_L
49 } else if size > FILTER_SIZE_S {
50 FILTER_SIZE_M
51 } else {
52 FILTER_SIZE_S
53 };
54 let mut bloom = BloomFilter::with_size(size);
55
56 for id in iterator {
57 bloom.insert(&id);
58 }
59 Self(bloom)
60 }
61
62 pub fn empty() -> Self {
64 Self(BloomFilter::from(vec![0x0; FILTER_SIZE_S]))
65 }
66
67 pub fn size(&self) -> usize {
69 self.0.bits() / 8
70 }
71}
72
73impl Deref for Filter {
74 type Target = BloomFilter<RepoId>;
75
76 fn deref(&self) -> &Self::Target {
77 &self.0
78 }
79}
80
81impl DerefMut for Filter {
82 fn deref_mut(&mut self) -> &mut Self::Target {
83 &mut self.0
84 }
85}
86
87impl From<BloomFilter<RepoId>> for Filter {
88 fn from(bloom: BloomFilter<RepoId>) -> Self {
89 Self(bloom)
90 }
91}
92
93#[cfg(test)]
94mod test {
95 use super::*;
96 use crate::test::arbitrary;
97
98 #[test]
99 fn test_parameters() {
100 assert_eq!(bloomy::bloom::optimal_bits(10_000, 0.01) / 8, 11_981);
102 assert_eq!(bloomy::bloom::optimal_bits(1_000, 0.01) / 8, 1198);
104 assert_eq!(bloomy::bloom::optimal_bits(100, 0.01) / 8, 119);
106
107 assert_eq!(
109 bloomy::bloom::optimal_capacity(FILTER_SIZE_L * 8, FILTER_FP_RATE),
110 13_675
111 );
112 assert_eq!(
114 bloomy::bloom::optimal_capacity(FILTER_SIZE_M * 8, FILTER_FP_RATE),
115 3419
116 );
117 assert_eq!(
119 bloomy::bloom::optimal_capacity(FILTER_SIZE_S * 8, FILTER_FP_RATE),
120 855
121 );
122
123 assert_eq!(
124 bloomy::bloom::optimal_hashes(FILTER_SIZE_L * 8, 13_675),
125 FILTER_HASHES
126 );
127 assert_eq!(
128 bloomy::bloom::optimal_hashes(FILTER_SIZE_M * 8, 3419),
129 FILTER_HASHES
130 );
131 assert_eq!(
132 bloomy::bloom::optimal_hashes(FILTER_SIZE_S * 8, 855),
133 FILTER_HASHES
134 );
135 }
136
137 #[test]
138 fn test_sizes() {
139 let ids = arbitrary::vec::<RepoId>(3420);
140 let f = Filter::new(ids.iter().cloned().take(10));
141 assert_eq!(f.size(), FILTER_SIZE_S);
142
143 let f = Filter::new(ids.iter().cloned().take(1000));
144 assert_eq!(f.size(), FILTER_SIZE_M);
145
146 let f = Filter::new(ids.iter().cloned());
147 assert_eq!(f.size(), FILTER_SIZE_L);
148
149 let hs = arbitrary::set::<RepoId>(42..=42);
151 assert_eq!(hs.iter().size_hint(), (42, Some(42)));
152 }
153}