radicle_protocol/service/
filter.rs

1#![allow(clippy::identity_op)]
2use std::ops::{Deref, DerefMut};
3
4pub use bloomy::BloomFilter;
5
6use radicle::identity::RepoId;
7
8/// Size in bytes of *large* bloom filter.
9/// It can store about 13'675 items with a false positive rate of 1%.
10pub const FILTER_SIZE_L: usize = 16 * 1024;
11/// Size in bytes of *medium* bloom filter.
12/// It can store about 3'419 items with a false positive rate of 1%.
13pub const FILTER_SIZE_M: usize = 4 * 1024;
14/// Size in bytes of *small* bloom filter.
15/// It can store about 855 items with a false positive rate of 1%.
16pub const FILTER_SIZE_S: usize = 1 * 1024;
17
18/// Valid filter sizes.
19pub const FILTER_SIZES: [usize; 3] = [FILTER_SIZE_S, FILTER_SIZE_M, FILTER_SIZE_L];
20
21/// Target false positive rate of filter.
22pub const FILTER_FP_RATE: f64 = 0.01;
23/// Number of hashes used for bloom filter.
24pub const FILTER_HASHES: usize = 7;
25
26/// Inventory filter used for subscriptions and inventory comparison.
27///
28/// The [`Default`] instance has all bits set to `1`, ie. it will match
29/// everything.
30#[derive(Clone, PartialEq, Eq, Debug)]
31pub struct Filter(BloomFilter<RepoId>);
32
33impl Default for Filter {
34    fn default() -> Self {
35        Self(BloomFilter::from(vec![0xff; FILTER_SIZE_S]))
36    }
37}
38
39impl Filter {
40    /// Create a new filter with the given items.
41    ///
42    /// Uses the iterator's size hint to determine the size of the filter.
43    pub fn new(ids: impl IntoIterator<Item = RepoId>) -> Self {
44        let iterator = ids.into_iter();
45        let (min, _) = iterator.size_hint();
46        let size = bloomy::bloom::optimal_bits(min, FILTER_FP_RATE) / 8;
47        let size = if size > FILTER_SIZE_M {
48            FILTER_SIZE_L
49        } else if size > FILTER_SIZE_S {
50            FILTER_SIZE_M
51        } else {
52            FILTER_SIZE_S
53        };
54        let mut bloom = BloomFilter::with_size(size);
55
56        for id in iterator {
57            bloom.insert(&id);
58        }
59        Self(bloom)
60    }
61
62    pub fn allowed_by(
63        policies: impl Iterator<
64            Item = Result<radicle::node::policy::SeedPolicy, radicle::node::policy::store::Error>,
65        >,
66    ) -> Self {
67        let mut ids = Vec::new();
68
69        for seed in policies {
70            let seed = match seed {
71                Ok(seed) => seed,
72                Err(err) => {
73                    log::error!(target: "protocol::filter", "Failed to read seed policy: {err}");
74                    continue;
75                }
76            };
77
78            if seed.policy.is_allow() {
79                ids.push(seed.rid);
80            }
81        }
82
83        Self::new(ids)
84    }
85
86    /// Empty filter with nothing set.
87    pub fn empty() -> Self {
88        Self(BloomFilter::from(vec![0x0; FILTER_SIZE_S]))
89    }
90
91    /// Size in bytes.
92    pub fn size(&self) -> usize {
93        self.0.bits() / 8
94    }
95}
96
97impl Deref for Filter {
98    type Target = BloomFilter<RepoId>;
99
100    fn deref(&self) -> &Self::Target {
101        &self.0
102    }
103}
104
105impl DerefMut for Filter {
106    fn deref_mut(&mut self) -> &mut Self::Target {
107        &mut self.0
108    }
109}
110
111impl From<BloomFilter<RepoId>> for Filter {
112    fn from(bloom: BloomFilter<RepoId>) -> Self {
113        Self(bloom)
114    }
115}
116
117#[allow(clippy::unwrap_used)]
118#[cfg(any(test, feature = "test"))]
119impl qcheck::Arbitrary for Filter {
120    fn arbitrary(g: &mut qcheck::Gen) -> Self {
121        let size = *g
122            .choose(&[FILTER_SIZE_S, FILTER_SIZE_M, FILTER_SIZE_L])
123            .unwrap();
124        let mut bytes = vec![0; size];
125        for _ in 0..64 {
126            let index = usize::arbitrary(g) % bytes.len();
127            bytes[index] = u8::arbitrary(g);
128        }
129        Self::from(BloomFilter::from(bytes))
130    }
131}
132
133#[cfg(test)]
134mod test {
135    use super::*;
136    use radicle::test::arbitrary;
137
138    #[test]
139    fn test_parameters() {
140        // To store 10'000 items with a false positive rate of 1%, we need about 12KB.
141        assert_eq!(bloomy::bloom::optimal_bits(10_000, 0.01) / 8, 11_981);
142        // To store 1'000 items with a false positive rate of 1%, we need about 1KB.
143        assert_eq!(bloomy::bloom::optimal_bits(1_000, 0.01) / 8, 1198);
144        // To store 100 items with a false positive rate of 1%, we need about 120B.
145        assert_eq!(bloomy::bloom::optimal_bits(100, 0.01) / 8, 119);
146
147        // With 16KB, we can store 13'675 items with a false positive rate of 1%.
148        assert_eq!(
149            bloomy::bloom::optimal_capacity(FILTER_SIZE_L * 8, FILTER_FP_RATE),
150            13_675
151        );
152        // With 4KB, we can store 3'419 items with a false positive rate of 1%.
153        assert_eq!(
154            bloomy::bloom::optimal_capacity(FILTER_SIZE_M * 8, FILTER_FP_RATE),
155            3419
156        );
157        // With 1KB, we can store 855 items with a false positive rate of 1%.
158        assert_eq!(
159            bloomy::bloom::optimal_capacity(FILTER_SIZE_S * 8, FILTER_FP_RATE),
160            855
161        );
162
163        assert_eq!(
164            bloomy::bloom::optimal_hashes(FILTER_SIZE_L * 8, 13_675),
165            FILTER_HASHES
166        );
167        assert_eq!(
168            bloomy::bloom::optimal_hashes(FILTER_SIZE_M * 8, 3419),
169            FILTER_HASHES
170        );
171        assert_eq!(
172            bloomy::bloom::optimal_hashes(FILTER_SIZE_S * 8, 855),
173            FILTER_HASHES
174        );
175    }
176
177    #[test]
178    fn test_sizes() {
179        let ids = arbitrary::vec::<RepoId>(3420);
180        let f = Filter::new(ids.iter().cloned().take(10));
181        assert_eq!(f.size(), FILTER_SIZE_S);
182
183        let f = Filter::new(ids.iter().cloned().take(1000));
184        assert_eq!(f.size(), FILTER_SIZE_M);
185
186        let f = Filter::new(ids.iter().cloned());
187        assert_eq!(f.size(), FILTER_SIZE_L);
188
189        // Just checking that iterators over hash sets give correct size hints.
190        let hs = arbitrary::set::<RepoId>(42..=42);
191        assert_eq!(hs.iter().size_hint(), (42, Some(42)));
192    }
193}