Skip to main content

radicle_protocol/service/
filter.rs

1#![allow(clippy::identity_op)]
2use std::ops::{Deref, DerefMut};
3
4pub use bloomy::BloomFilter;
5
6use radicle::identity::RepoId;
7
8/// Size in bytes of *large* bloom filter.
9/// It can store about 13'675 items with a false positive rate of 1%.
10pub const FILTER_SIZE_L: usize = 16 * 1024;
11/// Size in bytes of *medium* bloom filter.
12/// It can store about 3'419 items with a false positive rate of 1%.
13pub const FILTER_SIZE_M: usize = 4 * 1024;
14/// Size in bytes of *small* bloom filter.
15/// It can store about 855 items with a false positive rate of 1%.
16pub const FILTER_SIZE_S: usize = 1 * 1024;
17
18/// Valid filter sizes.
19pub const FILTER_SIZES: [usize; 3] = [FILTER_SIZE_S, FILTER_SIZE_M, FILTER_SIZE_L];
20
21/// Target false positive rate of filter.
22pub const FILTER_FP_RATE: f64 = 0.01;
23/// Number of hashes used for bloom filter.
24pub const FILTER_HASHES: usize = 7;
25
26/// Inventory filter used for subscriptions and inventory comparison.
27///
28/// The [`Default`] instance has all bits set to `1`, ie. it will match
29/// everything.
30#[derive(Clone, PartialEq, Eq, Debug)]
31pub struct Filter(BloomFilter<RepoId>);
32
33impl Default for Filter {
34    fn default() -> Self {
35        Self(BloomFilter::from(vec![0xff; FILTER_SIZE_S]))
36    }
37}
38
39impl Filter {
40    /// Create a new filter with the given items.
41    ///
42    /// Uses the iterator's size hint to determine the size of the filter.
43    pub fn new(ids: impl IntoIterator<Item = RepoId>) -> Self {
44        let iterator = ids.into_iter();
45        let (min, _) = iterator.size_hint();
46        let size = bloomy::bloom::optimal_bits(min, FILTER_FP_RATE) / 8;
47        let size = if size > FILTER_SIZE_M {
48            FILTER_SIZE_L
49        } else if size > FILTER_SIZE_S {
50            FILTER_SIZE_M
51        } else {
52            FILTER_SIZE_S
53        };
54        let mut bloom = BloomFilter::with_size(size);
55
56        for id in iterator {
57            bloom.insert(&id);
58        }
59        Self(bloom)
60    }
61
62    pub fn allowed_by(
63        policies: impl Iterator<
64            Item = Result<radicle::node::policy::SeedPolicy, radicle::node::policy::store::Error>,
65        >,
66    ) -> Self {
67        let mut ids = Vec::new();
68
69        for seed in policies {
70            let seed = match seed {
71                Ok(seed) => seed,
72                Err(err) => {
73                    log::debug!(target: "protocol::filter", "Failed to read seed policy: {err}");
74                    continue;
75                }
76            };
77
78            if seed.policy.is_allow() {
79                ids.push(seed.rid);
80            }
81        }
82
83        Self::new(ids)
84    }
85
86    /// Empty filter with nothing set.
87    pub fn empty() -> Self {
88        Self(BloomFilter::from(vec![0x0; FILTER_SIZE_S]))
89    }
90
91    /// Size in bytes.
92    pub fn size(&self) -> usize {
93        self.0.bits() / 8
94    }
95}
96
97impl Deref for Filter {
98    type Target = BloomFilter<RepoId>;
99
100    fn deref(&self) -> &Self::Target {
101        &self.0
102    }
103}
104
105impl DerefMut for Filter {
106    fn deref_mut(&mut self) -> &mut Self::Target {
107        &mut self.0
108    }
109}
110
111impl From<BloomFilter<RepoId>> for Filter {
112    fn from(bloom: BloomFilter<RepoId>) -> Self {
113        Self(bloom)
114    }
115}
116
117#[allow(clippy::unwrap_used)]
118#[cfg(any(test, feature = "test"))]
119impl qcheck::Arbitrary for Filter {
120    fn arbitrary(g: &mut qcheck::Gen) -> Self {
121        let size = *g
122            .choose(&[FILTER_SIZE_S, FILTER_SIZE_M, FILTER_SIZE_L])
123            .unwrap();
124        let mut bytes = vec![0; size];
125        for _ in 0..64 {
126            let index = usize::arbitrary(g) % bytes.len();
127            bytes[index] = u8::arbitrary(g);
128        }
129        Self::from(BloomFilter::from(bytes))
130    }
131}
132
133#[allow(clippy::unwrap_used)]
134#[cfg(test)]
135mod test {
136    use super::*;
137    use radicle::test::arbitrary;
138
139    #[test]
140    fn test_parameters() {
141        // To store 10'000 items with a false positive rate of 1%, we need about 12KB.
142        assert_eq!(bloomy::bloom::optimal_bits(10_000, 0.01) / 8, 11_981);
143        // To store 1'000 items with a false positive rate of 1%, we need about 1KB.
144        assert_eq!(bloomy::bloom::optimal_bits(1_000, 0.01) / 8, 1198);
145        // To store 100 items with a false positive rate of 1%, we need about 120B.
146        assert_eq!(bloomy::bloom::optimal_bits(100, 0.01) / 8, 119);
147
148        // With 16KB, we can store 13'675 items with a false positive rate of 1%.
149        assert_eq!(
150            bloomy::bloom::optimal_capacity(FILTER_SIZE_L * 8, FILTER_FP_RATE),
151            13_675
152        );
153        // With 4KB, we can store 3'419 items with a false positive rate of 1%.
154        assert_eq!(
155            bloomy::bloom::optimal_capacity(FILTER_SIZE_M * 8, FILTER_FP_RATE),
156            3419
157        );
158        // With 1KB, we can store 855 items with a false positive rate of 1%.
159        assert_eq!(
160            bloomy::bloom::optimal_capacity(FILTER_SIZE_S * 8, FILTER_FP_RATE),
161            855
162        );
163
164        assert_eq!(
165            bloomy::bloom::optimal_hashes(FILTER_SIZE_L * 8, 13_675),
166            FILTER_HASHES
167        );
168        assert_eq!(
169            bloomy::bloom::optimal_hashes(FILTER_SIZE_M * 8, 3419),
170            FILTER_HASHES
171        );
172        assert_eq!(
173            bloomy::bloom::optimal_hashes(FILTER_SIZE_S * 8, 855),
174            FILTER_HASHES
175        );
176    }
177
178    #[test]
179    fn test_sizes() {
180        let ids = arbitrary::vec::<RepoId>(3420);
181        let f = Filter::new(ids.iter().cloned().take(10));
182        assert_eq!(f.size(), FILTER_SIZE_S);
183
184        let f = Filter::new(ids.iter().cloned().take(1000));
185        assert_eq!(f.size(), FILTER_SIZE_M);
186
187        let f = Filter::new(ids.iter().cloned());
188        assert_eq!(f.size(), FILTER_SIZE_L);
189
190        // Just checking that iterators over hash sets give correct size hints.
191        let hs = arbitrary::set::<RepoId>(42..=42);
192        assert_eq!(hs.iter().size_hint(), (42, Some(42)));
193    }
194
195    /// Checks that a particular filter extracted from a live deployment of
196    /// `radicle-node` at `release/1.5.0`, which is known to contain
197    /// "heartwood", actually also evaluates to contain "heartwood".
198    ///
199    /// This is to catch regressions in the [`std::hash::Hash`] implementation
200    /// [`RepoId`] and other breaking changes to [`Filter`].
201    #[test]
202    fn compatible() {
203        let filter = {
204            let mut filter = [0u8; FILTER_SIZE_S];
205
206            #[rustfmt::skip]
207            const COMPRESSED_FIXTURE: [(usize, u8); 100] = [
208                (0x002, 0xa8), (0x010, 0x08), (0x016, 0x40), (0x01b, 0x20), (0x04d, 0x04),
209                (0x050, 0x04), (0x05a, 0x02), (0x05e, 0x80), (0x06d, 0x40), (0x075, 0x08),
210                (0x082, 0x80), (0x084, 0x80), (0x089, 0x01), (0x08b, 0x08), (0x099, 0x04),
211                (0x0a1, 0x40), (0x0a7, 0x40), (0x0be, 0x40), (0x0d3, 0x01), (0x0e2, 0x01),
212                (0x0ee, 0x08), (0x0f2, 0x04), (0x109, 0x08), (0x119, 0x10), (0x15b, 0x40),
213                (0x160, 0x44), (0x163, 0x01), (0x168, 0x08), (0x16b, 0x01), (0x16d, 0x04),
214                (0x176, 0x80), (0x17e, 0x40), (0x189, 0x20), (0x18f, 0x04), (0x19f, 0x20),
215                (0x1b2, 0x08), (0x1b5, 0x04), (0x1b8, 0x20), (0x1ed, 0x10), (0x1f1, 0x40),
216                (0x1f3, 0x04), (0x1fa, 0x40), (0x20b, 0x08), (0x20e, 0x04), (0x218, 0x01),
217                (0x231, 0x02), (0x23d, 0x80), (0x248, 0x10), (0x24e, 0x04), (0x250, 0x01),
218                (0x251, 0x01), (0x255, 0x04), (0x25a, 0x10), (0x265, 0x20), (0x27c, 0x01),
219                (0x284, 0x04), (0x285, 0x20), (0x28d, 0x81), (0x29f, 0x01), (0x2a6, 0x10),
220                (0x2ac, 0x40), (0x2ad, 0x10), (0x2b4, 0x04), (0x2b8, 0x02), (0x2cb, 0x01),
221                (0x2d1, 0x80), (0x2d4, 0x01), (0x2d7, 0x40), (0x2ed, 0x80), (0x2f7, 0x01),
222                (0x302, 0x80), (0x303, 0x40), (0x307, 0x40), (0x309, 0x04), (0x318, 0x04),
223                (0x31e, 0x10), (0x335, 0x01), (0x336, 0x40), (0x338, 0x40), (0x351, 0x80),
224                (0x353, 0x10), (0x359, 0x0c), (0x360, 0x40), (0x367, 0x01), (0x36b, 0x08),
225                (0x36c, 0x40), (0x37b, 0x10), (0x37d, 0x40), (0x399, 0x02), (0x39f, 0x02),
226                (0x3a6, 0x02), (0x3a9, 0x04), (0x3ab, 0x01), (0x3cb, 0x04), (0x3e2, 0x01),
227                (0x3e5, 0x10), (0x3ea, 0x40), (0x3ed, 0x40), (0x3f2, 0x02), (0x3f5, 0x80),
228            ];
229
230            for (i, v) in COMPRESSED_FIXTURE.into_iter() {
231                filter[i] = v;
232            }
233
234            Filter(BloomFilter::from(filter.to_vec()))
235        };
236
237        assert!(filter.contains(&"rad:z3gqcJUoA1n9HaHKufZs5FCSGazv5".parse().unwrap()),);
238    }
239}