Skip to main content

radicle_protocol/service/
filter.rs

1use std::ops::{Deref, DerefMut};
2
3pub use bloomy::BloomFilter;
4
5use radicle::identity::RepoId;
6
7/// Size in bytes of *large* bloom filter.
8/// It can store about 13'675 items with a false positive rate of 1%.
9pub const FILTER_SIZE_L: usize = 1024 * 16;
10/// Size in bytes of *medium* bloom filter.
11/// It can store about 3'419 items with a false positive rate of 1%.
12pub const FILTER_SIZE_M: usize = 1024 * 4;
13/// Size in bytes of *small* bloom filter.
14/// It can store about 855 items with a false positive rate of 1%.
15pub const FILTER_SIZE_S: usize = 1024;
16
17/// Valid filter sizes.
18pub const FILTER_SIZES: [usize; 3] = [FILTER_SIZE_S, FILTER_SIZE_M, FILTER_SIZE_L];
19
20/// Target false positive rate of filter.
21pub const FILTER_FP_RATE: f64 = 0.01;
22/// Number of hashes used for bloom filter.
23pub const FILTER_HASHES: usize = 7;
24
25/// Inventory filter used for subscriptions and inventory comparison.
26///
27/// The [`Default`] instance has all bits set to `1`, ie. it will match
28/// everything.
29#[derive(Clone, PartialEq, Eq, Debug)]
30pub struct Filter(BloomFilter<RepoId>);
31
32impl Default for Filter {
33    fn default() -> Self {
34        Self(BloomFilter::from(vec![0xff; FILTER_SIZE_S]))
35    }
36}
37
38impl Filter {
39    /// Create a new filter with the given items.
40    ///
41    /// Uses the iterator's size hint to determine the size of the filter.
42    pub fn new(ids: impl IntoIterator<Item = RepoId>) -> Self {
43        let iterator = ids.into_iter();
44        let (min, _) = iterator.size_hint();
45        let size = bloomy::bloom::optimal_bits(min, FILTER_FP_RATE) / 8;
46        let size = if size > FILTER_SIZE_M {
47            FILTER_SIZE_L
48        } else if size > FILTER_SIZE_S {
49            FILTER_SIZE_M
50        } else {
51            FILTER_SIZE_S
52        };
53        let mut bloom = BloomFilter::with_size(size);
54
55        for id in iterator {
56            bloom.insert(&id);
57        }
58        Self(bloom)
59    }
60
61    pub fn allowed_by(
62        policies: impl Iterator<
63            Item = Result<radicle::node::policy::SeedPolicy, radicle::node::policy::store::Error>,
64        >,
65    ) -> Self {
66        let mut ids = Vec::new();
67
68        for seed in policies {
69            let seed = match seed {
70                Ok(seed) => seed,
71                Err(err) => {
72                    log::debug!(target: "protocol::filter", "Failed to read seed policy: {err}");
73                    continue;
74                }
75            };
76
77            if seed.policy.is_allow() {
78                ids.push(seed.rid);
79            }
80        }
81
82        Self::new(ids)
83    }
84
85    /// Empty filter with nothing set.
86    pub fn empty() -> Self {
87        Self(BloomFilter::from(vec![0x0; FILTER_SIZE_S]))
88    }
89
90    /// Size in bytes.
91    pub fn size(&self) -> usize {
92        self.0.bits() / 8
93    }
94}
95
96impl Deref for Filter {
97    type Target = BloomFilter<RepoId>;
98
99    fn deref(&self) -> &Self::Target {
100        &self.0
101    }
102}
103
104impl DerefMut for Filter {
105    fn deref_mut(&mut self) -> &mut Self::Target {
106        &mut self.0
107    }
108}
109
110impl From<BloomFilter<RepoId>> for Filter {
111    fn from(bloom: BloomFilter<RepoId>) -> Self {
112        Self(bloom)
113    }
114}
115
116#[allow(clippy::unwrap_used)]
117#[cfg(any(test, feature = "test"))]
118impl qcheck::Arbitrary for Filter {
119    fn arbitrary(g: &mut qcheck::Gen) -> Self {
120        let size = *g
121            .choose(&[FILTER_SIZE_S, FILTER_SIZE_M, FILTER_SIZE_L])
122            .unwrap();
123        let mut bytes = vec![0; size];
124        for _ in 0..64 {
125            let index = usize::arbitrary(g) % bytes.len();
126            bytes[index] = u8::arbitrary(g);
127        }
128        Self::from(BloomFilter::from(bytes))
129    }
130}
131
132#[allow(clippy::unwrap_used)]
133#[cfg(test)]
134mod test {
135    use super::*;
136    use radicle::test::arbitrary;
137
138    #[test]
139    fn test_parameters() {
140        // To store 10'000 items with a false positive rate of 1%, we need about 12KB.
141        assert_eq!(bloomy::bloom::optimal_bits(10_000, 0.01) / 8, 11_981);
142        // To store 1'000 items with a false positive rate of 1%, we need about 1KB.
143        assert_eq!(bloomy::bloom::optimal_bits(1_000, 0.01) / 8, 1198);
144        // To store 100 items with a false positive rate of 1%, we need about 120B.
145        assert_eq!(bloomy::bloom::optimal_bits(100, 0.01) / 8, 119);
146
147        // With 16KB, we can store 13'675 items with a false positive rate of 1%.
148        assert_eq!(
149            bloomy::bloom::optimal_capacity(FILTER_SIZE_L * 8, FILTER_FP_RATE),
150            13_675
151        );
152        // With 4KB, we can store 3'419 items with a false positive rate of 1%.
153        assert_eq!(
154            bloomy::bloom::optimal_capacity(FILTER_SIZE_M * 8, FILTER_FP_RATE),
155            3419
156        );
157        // With 1KB, we can store 855 items with a false positive rate of 1%.
158        assert_eq!(
159            bloomy::bloom::optimal_capacity(FILTER_SIZE_S * 8, FILTER_FP_RATE),
160            855
161        );
162
163        assert_eq!(
164            bloomy::bloom::optimal_hashes(FILTER_SIZE_L * 8, 13_675),
165            FILTER_HASHES
166        );
167        assert_eq!(
168            bloomy::bloom::optimal_hashes(FILTER_SIZE_M * 8, 3419),
169            FILTER_HASHES
170        );
171        assert_eq!(
172            bloomy::bloom::optimal_hashes(FILTER_SIZE_S * 8, 855),
173            FILTER_HASHES
174        );
175    }
176
177    #[test]
178    fn test_sizes() {
179        let ids = arbitrary::vec::<RepoId>(3420);
180        let f = Filter::new(ids.iter().cloned().take(10));
181        assert_eq!(f.size(), FILTER_SIZE_S);
182
183        let f = Filter::new(ids.iter().cloned().take(1000));
184        assert_eq!(f.size(), FILTER_SIZE_M);
185
186        let f = Filter::new(ids.iter().cloned());
187        assert_eq!(f.size(), FILTER_SIZE_L);
188
189        // Just checking that iterators over hash sets give correct size hints.
190        let hs = arbitrary::set::<RepoId>(42..=42);
191        assert_eq!(hs.iter().size_hint(), (42, Some(42)));
192    }
193
194    /// Checks that a particular filter extracted from a live deployment of
195    /// `radicle-node` at `release/1.5.0`, which is known to contain
196    /// "heartwood", actually also evaluates to contain "heartwood".
197    ///
198    /// This is to catch regressions in the [`std::hash::Hash`] implementation
199    /// [`RepoId`] and other breaking changes to [`Filter`].
200    #[test]
201    fn compatible() {
202        let filter = {
203            let mut filter = [0u8; FILTER_SIZE_S];
204
205            #[rustfmt::skip]
206            const COMPRESSED_FIXTURE: [(usize, u8); 100] = [
207                (0x002, 0xa8), (0x010, 0x08), (0x016, 0x40), (0x01b, 0x20), (0x04d, 0x04),
208                (0x050, 0x04), (0x05a, 0x02), (0x05e, 0x80), (0x06d, 0x40), (0x075, 0x08),
209                (0x082, 0x80), (0x084, 0x80), (0x089, 0x01), (0x08b, 0x08), (0x099, 0x04),
210                (0x0a1, 0x40), (0x0a7, 0x40), (0x0be, 0x40), (0x0d3, 0x01), (0x0e2, 0x01),
211                (0x0ee, 0x08), (0x0f2, 0x04), (0x109, 0x08), (0x119, 0x10), (0x15b, 0x40),
212                (0x160, 0x44), (0x163, 0x01), (0x168, 0x08), (0x16b, 0x01), (0x16d, 0x04),
213                (0x176, 0x80), (0x17e, 0x40), (0x189, 0x20), (0x18f, 0x04), (0x19f, 0x20),
214                (0x1b2, 0x08), (0x1b5, 0x04), (0x1b8, 0x20), (0x1ed, 0x10), (0x1f1, 0x40),
215                (0x1f3, 0x04), (0x1fa, 0x40), (0x20b, 0x08), (0x20e, 0x04), (0x218, 0x01),
216                (0x231, 0x02), (0x23d, 0x80), (0x248, 0x10), (0x24e, 0x04), (0x250, 0x01),
217                (0x251, 0x01), (0x255, 0x04), (0x25a, 0x10), (0x265, 0x20), (0x27c, 0x01),
218                (0x284, 0x04), (0x285, 0x20), (0x28d, 0x81), (0x29f, 0x01), (0x2a6, 0x10),
219                (0x2ac, 0x40), (0x2ad, 0x10), (0x2b4, 0x04), (0x2b8, 0x02), (0x2cb, 0x01),
220                (0x2d1, 0x80), (0x2d4, 0x01), (0x2d7, 0x40), (0x2ed, 0x80), (0x2f7, 0x01),
221                (0x302, 0x80), (0x303, 0x40), (0x307, 0x40), (0x309, 0x04), (0x318, 0x04),
222                (0x31e, 0x10), (0x335, 0x01), (0x336, 0x40), (0x338, 0x40), (0x351, 0x80),
223                (0x353, 0x10), (0x359, 0x0c), (0x360, 0x40), (0x367, 0x01), (0x36b, 0x08),
224                (0x36c, 0x40), (0x37b, 0x10), (0x37d, 0x40), (0x399, 0x02), (0x39f, 0x02),
225                (0x3a6, 0x02), (0x3a9, 0x04), (0x3ab, 0x01), (0x3cb, 0x04), (0x3e2, 0x01),
226                (0x3e5, 0x10), (0x3ea, 0x40), (0x3ed, 0x40), (0x3f2, 0x02), (0x3f5, 0x80),
227            ];
228
229            for (i, v) in COMPRESSED_FIXTURE.into_iter() {
230                filter[i] = v;
231            }
232
233            Filter(BloomFilter::from(filter.to_vec()))
234        };
235
236        assert!(filter.contains(&"rad:z3gqcJUoA1n9HaHKufZs5FCSGazv5".parse().unwrap()),);
237    }
238}