radicle_node/service/
filter.rs

1#![allow(clippy::identity_op)]
2use std::ops::{Deref, DerefMut};
3
4pub use bloomy::BloomFilter;
5
6use crate::identity::RepoId;
7
8/// Size in bytes of *large* bloom filter.
9/// It can store about 13'675 items with a false positive rate of 1%.
10pub const FILTER_SIZE_L: usize = 16 * 1024;
11/// Size in bytes of *medium* bloom filter.
12/// It can store about 3'419 items with a false positive rate of 1%.
13pub const FILTER_SIZE_M: usize = 4 * 1024;
14/// Size in bytes of *small* bloom filter.
15/// It can store about 855 items with a false positive rate of 1%.
16pub const FILTER_SIZE_S: usize = 1 * 1024;
17
18/// Valid filter sizes.
19pub const FILTER_SIZES: [usize; 3] = [FILTER_SIZE_S, FILTER_SIZE_M, FILTER_SIZE_L];
20
21/// Target false positive rate of filter.
22pub const FILTER_FP_RATE: f64 = 0.01;
23/// Number of hashes used for bloom filter.
24pub const FILTER_HASHES: usize = 7;
25
26/// Inventory filter used for subscriptions and inventory comparison.
27///
28/// The [`Default`] instance has all bits set to `1`, ie. it will match
29/// everything.
30#[derive(Clone, PartialEq, Eq, Debug)]
31pub struct Filter(BloomFilter<RepoId>);
32
33impl Default for Filter {
34    fn default() -> Self {
35        Self(BloomFilter::from(vec![0xff; FILTER_SIZE_S]))
36    }
37}
38
39impl Filter {
40    /// Create a new filter with the given items.
41    ///
42    /// Uses the iterator's size hint to determine the size of the filter.
43    pub fn new(ids: impl IntoIterator<Item = RepoId>) -> Self {
44        let iterator = ids.into_iter();
45        let (min, _) = iterator.size_hint();
46        let size = bloomy::bloom::optimal_bits(min, FILTER_FP_RATE) / 8;
47        let size = if size > FILTER_SIZE_M {
48            FILTER_SIZE_L
49        } else if size > FILTER_SIZE_S {
50            FILTER_SIZE_M
51        } else {
52            FILTER_SIZE_S
53        };
54        let mut bloom = BloomFilter::with_size(size);
55
56        for id in iterator {
57            bloom.insert(&id);
58        }
59        Self(bloom)
60    }
61
62    /// Empty filter with nothing set.
63    pub fn empty() -> Self {
64        Self(BloomFilter::from(vec![0x0; FILTER_SIZE_S]))
65    }
66
67    /// Size in bytes.
68    pub fn size(&self) -> usize {
69        self.0.bits() / 8
70    }
71}
72
73impl Deref for Filter {
74    type Target = BloomFilter<RepoId>;
75
76    fn deref(&self) -> &Self::Target {
77        &self.0
78    }
79}
80
81impl DerefMut for Filter {
82    fn deref_mut(&mut self) -> &mut Self::Target {
83        &mut self.0
84    }
85}
86
87impl From<BloomFilter<RepoId>> for Filter {
88    fn from(bloom: BloomFilter<RepoId>) -> Self {
89        Self(bloom)
90    }
91}
92
93#[cfg(test)]
94mod test {
95    use super::*;
96    use crate::test::arbitrary;
97
98    #[test]
99    fn test_parameters() {
100        // To store 10'000 items with a false positive rate of 1%, we need about 12KB.
101        assert_eq!(bloomy::bloom::optimal_bits(10_000, 0.01) / 8, 11_981);
102        // To store 1'000 items with a false positive rate of 1%, we need about 1KB.
103        assert_eq!(bloomy::bloom::optimal_bits(1_000, 0.01) / 8, 1198);
104        // To store 100 items with a false positive rate of 1%, we need about 120B.
105        assert_eq!(bloomy::bloom::optimal_bits(100, 0.01) / 8, 119);
106
107        // With 16KB, we can store 13'675 items with a false positive rate of 1%.
108        assert_eq!(
109            bloomy::bloom::optimal_capacity(FILTER_SIZE_L * 8, FILTER_FP_RATE),
110            13_675
111        );
112        // With 4KB, we can store 3'419 items with a false positive rate of 1%.
113        assert_eq!(
114            bloomy::bloom::optimal_capacity(FILTER_SIZE_M * 8, FILTER_FP_RATE),
115            3419
116        );
117        // With 1KB, we can store 855 items with a false positive rate of 1%.
118        assert_eq!(
119            bloomy::bloom::optimal_capacity(FILTER_SIZE_S * 8, FILTER_FP_RATE),
120            855
121        );
122
123        assert_eq!(
124            bloomy::bloom::optimal_hashes(FILTER_SIZE_L * 8, 13_675),
125            FILTER_HASHES
126        );
127        assert_eq!(
128            bloomy::bloom::optimal_hashes(FILTER_SIZE_M * 8, 3419),
129            FILTER_HASHES
130        );
131        assert_eq!(
132            bloomy::bloom::optimal_hashes(FILTER_SIZE_S * 8, 855),
133            FILTER_HASHES
134        );
135    }
136
137    #[test]
138    fn test_sizes() {
139        let ids = arbitrary::vec::<RepoId>(3420);
140        let f = Filter::new(ids.iter().cloned().take(10));
141        assert_eq!(f.size(), FILTER_SIZE_S);
142
143        let f = Filter::new(ids.iter().cloned().take(1000));
144        assert_eq!(f.size(), FILTER_SIZE_M);
145
146        let f = Filter::new(ids.iter().cloned());
147        assert_eq!(f.size(), FILTER_SIZE_L);
148
149        // Just checking that iterators over hash sets give correct size hints.
150        let hs = arbitrary::set::<RepoId>(42..=42);
151        assert_eq!(hs.iter().size_hint(), (42, Some(42)));
152    }
153}