hyperast/filter/
mod.rs

1pub mod default;
2pub mod pearson_hashing;
3
4use std::fmt::Debug;
5use std::marker::PhantomData;
6
7use bitvec::{order::Lsb0, view::BitViewSized};
8
9use self::default::{Pearson, VaryHasher};
10
11#[derive(PartialEq, Eq)]
12pub enum BloomSize {
13    None,
14    B16,
15    B32,
16    B64,
17    B128,
18    B256,
19    B512,
20    B1024,
21    B2048,
22    B4096,
23    Much,
24}
25
26pub trait BF<T: ?Sized> {
27    type Result;
28    type S;
29    type H: VaryHasher<Self::S>;
30    const SIZE: BloomSize;
31    fn bulk_insert<It: Iterator<Item = Self::S>>(&mut self, it: It);
32    fn check_raw(&self, item: Self::S) -> Self::Result;
33}
34
35/// (2^3)^S = 2^(3*S) bits = S bytes
36pub struct Bloom<T, V: BitViewSized> {
37    bits: bitvec::array::BitArray<V, Lsb0>,
38    _phantom: PhantomData<*const T>,
39}
40
41unsafe impl<T, V: BitViewSized> Send for Bloom<T, V> {}
42unsafe impl<T, V: BitViewSized> Sync for Bloom<T, V> {}
43
44impl<T, V: BitViewSized> Default for Bloom<T, V> {
45    fn default() -> Self {
46        Self {
47            bits: Default::default(),
48            _phantom: Default::default(),
49        }
50    }
51}
52impl<T, V: BitViewSized> Debug for Bloom<T, V> {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        f.debug_struct("Bloom").field("bits", &self.bits).finish()
55    }
56}
57
58impl<T: ?Sized, V: BitViewSized, It: Iterator<Item = <Self as BF<T>>::S>> From<It>
59    for Bloom<&'static T, V>
60where
61    Self: BF<T>,
62{
63    fn from(it: It) -> Self {
64        let mut r = Self::default();
65        // for x in it {
66        //     Bloom::insert(&mut r, 0, x.as_ref());
67        // }
68        r.bulk_insert(it);
69        r
70    }
71}
72
73#[derive(PartialEq, Eq)]
74pub enum BloomResult {
75    MaybeContain,
76    DoNotContain,
77}
78
79// impl<T:?Sized,V:BitViewSized> BF<T> for Bloom<&'static T, V> {
80//     type Result = BloomResult;
81
82//     fn insert<U: AsRef<T>>(&mut self, dups: usize, item: U) {
83//         panic!()
84//     }
85
86//     fn check<U: AsRef<T>>(&self, dups: usize, item: U) -> Self::Result {
87//         panic!()
88//     }
89// }
90
91impl BF<[u8]> for Bloom<&'static [u8], u16> {
92    type Result = BloomResult;
93    type S = u8;
94    type H = Pearson<16>;
95    const SIZE: BloomSize = BloomSize::B16;
96
97    fn bulk_insert<It: Iterator<Item = Self::S>>(&mut self, it: It) {
98        it.for_each(|b| self.bits.set(b as usize, true));
99    }
100
101    // fn insert<U: AsRef<[u8]>>(&mut self, dups: usize, item: U) {
102    //     todo!()
103    //     // for i in 0..=dups {
104    //     //     let a = pearson(i, item.as_ref());
105    //     //     let b = (a & 0xf) ^ (a >> 4);
106    //     //     self.bits.set(b as usize, true);
107    //     // }
108    // }
109
110    // fn check<U: AsRef<[u8]>>(&self, dups: usize, item: U) -> Self::Result {
111    //     todo!()
112    //     // log::trace!("{}", self.bits);
113    //     // for i in 0..=dups {
114    //     //     let a = pearson(i, item.as_ref());
115    //     //     let b = (a & 0xf) ^ (a >> 4);
116    //     //     if !self.bits[b as usize] {
117    //     //         return BloomResult::DoNotContain;
118    //     //     }
119    //     // }
120    //     // BloomResult::MaybeContain
121    // }
122
123    fn check_raw(&self, b: Self::S) -> Self::Result {
124        log::trace!("{}", self.bits);
125        if self.bits[b as usize] {
126            BloomResult::MaybeContain
127        } else {
128            BloomResult::DoNotContain
129        }
130    }
131}
132
133impl BF<[u8]> for Bloom<&'static [u8], u32> {
134    type Result = BloomResult;
135    type S = u8;
136    type H = Pearson<32>;
137    const SIZE: BloomSize = BloomSize::B32;
138
139    fn bulk_insert<It: Iterator<Item = Self::S>>(&mut self, it: It) {
140        it.for_each(|b| self.bits.set(b as usize, true));
141    }
142
143    fn check_raw(&self, b: Self::S) -> Self::Result {
144        log::trace!("{}", self.bits);
145        if self.bits[b as usize] {
146            BloomResult::MaybeContain
147        } else {
148            BloomResult::DoNotContain
149        }
150    }
151}
152
153#[cfg(all(target_pointer_width = "64", feature = "native"))]
154mod bloom_64 {
155    use super::default::MyDefaultHasher;
156    use super::Bloom;
157    use super::BloomResult;
158    use super::BloomSize;
159    use super::Pearson;
160    use super::BF;
161
162    impl BF<[u8]> for Bloom<&'static [u8], u64> {
163        type Result = BloomResult;
164        type S = u8;
165        type H = Pearson<64>;
166        const SIZE: BloomSize = BloomSize::B64;
167
168        fn bulk_insert<It: Iterator<Item = Self::S>>(&mut self, it: It) {
169            it.for_each(|b| self.bits.set(b as usize, true));
170        }
171
172        fn check_raw(&self, b: Self::S) -> Self::Result {
173            log::trace!("{}", self.bits);
174            if self.bits[b as usize] {
175                BloomResult::MaybeContain
176            } else {
177                BloomResult::DoNotContain
178            }
179        }
180    }
181
182    impl BF<[u8]> for Bloom<&'static [u8], [u64; 2]> {
183        type Result = BloomResult;
184        type S = u8;
185        type H = Pearson<128>;
186        const SIZE: BloomSize = BloomSize::B128;
187
188        fn bulk_insert<It: Iterator<Item = Self::S>>(&mut self, it: It) {
189            it.for_each(|b| self.bits.set(b as usize, true));
190        }
191
192        fn check_raw(&self, b: Self::S) -> Self::Result {
193            log::trace!("{}", self.bits);
194            if self.bits[b as usize] {
195                BloomResult::MaybeContain
196            } else {
197                BloomResult::DoNotContain
198            }
199        }
200    }
201
202    impl BF<[u8]> for Bloom<&'static [u8], [u64; 4]> {
203        type Result = BloomResult;
204        type S = u8;
205        type H = Pearson<256>;
206        const SIZE: BloomSize = BloomSize::B256;
207
208        fn bulk_insert<It: Iterator<Item = Self::S>>(&mut self, it: It) {
209            it.for_each(|b| self.bits.set(b as usize, true));
210        }
211
212        fn check_raw(&self, b: Self::S) -> Self::Result {
213            log::trace!("{}", self.bits);
214            if self.bits[b as usize] {
215                BloomResult::MaybeContain
216            } else {
217                BloomResult::DoNotContain
218            }
219        }
220    }
221    //TODO
222    impl BF<[u8]> for Bloom<&'static [u8], [u64; 8]> {
223        type Result = BloomResult;
224        type S = u16;
225        type H = MyDefaultHasher<512>;
226        const SIZE: BloomSize = BloomSize::B512;
227
228        fn bulk_insert<It: Iterator<Item = Self::S>>(&mut self, it: It) {
229            it.for_each(|b| self.bits.set(b as usize, true));
230        }
231
232        fn check_raw(&self, b: Self::S) -> Self::Result {
233            log::trace!("{}", self.bits);
234            if self.bits[b as usize] {
235                BloomResult::MaybeContain
236            } else {
237                BloomResult::DoNotContain
238            }
239        }
240    }
241
242    impl BF<[u8]> for Bloom<&'static [u8], [u64; 16]> {
243        type Result = BloomResult;
244        type S = u16;
245        type H = MyDefaultHasher<1024>;
246        const SIZE: BloomSize = BloomSize::B1024;
247
248        fn bulk_insert<It: Iterator<Item = Self::S>>(&mut self, it: It) {
249            it.for_each(|b| self.bits.set(b as usize, true));
250        }
251
252        fn check_raw(&self, b: Self::S) -> Self::Result {
253            log::trace!("{}", self.bits);
254            if self.bits[b as usize] {
255                BloomResult::MaybeContain
256            } else {
257                BloomResult::DoNotContain
258            }
259        }
260    }
261
262    impl BF<[u8]> for Bloom<&'static [u8], [u64; 32]> {
263        type Result = BloomResult;
264        type S = u16;
265        type H = MyDefaultHasher<2048>;
266        const SIZE: BloomSize = BloomSize::B2048;
267
268        fn bulk_insert<It: Iterator<Item = Self::S>>(&mut self, it: It) {
269            it.for_each(|b| self.bits.set(b as usize, true));
270        }
271
272        fn check_raw(&self, b: Self::S) -> Self::Result {
273            log::trace!("{}", self.bits);
274            if self.bits[b as usize] {
275                BloomResult::MaybeContain
276            } else {
277                BloomResult::DoNotContain
278            }
279        }
280    }
281
282    impl BF<[u8]> for Bloom<&'static [u8], [u64; 64]> {
283        type Result = BloomResult;
284        type S = u16;
285        type H = MyDefaultHasher<4096>;
286        const SIZE: BloomSize = BloomSize::B4096;
287
288        fn bulk_insert<It: Iterator<Item = Self::S>>(&mut self, it: It) {
289            it.for_each(|b| self.bits.set(b as usize, true));
290        }
291
292        fn check_raw(&self, b: Self::S) -> Self::Result {
293            log::trace!("{}", self.bits);
294            if self.bits[b as usize] {
295                BloomResult::MaybeContain
296            } else {
297                BloomResult::DoNotContain
298            }
299        }
300    }
301}