1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
// Copyright (C) 2024 Christian Mauduit <ufoot@ufoot.org>

use crate::bloom::Bloom;
use crate::params::Params;
use bitvec::prelude::BitVec;
use std::fmt;
use std::hash::Hash;
use std::sync::{Arc, RwLock};

/// Thread-safe Bloom filter.
///
/// Same as the standard Bloom filter,
/// but safe to use in concurrent environments.
///
/// # Examples
///
/// ```
/// use std::thread;
/// use ofilter::SyncBloom;
///
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
///
/// let f = filter.clone();
/// let handle1 = thread::spawn(move || {
///     for i in 0..1_000 {
///         f.set(&i);
///     }
/// });
///
/// let f = filter.clone();
/// let handle2 = thread::spawn(move || {
///     for i in 0..1_000 {
///         f.check(&i);
///     }
/// });
///
/// handle1.join().unwrap();
/// handle2.join().unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct SyncBloom<T> {
    inner: Arc<RwLock<Bloom<T>>>,
}

/// Pretty-print the filter.
///
/// # Examples
///
/// ```
/// use ofilter::SyncBloom;
///
/// let filter: SyncBloom<usize> = SyncBloom::new(100);
///
/// assert_eq!("[sync] { fp_rate: 0.000000, params: { nb_hash: 2, bit_len: 1899, nb_items: 100, fp_rate: 0.009992, predict: false } }", format!("{}", filter));
/// ```
impl<T> fmt::Display for SyncBloom<T>
where
    T: Hash,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "[sync] {}", self.inner.read().unwrap())
    }
}

impl<T> SyncBloom<T> {
    /// Create a new thread-safe Bloom filter, with given capacity.
    ///
    /// All other parameters are set to defaults, or aligned to match capacity.
    ///
    /// # Examples
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// let filter: SyncBloom<usize> = SyncBloom::new(100);
    /// assert_eq!(100, filter.capacity());
    /// ```
    pub fn new(capacity: usize) -> Self {
        SyncBloom {
            inner: Arc::new(RwLock::new(Bloom::new(capacity))),
        }
    }

    /// Create a new thread-safe Bloom filter, with specific parameters.
    ///
    /// # Examples
    ///
    /// ```
    /// use ofilter::{SyncBloom, Params};
    ///
    /// let filter: SyncBloom<usize> = SyncBloom::new_with_params(Params::with_nb_items_and_fp_rate(100, 0.1));
    /// assert_eq!(100, filter.capacity());
    /// ```
    pub fn new_with_params(params: Params) -> Self {
        SyncBloom {
            inner: Arc::new(RwLock::new(Bloom::new_with_params(params))),
        }
    }

    /// Get filter params.
    ///
    /// This can be useful because when creating the filter, the `.adjust()`
    /// func is called, and may decide to fine-tune some parameters. With this,
    /// one can know exactly what is used by the filter.
    ///
    /// # Examples
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// let filter: SyncBloom<usize> = SyncBloom::new(100);
    /// println!("{}", filter.params());
    /// ```
    pub fn params(&self) -> Params {
        self.inner.read().unwrap().params()
    }

    /// Get filter capacity.
    ///
    /// Returns the value of `params.nb_items`, that is the number of
    /// items the filter is designed for.
    ///
    /// ```
    /// use ofilter::{SyncBloom, Params};
    ///
    /// let filter: SyncBloom<usize> = SyncBloom::new_with_params(Params::with_bit_len(1_000_000));
    /// assert_eq!(52681, filter.capacity());
    /// ```
    pub fn capacity(&self) -> usize {
        self.inner.read().unwrap().capacity()
    }

    /// Clear the filter.
    ///
    /// Clears the bit vector, but keeps parameters.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
    /// filter.set(&10);
    /// assert!(filter.check(&10));
    /// filter.clear();
    /// assert!(!filter.check(&10));
    /// ```
    pub fn clear(&self) {
        self.inner.write().unwrap().clear();
    }

    /// Returns true if filter is empty.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
    /// assert!(filter.is_empty());
    /// filter.set(&10);
    /// assert!(!filter.is_empty());
    /// ```
    pub fn is_empty(&self) -> bool {
        self.inner.read().unwrap().is_empty()
    }

    /// Returns the current false positive rate.
    ///
    /// In theory the false positive rate `fp_rate` is known at
    /// filter creation. But that, is the theoretical `fp_rate`
    /// that the filter reaches when it is "wasted" because it
    /// has too many entries. Until then, it performs better
    /// than that, statistically.
    ///
    /// This function returns the actual false positive rate.
    /// When this value is greater than the `fp_rate` from
    /// the parameters, one should not continue to add items
    /// to the filter.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
    /// assert_eq!(0.0, filter.fp_rate());
    /// filter.set(&10);
    /// assert!(filter.fp_rate() > 0.0); // will be params.fp_rate when filter is full
    /// ```
    pub fn fp_rate(&self) -> f64 {
        self.inner.read().unwrap().fp_rate()
    }

    /// Returns the ratio between real fp_rate, and theoretical fp_rate.
    ///
    /// This is a helper to quickly compare the real `fp_rate`, given
    /// how filled the filter is, and the theoretical `fp_rate`.
    ///
    /// When this value is greater than 1.0, one should not
    /// continue to add items to the filter.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
    /// assert_eq!(0.0, filter.level());
    /// filter.set(&10);
    /// assert!(filter.level() > 0.0); // will be 1.0 when filter is full
    /// ```
    pub fn level(&self) -> f64 {
        self.inner.read().unwrap().level()
    }

    /// Returns true if there is still room in the filter.
    ///
    /// This is a helper which returns true if the actual
    /// `fp_rate` is lower than the theoretical `fp_rate`.
    ///
    /// When this is false, one should not
    /// continue to add items to the filter.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
    /// assert_eq!(0.0, filter.level());
    /// filter.set(&10);
    /// assert!(filter.level() > 0.0); // will be 1.0 when filter is full
    /// ```
    pub fn is_ok(&self) -> bool {
        self.inner.read().unwrap().is_ok()
    }

    /// Returns the number of ones in the bit vector.
    ///
    /// By comparing this number with the size of the bit vector,
    /// one can estimate how "filled" the filter is.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000_000);
    /// assert_eq!(0, filter.count_ones());
    /// filter.set(&10);
    /// assert_eq!(2, filter.count_ones());
    /// ```
    pub fn count_ones(&self) -> usize {
        self.inner.read().unwrap().count_ones()
    }

    /// Export the underlying bit vector.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000_000);
    /// let bit_vec = filter.export();
    /// println!("{:?}", &bit_vec);
    /// ```
    pub fn export(&self) -> BitVec {
        self.inner.read().unwrap().export().clone()
    }

    /// Import the underlying bit vector.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    /// use bitvec::prelude::BitVec;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000_000);
    /// let mut bit_vec = BitVec::new();
    /// bit_vec.resize(100, false);
    /// bit_vec.set(42, true);
    /// filter.import(&bit_vec); // safe to call even is sizes mismatch
    /// ```
    pub fn import(&self, src: &BitVec) {
        self.inner.write().unwrap().import(src);
    }
}

impl<T> SyncBloom<T>
where
    T: Hash,
{
    /// Record an item in the set.
    ///
    /// Once this has been called, any call to `check()` will return
    /// true, as there are no false negatives. However some other items
    /// may test positive as a consequence of recording this one.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
    /// filter.set(&42);
    /// ```
    pub fn set(&self, item: &T) {
        self.inner.write().unwrap().set(item);
    }

    /// Guess whether an item is likely to be in the set.
    ///
    /// If `set()` has been called before with value, then this returns
    /// true, as there are no false negatives. However it may respond
    /// true even if the item has never been recorded in the set.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
    /// filter.set(&42);
    /// assert!(filter.check(&42));
    /// ```
    pub fn check(&self, item: &T) -> bool {
        self.inner.read().unwrap().check(item)
    }

    /// Record an item in the set and returns its previous value.
    ///
    /// Equivalent to calling `get()` then `set()` but performs
    /// hash lookup only once so it's a bit more efficient.
    ///
    /// ```
    /// use ofilter::SyncBloom;
    ///
    /// // Does not need to be `mut`
    /// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
    /// assert!(!filter.check_and_set(&42));
    /// assert!(filter.check(&42));
    /// ```
    pub fn check_and_set(&self, item: &T) -> bool {
        self.inner.write().unwrap().check_and_set(item)
    }
}