1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
// Copyright (C) 2024 Christian Mauduit <ufoot@ufoot.org>
use crate::bloom::Bloom;
use crate::params::Params;
use bitvec::prelude::BitVec;
use std::fmt;
use std::hash::Hash;
use std::sync::{Arc, RwLock};
/// Thread-safe Bloom filter.
///
/// Same as the standard Bloom filter,
/// but safe to use in concurrent environments.
///
/// # Examples
///
/// ```
/// use std::thread;
/// use ofilter::SyncBloom;
///
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
///
/// let f = filter.clone();
/// let handle1 = thread::spawn(move || {
/// for i in 0..1_000 {
/// f.set(&i);
/// }
/// });
///
/// let f = filter.clone();
/// let handle2 = thread::spawn(move || {
/// for i in 0..1_000 {
/// f.check(&i);
/// }
/// });
///
/// handle1.join().unwrap();
/// handle2.join().unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct SyncBloom<T> {
inner: Arc<RwLock<Bloom<T>>>,
}
/// Pretty-print the filter.
///
/// # Examples
///
/// ```
/// use ofilter::SyncBloom;
///
/// let filter: SyncBloom<usize> = SyncBloom::new(100);
///
/// assert_eq!("[sync] { fp_rate: 0.000000, params: { nb_hash: 2, bit_len: 1899, nb_items: 100, fp_rate: 0.009992, predict: false } }", format!("{}", filter));
/// ```
impl<T> fmt::Display for SyncBloom<T>
where
T: Hash,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "[sync] {}", self.inner.read().unwrap())
}
}
impl<T> SyncBloom<T> {
/// Create a new thread-safe Bloom filter, with given capacity.
///
/// All other parameters are set to defaults, or aligned to match capacity.
///
/// # Examples
///
/// ```
/// use ofilter::SyncBloom;
///
/// let filter: SyncBloom<usize> = SyncBloom::new(100);
/// assert_eq!(100, filter.capacity());
/// ```
pub fn new(capacity: usize) -> Self {
SyncBloom {
inner: Arc::new(RwLock::new(Bloom::new(capacity))),
}
}
/// Create a new thread-safe Bloom filter, with specific parameters.
///
/// # Examples
///
/// ```
/// use ofilter::{SyncBloom, Params};
///
/// let filter: SyncBloom<usize> = SyncBloom::new_with_params(Params::with_nb_items_and_fp_rate(100, 0.1));
/// assert_eq!(100, filter.capacity());
/// ```
pub fn new_with_params(params: Params) -> Self {
SyncBloom {
inner: Arc::new(RwLock::new(Bloom::new_with_params(params))),
}
}
/// Get filter params.
///
/// This can be useful because when creating the filter, the `.adjust()`
/// func is called, and may decide to fine-tune some parameters. With this,
/// one can know exactly what is used by the filter.
///
/// # Examples
///
/// ```
/// use ofilter::SyncBloom;
///
/// let filter: SyncBloom<usize> = SyncBloom::new(100);
/// println!("{}", filter.params());
/// ```
pub fn params(&self) -> Params {
self.inner.read().unwrap().params()
}
/// Get filter capacity.
///
/// Returns the value of `params.nb_items`, that is the number of
/// items the filter is designed for.
///
/// ```
/// use ofilter::{SyncBloom, Params};
///
/// let filter: SyncBloom<usize> = SyncBloom::new_with_params(Params::with_bit_len(1_000_000));
/// assert_eq!(52681, filter.capacity());
/// ```
pub fn capacity(&self) -> usize {
self.inner.read().unwrap().capacity()
}
/// Clear the filter.
///
/// Clears the bit vector, but keeps parameters.
///
/// ```
/// use ofilter::SyncBloom;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
/// filter.set(&10);
/// assert!(filter.check(&10));
/// filter.clear();
/// assert!(!filter.check(&10));
/// ```
pub fn clear(&self) {
self.inner.write().unwrap().clear();
}
/// Returns true if filter is empty.
///
/// ```
/// use ofilter::SyncBloom;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
/// assert!(filter.is_empty());
/// filter.set(&10);
/// assert!(!filter.is_empty());
/// ```
pub fn is_empty(&self) -> bool {
self.inner.read().unwrap().is_empty()
}
/// Returns the current false positive rate.
///
/// In theory the false positive rate `fp_rate` is known at
/// filter creation. But that, is the theoretical `fp_rate`
/// that the filter reaches when it is "wasted" because it
/// has too many entries. Until then, it performs better
/// than that, statistically.
///
/// This function returns the actual false positive rate.
/// When this value is greater than the `fp_rate` from
/// the parameters, one should not continue to add items
/// to the filter.
///
/// ```
/// use ofilter::SyncBloom;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
/// assert_eq!(0.0, filter.fp_rate());
/// filter.set(&10);
/// assert!(filter.fp_rate() > 0.0); // will be params.fp_rate when filter is full
/// ```
pub fn fp_rate(&self) -> f64 {
self.inner.read().unwrap().fp_rate()
}
/// Returns the ratio between real fp_rate, and theoretical fp_rate.
///
/// This is a helper to quickly compare the real `fp_rate`, given
/// how filled the filter is, and the theoretical `fp_rate`.
///
/// When this value is greater than 1.0, one should not
/// continue to add items to the filter.
///
/// ```
/// use ofilter::SyncBloom;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
/// assert_eq!(0.0, filter.level());
/// filter.set(&10);
/// assert!(filter.level() > 0.0); // will be 1.0 when filter is full
/// ```
pub fn level(&self) -> f64 {
self.inner.read().unwrap().level()
}
/// Returns true if there is still room in the filter.
///
/// This is a helper which returns true if the actual
/// `fp_rate` is lower than the theoretical `fp_rate`.
///
/// When this is false, one should not
/// continue to add items to the filter.
///
/// ```
/// use ofilter::SyncBloom;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
/// assert_eq!(0.0, filter.level());
/// filter.set(&10);
/// assert!(filter.level() > 0.0); // will be 1.0 when filter is full
/// ```
pub fn is_ok(&self) -> bool {
self.inner.read().unwrap().is_ok()
}
/// Returns the number of ones in the bit vector.
///
/// By comparing this number with the size of the bit vector,
/// one can estimate how "filled" the filter is.
///
/// ```
/// use ofilter::SyncBloom;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000_000);
/// assert_eq!(0, filter.count_ones());
/// filter.set(&10);
/// assert_eq!(2, filter.count_ones());
/// ```
pub fn count_ones(&self) -> usize {
self.inner.read().unwrap().count_ones()
}
/// Export the underlying bit vector.
///
/// ```
/// use ofilter::SyncBloom;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000_000);
/// let bit_vec = filter.export();
/// println!("{:?}", &bit_vec);
/// ```
pub fn export(&self) -> BitVec {
self.inner.read().unwrap().export().clone()
}
/// Import the underlying bit vector.
///
/// ```
/// use ofilter::SyncBloom;
/// use bitvec::prelude::BitVec;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000_000);
/// let mut bit_vec = BitVec::new();
/// bit_vec.resize(100, false);
/// bit_vec.set(42, true);
/// filter.import(&bit_vec); // safe to call even is sizes mismatch
/// ```
pub fn import(&self, src: &BitVec) {
self.inner.write().unwrap().import(src);
}
}
impl<T> SyncBloom<T>
where
T: Hash,
{
/// Record an item in the set.
///
/// Once this has been called, any call to `check()` will return
/// true, as there are no false negatives. However some other items
/// may test positive as a consequence of recording this one.
///
/// ```
/// use ofilter::SyncBloom;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
/// filter.set(&42);
/// ```
pub fn set(&self, item: &T) {
self.inner.write().unwrap().set(item);
}
/// Guess whether an item is likely to be in the set.
///
/// If `set()` has been called before with value, then this returns
/// true, as there are no false negatives. However it may respond
/// true even if the item has never been recorded in the set.
///
/// ```
/// use ofilter::SyncBloom;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
/// filter.set(&42);
/// assert!(filter.check(&42));
/// ```
pub fn check(&self, item: &T) -> bool {
self.inner.read().unwrap().check(item)
}
/// Record an item in the set and returns its previous value.
///
/// Equivalent to calling `get()` then `set()` but performs
/// hash lookup only once so it's a bit more efficient.
///
/// ```
/// use ofilter::SyncBloom;
///
/// // Does not need to be `mut`
/// let filter: SyncBloom<usize> = SyncBloom::new(1_000);
/// assert!(!filter.check_and_set(&42));
/// assert!(filter.check(&42));
/// ```
pub fn check_and_set(&self, item: &T) -> bool {
self.inner.write().unwrap().check_and_set(item)
}
}