use growable_bloom_filter::{GrowableBloom, GrowableBloomBuilder};
use identity_hash::BuildIdentityHasher;
use strum_macros::EnumString;
use xxhash_rust::xxh3::xxh3_64;
use std::collections::HashSet;
pub trait Filter {
fn detect(&mut self, input: &[u8]) -> bool;
}
#[derive(Clone, Debug, Default)]
pub struct SimpleFilter {
inner: HashSet<Vec<u8>>,
}
impl Filter for SimpleFilter {
fn detect(&mut self, input: &[u8]) -> bool {
self.inner.insert(input.to_vec())
}
}
#[derive(Clone, Debug, Default)]
pub struct QuickFilter {
inner: HashSet<u64, BuildIdentityHasher<u64>>,
}
impl Filter for QuickFilter {
fn detect(&mut self, input: &[u8]) -> bool {
self.inner.insert(xxh3_64(input))
}
}
#[derive(Clone, Debug, Default)]
pub struct SortedFilter {
inner: Vec<u8>,
}
impl Filter for SortedFilter {
fn detect(&mut self, input: &[u8]) -> bool {
if input == &self.inner[..] {
return false;
}
self.inner = input.to_vec();
true
}
}
#[derive(Debug)]
pub struct CompactFilter {
inner: GrowableBloom,
}
impl Default for CompactFilter {
fn default() -> Self {
Self {
inner: GrowableBloomBuilder::new()
.estimated_insertions(1_000_000)
.desired_error_ratio(1e-8)
.growth_factor(2)
.tightening_ratio(0.5)
.build(),
}
}
}
impl Filter for CompactFilter {
fn detect(&mut self, input: &[u8]) -> bool {
self.inner.insert(xxh3_64(input))
}
}
#[derive(Copy, Clone, Debug, EnumString)]
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
pub enum Filters {
Quick,
Simple,
Sorted,
Compact,
}
impl From<Filters> for Box<dyn Filter> {
fn from(kind: Filters) -> Self {
match kind {
Filters::Quick => Box::<QuickFilter>::default(),
Filters::Simple => Box::<SimpleFilter>::default(),
Filters::Compact => Box::<CompactFilter>::default(),
Filters::Sorted => Box::<SortedFilter>::default(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn naive_filter_detection() {
let mut filter = SimpleFilter::default();
let ins1 = filter.detect(b"input1");
let ins2 = filter.detect(b"input1");
assert!(ins1);
assert!(!ins2);
}
#[test]
fn digest_filter_detection() {
let mut filter = QuickFilter::default();
let ins1 = filter.detect(b"input1");
let ins2 = filter.detect(b"input1");
assert!(ins1);
assert!(!ins2);
}
#[test]
fn sorted_filter_detection() {
let mut filter = SortedFilter::default();
let ins1 = filter.detect(b"input1");
let ins2 = filter.detect(b"input1");
let ins3 = filter.detect(b"input2");
let ins4 = filter.detect(b"input1");
assert!(ins1);
assert!(!ins2);
assert!(ins3);
assert!(ins4);
}
#[test]
fn bloom_filter_detection() {
let mut filter = CompactFilter::default();
let ins1 = filter.detect(b"input1");
let ins2 = filter.detect(b"input1");
assert!(ins1);
assert!(!ins2);
}
}