use clap::*;
use fnv::FnvHashSet;
use scalable_bloom_filter::ScalableBloomFilter;
use twox_hash::XxHash64;
use std::collections::HashSet;
use std::hash::Hasher;
// Selector for the `Filter` implementations defined in this file.
//
// The enum is declared through the `arg_enum!` macro (presumably the one
// brought in by the `clap` glob import). Plain `//` comments are used instead
// of `///` so that no extra attribute tokens are handed to the macro.
arg_enum! {
#[doc(hidden)]
#[derive(Copy, Clone, Debug)]
pub enum FilterKind {
// Converts to `SortedFilter`, which only remembers the previous input.
Sorted,
// Converts to `DigestFilter`, which stores a 64-bit digest of each input.
Digest,
// Converts to `NaiveFilter`, which stores the full bytes of each input.
Naive,
// Converts to `BloomFilter`, which tracks digests in a `ScalableBloomFilter`.
Bloom,
}
}
/// Common interface shared by every duplicate detector in this module.
///
/// An implementor owns whatever state it needs to decide whether a given
/// input has been observed before.
pub trait Filter {
    /// Builds a filter that has not recorded any input yet.
    ///
    /// The `Self: Sized` bound keeps this constructor off `dyn Filter`, which
    /// lets the trait still be used as a trait object.
    fn new() -> Self
    where
        Self: Sized;

    /// Feeds `input` to the filter and reports how it was classified.
    ///
    /// The implementations in this file return `true` when they accept the
    /// input as new and `false` when they treat it as a duplicate.
    fn detect(&mut self, input: &[u8]) -> bool;
}
/// Builds the boxed filter implementation selected by a [`FilterKind`].
///
/// This is written as `From` rather than a hand-rolled `Into`: the standard
/// library's blanket impl still provides `Into<Box<dyn Filter>>` for
/// `FilterKind`, so existing `kind.into()` call sites keep working, and
/// `Box::<dyn Filter>::from(kind)` becomes available as well.
impl From<FilterKind> for Box<dyn Filter> {
    fn from(kind: FilterKind) -> Self {
        // Exhaustive on purpose: adding a variant must fail to compile here
        // until it is mapped to an implementation.
        match kind {
            FilterKind::Sorted => Box::new(SortedFilter::new()),
            FilterKind::Digest => Box::new(DigestFilter::new()),
            FilterKind::Naive => Box::new(NaiveFilter::new()),
            FilterKind::Bloom => Box::new(BloomFilter::new()),
        }
    }
}
/// Exact duplicate detector that keeps an owned copy of every distinct input.
#[derive(Clone, Debug, Default)]
pub struct NaiveFilter {
    /// Bytes of each input accepted so far.
    inner: HashSet<Vec<u8>>,
}

impl Filter for NaiveFilter {
    /// Creates a filter with nothing recorded yet.
    fn new() -> Self {
        Self::default()
    }

    /// Copies `input` into the set.
    ///
    /// Returns `true` when the bytes were not stored before and `false` when
    /// an identical input is already present.
    #[inline]
    fn detect(&mut self, input: &[u8]) -> bool {
        let owned = input.to_vec();
        self.inner.insert(owned)
    }
}
/// Duplicate detector that remembers a 64-bit digest of each input instead of
/// the input bytes themselves.
///
/// Inputs are told apart only by the value returned from `hash`, so two
/// different inputs that produce the same digest are indistinguishable here
/// and the later one is reported as a duplicate.
#[derive(Clone, Debug, Default)]
pub struct DigestFilter {
    /// Digests of the inputs accepted so far.
    inner: FnvHashSet<u64>,
}

impl Filter for DigestFilter {
    /// Creates a filter with no digests recorded.
    fn new() -> Self {
        Self::default()
    }

    /// Hashes `input` and stores the digest.
    ///
    /// Returns `true` when the digest was not stored before, `false` otherwise.
    #[inline]
    fn detect(&mut self, input: &[u8]) -> bool {
        let digest = hash(input);
        self.inner.insert(digest)
    }
}
/// Duplicate detector that compares each input only against the one seen
/// immediately before it.
///
/// Only *consecutive* repeats are reported as duplicates, so equal inputs
/// must be adjacent (for example, pre-sorted) to be caught. A single buffer
/// holding the previous input is the only state kept.
#[derive(Clone, Debug)]
pub struct SortedFilter {
    /// The most recent input, or `None` until `detect` has been called once.
    ///
    /// An `Option` is used rather than an empty `Vec` so that "nothing seen
    /// yet" cannot be confused with "the previous input was empty".
    inner: Option<Vec<u8>>,
}

impl Filter for SortedFilter {
    /// Creates a filter that has not seen any input.
    fn new() -> SortedFilter {
        SortedFilter { inner: None }
    }

    /// Compares `input` with the previous input and then remembers it.
    ///
    /// Returns `false` when `input` equals the previous input, `true`
    /// otherwise. The first call always returns `true`, including when the
    /// input is empty.
    #[inline]
    fn detect(&mut self, input: &[u8]) -> bool {
        match self.inner.as_mut() {
            Some(previous) if previous.as_slice() == input => false,
            Some(previous) => {
                // Overwrite in place so the existing allocation is reused.
                previous.clear();
                previous.extend_from_slice(input);
                true
            }
            None => {
                self.inner = Some(input.to_vec());
                true
            }
        }
    }
}
/// Duplicate detector that tracks 64-bit input digests in a
/// `ScalableBloomFilter`.
///
/// NOTE(review): being backed by a Bloom filter, membership answers are
/// presumably approximate (a new input may occasionally be reported as a
/// duplicate) — confirm against the crate documentation.
#[derive(Debug)]
pub struct BloomFilter {
    /// Digests of the inputs accepted so far.
    inner: ScalableBloomFilter<u64>,
}

impl Filter for BloomFilter {
    /// Creates an empty filter with the fixed sizing parameters below.
    fn new() -> Self {
        // NOTE(review): the two arguments look like an initial capacity and a
        // target false-positive rate — confirm against the crate docs.
        let inner = ScalableBloomFilter::new(1_000_000, 1e-8);
        Self { inner }
    }

    /// Hashes `input` and checks the digest against the filter.
    ///
    /// Returns `false` without modifying the filter when the digest is
    /// reported as present; otherwise inserts it and returns `true`.
    #[inline]
    fn detect(&mut self, input: &[u8]) -> bool {
        let digest = hash(input);
        let unseen = !self.inner.contains(&digest);
        if unseen {
            self.inner.insert(&digest);
        }
        unseen
    }
}
/// Computes the 64-bit digest of `input`.
///
/// The bytes are written in a single call to a default-constructed
/// `XxHash64`, and the hasher's final value is returned.
fn hash(input: &[u8]) -> u64 {
    let mut state = XxHash64::default();
    state.write(input);
    state.finish()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A repeated input is accepted once and then rejected.
    #[test]
    fn naive_filter_detection() {
        let mut filter = NaiveFilter::new();

        assert!(filter.detect(b"input1"));
        assert!(!filter.detect(b"input1"));
    }

    /// A repeated input is accepted once and then rejected.
    #[test]
    fn digest_filter_detection() {
        let mut filter = DigestFilter::new();

        assert!(filter.detect(b"input1"));
        assert!(!filter.detect(b"input1"));
    }

    /// Only an immediate repeat is rejected; the same value is accepted again
    /// once a different input has been seen in between.
    #[test]
    fn sorted_filter_detection() {
        let mut filter = SortedFilter::new();

        assert!(filter.detect(b"input1"));
        assert!(!filter.detect(b"input1"));
        assert!(filter.detect(b"input2"));
        assert!(filter.detect(b"input1"));
    }

    /// A repeated input is accepted once and then rejected.
    #[test]
    fn bloom_filter_detection() {
        let mut filter = BloomFilter::new();

        assert!(filter.detect(b"input1"));
        assert!(!filter.detect(b"input1"));
    }
}