use std::borrow::BorrowMut;
use std::cmp::Ordering;
use hashbrown::HashMap;
const BLOOM_WIDTH: usize = 1 << 27;
const BITS_PER_ENTRY: usize = 12;
#[derive(Debug, Clone, Default)]
pub struct KmerFilter {
buf_size: u64,
buffer: Vec<u64>,
counts: HashMap<u64, u16>,
min_count: u16,
}
impl KmerFilter {
#[inline(always)]
fn reduce(key: u64, range: u64) -> u64 {
(((key as u128) * (range as u128)) >> 64) as u64
}
#[inline(always)]
fn cheap_mix(key: u64) -> u64 {
(key ^ (key >> 31)).wrapping_mul(0x85D0_59AA_3331_21CF)
}
#[inline(always)]
fn fingerprint(key: u64) -> u64 {
1 << (key & 63)
| 1 << ((key >> 6) & 63)
| 1 << ((key >> 12) & 63)
| 1 << ((key >> 18) & 63)
| 1 << ((key >> 24) & 63)
}
#[inline(always)]
fn location(key: u64, range: u64) -> usize {
Self::reduce(Self::cheap_mix(key), range) as usize
}
fn bloom_add_and_check(&mut self, key: u64) -> bool {
let f_print = Self::fingerprint(key);
let buf_val = self.buffer[Self::location(key, self.buf_size)].borrow_mut();
if *buf_val & f_print == f_print {
true
} else {
*buf_val |= f_print;
false
}
}
pub fn new(min_count: u16) -> Self {
let buf_size =
f64::round(BLOOM_WIDTH as f64 * (BITS_PER_ENTRY as f64 / 8.0) / (u64::BITS as f64))
as u64;
Self {
buf_size,
buffer: Vec::new(),
counts: HashMap::new(),
min_count,
}
}
pub fn init(&mut self) {
if self.buffer.is_empty() {
self.buffer.resize(self.buf_size as usize, 0);
}
}
pub fn clear(&mut self) {
self.buffer.clear();
self.counts.clear();
self.init();
}
pub fn filter(&mut self, hash: u64) -> Ordering {
match self.min_count {
0 | 1 => Ordering::Equal,
2 => {
if self.bloom_add_and_check(hash) {
Ordering::Equal
} else {
Ordering::Less
}
}
_ => {
if self.bloom_add_and_check(hash) {
let mut count: u16 = 2;
self.counts
.entry(hash)
.and_modify(|curr_cnt| {
count = curr_cnt.saturating_add(1);
*curr_cnt = count
})
.or_insert(count);
self.min_count.cmp(&count)
} else {
Ordering::Less
}
}
}
}
}