use log::debug;
use std::{
collections::hash_map::DefaultHasher,
hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},
};
type HasherType = DefaultHasher;
#[derive(Default, Debug, Clone, Hash)]
pub struct BitMaps(pub Vec<u64>);
pub struct PCSA {
pub b: u32,
pub m: u64,
pub bitmaps_size: u64,
pub bitmaps: Vec<u64>,
pub hasher: Box<dyn BuildHasher<Hasher = HasherType>>,
}
impl PCSA {
pub fn new() -> Self {
let b = 5;
let m = 2u64.pow(b); let bitmaps_size = m;
let bitmaps = [0].repeat(bitmaps_size as usize);
type Builder = BuildHasherDefault<HasherType>;
let hasher = Builder::default();
PCSA {
b,
m,
bitmaps_size,
bitmaps,
hasher: Box::new(hasher),
}
}
pub fn add(&mut self, record: String) -> std::io::Result<()> {
let mut hasher = self.hasher.build_hasher();
hasher.write(record.as_bytes());
let hash = hasher.finish();
print!(
"PCSA.add:
record ={:?}
count_ones ={:?}
count_zeros ={:?}
leading_zeros ={:?}
trailing_zeros ={:?}
leading_ones ={:?}
trailing_ones ={:?}
hash ={:?}
hash_binary ={:?}
",
record,
hash.count_ones(),
hash.count_zeros(),
hash.leading_zeros(),
hash.trailing_zeros(),
-1,
-1,
hash,
format!("{:064b}", hash),
);
let hash_bytes = hash.to_be_bytes();
let leading_zeros = hash.leading_zeros();
let leading_zeros_bytes = leading_zeros.to_be_bytes();
let low_m_bits = hash & ((1 << self.b) - 1);
let hi = 64 - self.b;
let lo = self.b;
let top = (hash >> lo) & ((1 << hi) - 1);
let bottom = hash & ((1 << lo) - 1);
let bitmap_index = bottom as usize;
let bitmap_old = self.bitmaps[bitmap_index].clone();
let leading_zeros_new = top.leading_zeros();
let mask = 1 << leading_zeros_new;
let bitmap_new = bitmap_old | mask;
let run_length = 0;
print!(
"
run_length ={:?}
hash_bytes ={:?}
low_m_bits ={:?}
low_m_bits_binary ={:?}
top ={:?}
top_binary ={:?}
bottom ={:?}
bottom_binary ={:?}
leading_zeros_new ={:?}
leading_zeros ={:?}
leading_zeros_bytes ={:?}
bitmap_index ={:?}
leading_zeros_binary ={:?}
mask ={:?}
mask_binary ={:?}
bitmap_old ={:?}
bitmap_value_old_binary={:?}
bitmap_new ={:?}
bitmap_value_new_binary={:?}
",
run_length,
hash_bytes,
low_m_bits,
format!("{:064b}", low_m_bits),
top,
format!("{:064b}", top),
bottom,
format!("{:064b}", bottom),
leading_zeros_new,
leading_zeros,
leading_zeros_bytes,
bitmap_index,
format!("{:064b}", leading_zeros),
mask,
format!("{:064b}", mask),
bitmap_old,
format!("{:064b}", bitmap_old),
bitmap_new,
format!("{:064b}", bitmap_new),
);
self.bitmaps[bitmap_index] = bitmap_new;
Ok(())
}
pub fn cardinality(&self) -> std::io::Result<u64> {
let result = 0;
debug!("PCSA.cardinality: result={:?}", result);
Ok(result)
}
}
impl std::fmt::Debug for PCSA {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let bitmaps_as_str = self
.bitmaps
.iter()
.map(|bitmap_value| format!("{:064b}", bitmap_value))
.collect::<Vec<_>>()
.join(", ");
write!(
f,
"[
b={:?}
m={:?}
bitmaps_size={:?}
bitmaps={:?}
]",
self.b, self.m, self.bitmaps_size, bitmaps_as_str,
)
}
}
impl std::fmt::Display for PCSA {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let bitmaps_as_str = self
.bitmaps
.iter()
.map(|bitmap_value| format!("{:064b}", bitmap_value))
.collect::<Vec<_>>()
.join(", ");
write!(
f,
"[
b={:?}
m={:?}
bitmaps_size={:?}
bitmaps={:?}
]",
self.b, self.m, self.bitmaps_size, bitmaps_as_str,
)
}
}
#[cfg(test)]
mod tests {
#[test]
fn it_works() {
assert_eq!(2 + 2, 4);
}
}