//! Approximate distinct-element counting built around the [`Hypeerlog`] sketch.
//!
//! The `murmur` submodule supplies `Murmur3BuildHasher`; helper functions are
//! glob-imported from the `utils` submodule.

// NOTE(review): this blanket allow silences every `unused` lint for the file; the
// reason is not visible here — consider narrowing or removing it.
#![allow(unused)]
// Undocumented public items, undocumented `unsafe fn` contracts and undocumented
// `unsafe` blocks are all hard errors.
#![deny(
missing_docs,
clippy::missing_safety_doc,
clippy::undocumented_unsafe_blocks
)]
use core::hash::Hash;
use std::hash::{BuildHasher, Hasher};
use std::fmt::Debug;
// Private submodules; only the names imported below are used directly in this file.
mod murmur;
mod utils;
use utils::*;
use murmur::{Murmur3BuildHasher};
/// Approximate distinct-count sketch that keeps one `u8` register per bucket.
///
/// `S` is the [`BuildHasher`] used to hash every inserted value; it defaults to
/// `Murmur3BuildHasher`.
#[derive(Debug, PartialEq, Eq)]
pub struct Hypeerlog<S: BuildHasher + Debug = Murmur3BuildHasher> {
    /// Factory for the hasher that is built afresh for each inserted value.
    hasher: S,
    /// Precision exponent; the constructors allocate `pow_two(percision)` registers.
    percision: u8,
    /// One register per bucket; `insert` only ever raises a register's value.
    registers: Vec<u8>,
}
impl<S> Hypeerlog<S>
where
    S: BuildHasher + Debug,
{
    /// Creates an empty sketch with precision 14 that hashes values with `hasher_builder`.
    pub fn with_hasher(hasher_builder: S) -> Self {
        // 14 lies inside the clamp range below, so it is used unchanged.
        Self::with_hasher_percision(14, hasher_builder)
    }

    /// Creates an empty sketch with the requested precision and hasher builder.
    ///
    /// `percision` is clamped to `4..=25`; `pow_two(percision)` zeroed registers
    /// are allocated.
    pub fn with_hasher_percision(percision: u8, hasher_builder: S) -> Self {
        let p = percision.clamp(4, 25);
        Hypeerlog {
            hasher: hasher_builder,
            percision: p,
            registers: vec![0; pow_two(p) as usize],
        }
    }

    /// Rebuilds a sketch from a byte buffer: the register bytes followed by one
    /// trailing precision byte.
    ///
    /// `hasher_builder` is stored as given; the buffer carries no information
    /// about the hasher that originally filled the registers.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when `bytes` is empty, when the trailing precision byte
    /// is outside `4..=25` (the widest range any constructor in this file
    /// produces), or when the number of remaining bytes is not
    /// `pow_two(precision)`.
    pub fn load_with_hasher(mut bytes: Vec<u8>, hasher_builder: S) -> Result<Self, ()> {
        let p = bytes.pop().ok_or(())?;
        // Check the untrusted byte before it reaches `pow_two`, so an arbitrary
        // value in 0..=255 is never used as an exponent.
        if !(4..=25).contains(&p) {
            return Err(());
        }
        if bytes.len() != pow_two(p) as usize {
            return Err(());
        }
        Ok(Hypeerlog {
            hasher: hasher_builder,
            percision: p,
            registers: bytes,
        })
    }
}
impl Hypeerlog {
    /// Creates an empty sketch with precision 14 and a `Murmur3BuildHasher` seeded with 0.
    pub fn new() -> Hypeerlog<Murmur3BuildHasher> {
        Self::with_percision(14)
    }

    /// Creates an empty sketch with the given precision and a hasher seeded with 0.
    ///
    /// `percision` is clamped to `4..=20`.
    pub fn with_percision(percision: u8) -> Hypeerlog<Murmur3BuildHasher> {
        Self::with_percision_seed(percision, 0)
    }

    /// Creates an empty sketch with precision 14 and a hasher seeded with `seed`.
    pub fn with_seed(seed: u32) -> Hypeerlog<Murmur3BuildHasher> {
        // 14 lies inside the clamp range, so it is used unchanged.
        Self::with_percision_seed(14, seed)
    }

    /// Creates an empty sketch with the given precision and hasher seed.
    ///
    /// `percision` is clamped to `4..=20`; `pow_two(percision)` zeroed registers
    /// are allocated.
    pub fn with_percision_seed(percision: u8, seed: u32) -> Hypeerlog<Murmur3BuildHasher> {
        let p = percision.clamp(4, 20);
        Hypeerlog {
            hasher: Murmur3BuildHasher::new(seed),
            percision: p,
            registers: vec![0; pow_two(p) as usize],
        }
    }

    /// Returns the number of registers held by the sketch.
    pub fn registers(&self) -> usize {
        self.registers.len()
    }

    /// Hashes `data` and records it in the sketch.
    ///
    /// The register selected by `get_bucket` is raised to the value returned by
    /// `longest_run` when that value is larger; registers never decrease.
    ///
    /// # Panics
    ///
    /// Panics if `get_bucket` returns an index outside the register vector
    /// (presumably impossible for a well-formed sketch — verify against `utils`).
    pub fn insert<H: Hash>(&mut self, data: H) {
        let mut hasher = self.hasher.build_hasher();
        data.hash(&mut hasher);
        let hash = hasher.finish();

        let idx = get_bucket(self.percision, hash);
        let run = longest_run(self.percision, hash);
        let register = &mut self.registers[idx];
        *register = run.max(*register);
    }

    /// Inserts every element of `data`, in order.
    pub fn insert_many<H: Hash>(&mut self, data: &[H]) {
        for elem in data {
            self.insert(elem);
        }
    }

    /// Returns `true` when every register is zero.
    ///
    /// The type parameter `H` is not used by the body, so it cannot be inferred
    /// and has to be named at the call site, e.g. `sketch.is_empty::<u8>()`.
    // NOTE(review): `H` is kept only so existing turbofish callers keep compiling.
    pub fn is_empty<H: Hash>(&self) -> bool {
        self.registers.iter().all(|&val| val == 0)
    }

    /// Estimates the number of distinct values inserted so far.
    ///
    /// Returns `0.0` when every register is zero. Otherwise the raw estimate is
    /// `alpha_m * m * m * harmonic_mean(registers)` with `m = pow_two(percision)`.
    /// When at least one register is still zero and the raw estimate is below
    /// `2.5 * m`, the linear-counting estimate `m * ln(m / zero_registers)` is
    /// returned instead.
    pub fn cardinality(&self) -> f64 {
        let m = pow_two(self.percision) as f64;
        let alpha_m = get_alpha_m_bias(m);
        let zero_registers = self.registers.iter().filter(|&&val| val == 0).count();
        if zero_registers == m as usize {
            return 0.0;
        }

        let raw_estimate = alpha_m * m * m * harmonic_mean(&self.registers);
        if zero_registers > 0 && raw_estimate < 2.5 * m {
            m * (m / zero_registers as f64).ln()
        } else {
            raw_estimate
        }
    }

    /// Merges `other` into `self`, keeping the larger value of each register pair.
    ///
    /// Only the precisions are compared; the hashers are not. Merging sketches
    /// filled with different seeds is accepted by this code, and the merged
    /// registers are presumably meaningless in that case.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when the two sketches have different precisions.
    pub fn merge(mut self, other: Self) -> Result<Self, ()> {
        if self.percision != other.percision {
            return Err(());
        }
        for (mine, theirs) in self.registers.iter_mut().zip(&other.registers) {
            *mine = (*mine).max(*theirs);
        }
        Ok(self)
    }

    /// Serialises the sketch as its register bytes followed by one precision byte.
    ///
    /// The hasher (and therefore its seed) is not part of the output.
    pub fn dump(&self) -> Vec<u8> {
        // Reserve room for the trailing byte up front so the push cannot reallocate.
        let mut bytes = Vec::with_capacity(self.registers.len() + 1);
        bytes.extend_from_slice(&self.registers);
        bytes.push(self.percision);
        bytes
    }

    /// Rebuilds a sketch from a buffer in the layout produced by [`Self::dump`],
    /// using a `Murmur3BuildHasher` seeded with 0.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when `bytes` is empty, when the trailing precision byte
    /// is outside `4..=25` (the widest range any constructor in this file
    /// produces), or when the number of remaining bytes is not
    /// `pow_two(precision)`.
    pub fn load(mut bytes: Vec<u8>) -> Result<Self, ()> {
        let p = bytes.pop().ok_or(())?;
        // Check the untrusted byte before it reaches `pow_two`, so an arbitrary
        // value in 0..=255 is never used as an exponent.
        if !(4..=25).contains(&p) {
            return Err(());
        }
        if bytes.len() != pow_two(p) as usize {
            return Err(());
        }
        Ok(Hypeerlog {
            hasher: Murmur3BuildHasher::new(0),
            percision: p,
            registers: bytes,
        })
    }
}