use core::borrow::Borrow;
use core::hash::{BuildHasher, Hash, Hasher};
use core::marker::PhantomData;
use serde::{Deserialize, Serialize};
use crate::common::*;
use crate::HyperLogLog;
use crate::HyperLogLogError;
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct HyperLogLogPF<H, B>
where
H: Hash + ?Sized,
B: BuildHasher,
{
builder: B,
count: usize,
precision: u8,
registers: Registers,
phantom: PhantomData<H>,
}
impl<H, B> HyperLogLogPF<H, B>
where
H: Hash + ?Sized,
B: BuildHasher,
{
const MIN_PRECISION: u8 = 4;
const MAX_PRECISION: u8 = 16;
pub fn new(precision: u8, builder: B) -> Result<Self, HyperLogLogError> {
if precision < Self::MIN_PRECISION || precision > Self::MAX_PRECISION {
return Err(HyperLogLogError::InvalidPrecision);
}
let count = Self::register_count(precision);
Ok(HyperLogLogPF {
builder: builder,
count: count,
precision: precision,
registers: Registers::with_count(count),
phantom: PhantomData,
})
}
pub fn merge<S, T>(
&mut self,
other: &HyperLogLogPF<S, T>,
) -> Result<(), HyperLogLogError>
where
S: Hash + ?Sized,
T: BuildHasher,
{
if self.precision != other.precision() {
return Err(HyperLogLogError::IncompatiblePrecision);
}
for (i, val) in other.registers_iter().enumerate() {
self.registers.set_greater(i, val);
}
Ok(())
}
pub fn insert_any<R>(&mut self, value: &R)
where
R: Hash + ?Sized,
{
self.insert_impl(value);
}
#[inline(always)]
fn insert_impl<R>(&mut self, value: &R)
where
R: Hash + ?Sized,
{
let mut hasher = self.builder.build_hasher();
value.hash(&mut hasher);
let mut hash: u32 = hasher.finish() as u32;
let index: usize = (hash >> (32 - self.precision)) as usize;
hash = (hash << self.precision) | (1 << (self.precision - 1));
let zeros: u32 = 1 + hash.leading_zeros();
self.registers.set_greater(index, zeros);
}
#[inline]
fn precision(&self) -> u8 {
self.precision
}
#[inline]
fn registers_iter(&self) -> impl Iterator<Item = u32> + '_ {
self.registers.iter()
}
}
impl<H, B> HyperLogLogCommon for HyperLogLogPF<H, B>
where
H: Hash + ?Sized,
B: BuildHasher,
{
}
impl<H, B> HyperLogLog<H> for HyperLogLogPF<H, B>
where
H: Hash + ?Sized,
B: BuildHasher,
{
fn add(&mut self, value: &H) {
self.insert_impl(value);
}
fn insert<Q>(&mut self, value: &Q)
where
H: Borrow<Q>,
Q: Hash + ?Sized,
{
self.insert_impl(value);
}
fn count(&mut self) -> f64 {
let (mut raw, zeros) = (
Self::estimate_raw_plus(self.registers.iter(), self.count),
self.registers.zeros(),
);
let two32 = (1u64 << 32) as f64;
if raw <= 2.5 * self.count as f64 && zeros != 0 {
raw = Self::linear_count(self.count, zeros);
} else if raw > two32 / 30.0 {
raw = -1.0 * two32 * ln(1.0 - raw / two32);
}
raw
}
}
#[cfg(test)]
mod tests {
use super::*;
use core::hash::{BuildHasher, Hasher};
use siphasher::sip::SipHasher13;
struct PassThroughHasher(u64);
impl Hasher for PassThroughHasher {
#[inline]
fn finish(&self) -> u64 {
self.0
}
#[inline]
fn write(&mut self, _: &[u8]) {}
#[inline]
fn write_u64(&mut self, i: u64) {
self.0 = i;
}
}
#[derive(Serialize, Deserialize)]
struct PassThroughHasherBuilder;
impl BuildHasher for PassThroughHasherBuilder {
type Hasher = PassThroughHasher;
fn build_hasher(&self) -> Self::Hasher {
PassThroughHasher(0)
}
}
#[derive(Serialize, Deserialize)]
struct DefaultBuildHasher;
impl BuildHasher for DefaultBuildHasher {
type Hasher = SipHasher13;
fn build_hasher(&self) -> Self::Hasher {
SipHasher13::new()
}
}
#[test]
fn test_insert() {
let builder = PassThroughHasherBuilder {};
let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
HyperLogLogPF::new(16, builder).unwrap();
hll.insert(&0x0000000000010fff);
assert_eq!(hll.registers.get(1), 5);
hll.insert(&0x000000000002ffff);
assert_eq!(hll.registers.get(2), 1);
hll.insert(&0x0000000000030000);
assert_eq!(hll.registers.get(3), 17);
hll.insert(&0x0000000000030001);
assert_eq!(hll.registers.get(3), 17);
hll.insert(&0x00000000ff037000);
assert_eq!(hll.registers.get(0xff03), 2);
hll.insert(&0x00000000ff030800);
assert_eq!(hll.registers.get(0xff03), 5);
let builder = PassThroughHasherBuilder {};
let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
HyperLogLogPF::new(4, builder).unwrap();
hll.insert(&0x000000001fffffff);
assert_eq!(hll.registers.get(1), 1);
hll.insert(&0x00000000ffffffff);
assert_eq!(hll.registers.get(0xf), 1);
hll.insert(&0x0000000000ffffff);
assert_eq!(hll.registers.get(0), 5);
}
#[test]
fn test_count() {
let builder = PassThroughHasherBuilder {};
let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
HyperLogLogPF::new(16, builder).unwrap();
assert_eq!(hll.count(), 0.0);
hll.insert(&0x0000000000010fff);
hll.insert(&0x0000000000020fff);
hll.insert(&0x0000000000030fff);
hll.insert(&0x0000000000040fff);
hll.insert(&0x0000000000050fff);
hll.insert(&0x0000000000050fff);
assert_eq!(hll.count().trunc() as u64, 5);
}
#[test]
fn test_merge() {
let builder = PassThroughHasherBuilder {};
let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
HyperLogLogPF::new(16, builder).unwrap();
hll.insert(&0x00000000000101ff);
hll.insert(&0x00000000000202ff);
hll.insert(&0x00000000000304ff);
hll.insert(&0x0000000000040fff);
hll.insert(&0x0000000000050fff);
hll.insert(&0x0000000000060fff);
assert_eq!(hll.registers.get(1), 8);
assert_eq!(hll.registers.get(2), 7);
assert_eq!(hll.registers.get(3), 6);
assert_eq!(hll.registers.get(4), 5);
assert_eq!(hll.registers.get(5), 5);
assert_eq!(hll.registers.get(6), 5);
let builder = PassThroughHasherBuilder {};
let err: HyperLogLogPF<u64, PassThroughHasherBuilder> =
HyperLogLogPF::new(9, builder).unwrap();
assert_eq!(
hll.merge(&err),
Err(HyperLogLogError::IncompatiblePrecision)
);
let builder = PassThroughHasherBuilder {};
let mut other: HyperLogLogPF<u64, PassThroughHasherBuilder> =
HyperLogLogPF::new(16, builder).unwrap();
hll.insert(&0x00000000000404ff);
hll.insert(&0x00000000000502ff);
hll.insert(&0x00000000000601ff);
assert_eq!(other.merge(&hll), Ok(()));
assert_eq!(other.count().trunc() as u64, 6);
assert_eq!(hll.registers.get(1), 8);
assert_eq!(hll.registers.get(2), 7);
assert_eq!(hll.registers.get(3), 6);
assert_eq!(hll.registers.get(4), 6);
assert_eq!(hll.registers.get(5), 7);
assert_eq!(hll.registers.get(6), 8);
}
#[test]
fn test_serialization() {
let builder = PassThroughHasherBuilder {};
let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
HyperLogLogPF::new(16, builder).unwrap();
hll.insert(&0x0000000000010fff);
hll.insert(&0x0000000000020fff);
hll.insert(&0x0000000000030fff);
hll.insert(&0x0000000000040fff);
hll.insert(&0x0000000000050fff);
hll.insert(&0x0000000000050fff);
assert_eq!(hll.count().trunc() as usize, 5);
let serialized = serde_json::to_string(&hll).unwrap();
let mut deserialized: HyperLogLogPF<u64, PassThroughHasherBuilder> =
serde_json::from_str(&serialized).unwrap();
assert_eq!(deserialized.count().trunc() as usize, 5);
deserialized.insert(&0x0000000000060fff);
assert_eq!(deserialized.count().trunc() as usize, 6);
let mut hll: HyperLogLogPF<u64, DefaultBuildHasher> =
HyperLogLogPF::new(16, DefaultBuildHasher {}).unwrap();
hll.insert(&0x0000000000010fff);
hll.insert(&0x0000000000020fff);
hll.insert(&0x0000000000030fff);
hll.insert(&0x0000000000040fff);
hll.insert(&0x0000000000050fff);
hll.insert(&0x0000000000050fff);
assert_eq!(hll.count().trunc() as usize, 5);
let serialized = serde_json::to_string(&hll).unwrap();
let mut deserialized: HyperLogLogPF<u64, PassThroughHasherBuilder> =
serde_json::from_str(&serialized).unwrap();
assert_eq!(deserialized.count().trunc() as usize, 5);
deserialized.insert(&0x0000000000060fff);
assert_eq!(deserialized.count().trunc() as usize, 6);
}
#[cfg(feature = "bench-units")]
mod benches {
extern crate test;
use super::*;
use rand::prelude::*;
use test::{black_box, Bencher};
#[bench]
fn bench_insert(b: &mut Bencher) {
let builder = PassThroughHasherBuilder {};
let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
HyperLogLogPF::new(16, builder).unwrap();
b.iter(|| {
for i in 0u64..1000 {
hll.insert(&(u64::max_value() - i));
}
})
}
#[bench]
fn bench_insert_with_hash(b: &mut Bencher) {
let mut rng = rand::thread_rng();
let workload: Vec<String> = (0..2000)
.map(|_| {
format!("- {} - {} -", rng.gen::<u64>(), rng.gen::<u64>())
})
.collect();
b.iter(|| {
let mut hll: HyperLogLogPF<&String, DefaultBuildHasher> =
HyperLogLogPF::new(16, DefaultBuildHasher {}).unwrap();
for val in &workload {
hll.insert(&val);
}
let val = hll.count();
black_box(val);
})
}
#[bench]
fn bench_count(b: &mut Bencher) {
let builder = PassThroughHasherBuilder {};
let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
HyperLogLogPF::new(16, builder).unwrap();
for i in 0u64..10000 {
hll.insert(&(u64::max_value() - i));
}
b.iter(|| {
let count = hll.count();
black_box(count);
})
}
}
}