use crate::array_default::{ArrayDefault, ArrayIter};
use crate::precisions::{Precision, WordType};
use crate::prelude::HyperLogLogTrait;
use crate::primitive::Primitive;
use core::hash::Hash;
#[derive(Clone, Debug, Copy)]
pub struct HyperLogLog<P: Precision + WordType<BITS>, const BITS: usize> {
pub(crate) words: P::Words,
pub(crate) number_of_zero_registers: P::NumberOfZeros,
}
impl<P: Precision + WordType<BITS>, const BITS: usize> HyperLogLog<P, BITS> {
fn new() -> Self {
Self {
words: P::Words::default_array(),
number_of_zero_registers: P::NumberOfZeros::reverse(P::NUMBER_OF_REGISTERS),
}
}
pub fn from_words(words: &P::Words) -> Self {
let number_of_zero_registers =
P::NumberOfZeros::reverse(words.iter_elements().fold(
0,
|number_of_zero_registers, word| {
debug_assert!(
word & Self::PADDING_BITS_MASK == 0,
concat!(
"The padding bits of the word {} must be all zeros. ",
"We have obtained {} instead."
),
word,
word & Self::PADDING_BITS_MASK
);
(0..Self::NUMBER_OF_REGISTERS_IN_WORD).fold(
number_of_zero_registers,
|number_of_zero_registers, i| {
let register = (word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
number_of_zero_registers + (register == 0) as usize
},
)
},
)) - P::NumberOfZeros::reverse(Self::get_number_of_padding_registers());
debug_assert!(
words.last().unwrap() & Self::LAST_WORD_PADDING_BITS_MASK == 0,
concat!(
"The padding bits of the last word {} must be all zeros. ",
"We have obtained {} instead. The last word padding bits mask is, ",
"when represented in binary, {:#034b}."
),
words.last().unwrap(),
words.last().unwrap() & Self::LAST_WORD_PADDING_BITS_MASK,
Self::LAST_WORD_PADDING_BITS_MASK
);
Self {
words: *words,
number_of_zero_registers,
}
}
pub fn from_registers(registers: &[u32]) -> Self {
debug_assert!(
registers.len() == P::NUMBER_OF_REGISTERS,
"We expect {} registers, but got {}",
P::NUMBER_OF_REGISTERS,
registers.len()
);
let mut words = P::Words::default_array();
let number_of_zero_registers = P::NumberOfZeros::reverse(
words
.iter_elements_mut()
.zip(registers.chunks(Self::NUMBER_OF_REGISTERS_IN_WORD))
.fold(0, |number_of_zero_registers, (word, word_registers)| {
word_registers.iter().copied().enumerate().fold(
number_of_zero_registers,
|number_of_zero_registers, (i, register)| {
debug_assert!(
register <= Self::LOWER_REGISTER_MASK,
"Register value {} is too large for the given number of bits {}",
register,
BITS
);
*word |= register << (i * BITS);
number_of_zero_registers + (register == 0) as usize
},
)
}),
);
Self {
words,
number_of_zero_registers,
}
}
#[inline(always)]
pub fn insert<T: Hash>(&mut self, rhs: T) -> bool {
let (mut hash, index) = self.get_hash_and_index::<T>(&rhs);
if BITS < 6 {
hash |= 1 << (65 - (1 << BITS))
}
let number_of_zeros: u32 = 1 + hash.leading_zeros();
debug_assert!(
number_of_zeros < 1 << BITS,
concat!("The number of zeros {} must be less than or equal to {}. ",),
number_of_zeros,
1 << BITS
);
let word_position = index / Self::NUMBER_OF_REGISTERS_IN_WORD;
let register_position = index - word_position * Self::NUMBER_OF_REGISTERS_IN_WORD;
debug_assert!(
word_position < self.words.len(),
concat!(
"The word_position {} must be less than the number of words {}. ",
"You have obtained this values starting from the index {} and the number of registers in word {}. ",
"We currently have {} registers. Currently using precision {} and number of bits {}."
),
word_position,
self.words.len(),
index,
Self::NUMBER_OF_REGISTERS_IN_WORD,
P::NUMBER_OF_REGISTERS,
P::EXPONENT,
BITS
);
let register_value: u32 =
(self.words[word_position] >> (register_position * BITS)) & Self::LOWER_REGISTER_MASK;
if number_of_zeros > register_value {
self.words[word_position] &= !(Self::LOWER_REGISTER_MASK << (register_position * BITS));
self.words[word_position] |= number_of_zeros << (register_position * BITS);
self.number_of_zero_registers -=
P::NumberOfZeros::reverse((register_value == 0) as usize);
debug_assert!(
self.words[word_position] >> (register_position * BITS) & Self::LOWER_REGISTER_MASK
== number_of_zeros,
concat!(
"The value of the register at position {} must be {}. ",
"We have obtained {} instead. ",
"The current value of the word is {}."
),
index,
number_of_zeros,
self.words[word_position] >> (register_position * BITS) & Self::LOWER_REGISTER_MASK,
self.words[word_position]
);
debug_assert!(
self.words[word_position] & Self::PADDING_BITS_MASK == 0,
concat!(
"The padding bits of the word {} must be all zeros. ",
"We have obtained {} instead."
),
self.words[word_position],
self.words[word_position] & Self::PADDING_BITS_MASK
);
debug_assert!(
index != P::NUMBER_OF_REGISTERS - 1
|| self.words[word_position] & Self::LAST_WORD_PADDING_BITS_MASK == 0,
concat!(
"The padding bits of the last word {} must be all zeros. ",
"We have obtained {} instead. The last word padding bits mask is, ",
"when represented in binary, {:#034b}.\n ",
"The word in binary is {:#034b}. ",
"The current case is using precision {} and bits {}. As such, ",
"we expect to have {} padding registers in the last word."
),
self.words[word_position],
self.words[word_position] & Self::LAST_WORD_PADDING_BITS_MASK,
Self::LAST_WORD_PADDING_BITS_MASK,
self.words[word_position],
P::EXPONENT,
BITS,
Self::get_number_of_padding_registers()
);
true
} else {
false
}
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize> Eq for HyperLogLog<P, BITS> {
fn assert_receiver_is_total_eq(&self) {
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize> PartialEq for HyperLogLog<P, BITS> {
fn eq(&self, other: &Self) -> bool {
self.words == other.words
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize> Default for HyperLogLog<P, BITS> {
fn default() -> Self {
Self::new()
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize, T: Hash> From<T> for HyperLogLog<P, BITS> {
fn from(value: T) -> Self {
let mut hll = Self::new();
hll.insert(value);
hll
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize> HyperLogLogTrait<P, BITS>
for HyperLogLog<P, BITS>
{
#[inline(always)]
fn get_number_of_zero_registers(&self) -> usize {
self.number_of_zero_registers.convert()
}
#[inline(always)]
fn get_words(&self) -> &P::Words {
&self.words
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize, A: Hash> core::iter::FromIterator<A>
for HyperLogLog<P, BITS>
{
#[inline(always)]
fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self {
let mut hll = Self::new();
for item in iter {
hll.insert(item);
}
hll
}
}