use crate::array_default::{ArrayDefault, ArrayIter};
use crate::precisions::{Precision, WordType};
use crate::prelude::*;
use core::hash::Hash;
pub struct HyperLogLogWithMultiplicities<P: Precision + WordType<BITS>, const BITS: usize> {
pub(crate) words: P::Words,
pub(crate) multiplicities: P::RegisterMultiplicities,
}
impl<P: Precision + WordType<BITS>, const BITS: usize> HyperLogLogWithMultiplicities<P, BITS> {
fn new() -> Self {
let mut multiplicities = P::RegisterMultiplicities::default_array();
multiplicities[0] = P::NumberOfZeros::reverse(P::NUMBER_OF_REGISTERS);
Self {
words: P::Words::default_array(),
multiplicities,
}
}
pub fn from_words(words: &P::Words) -> Self {
let mut multiplicities = P::RegisterMultiplicities::default_array();
words.iter_elements().for_each(|word| {
(0..Self::NUMBER_OF_REGISTERS_IN_WORD).for_each(|i| {
let register = (word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
multiplicities[register as usize] += P::NumberOfZeros::ONE;
});
});
multiplicities[0] -= P::NumberOfZeros::reverse(Self::get_number_of_padding_registers());
Self {
words: *words,
multiplicities,
}
}
pub fn from_registers(registers: &[u32]) -> Self {
debug_assert!(
registers.len() == P::NUMBER_OF_REGISTERS,
"We expect {} registers, but got {}",
P::NUMBER_OF_REGISTERS,
registers.len()
);
let mut words = P::Words::default_array();
let mut multiplicities = P::RegisterMultiplicities::default_array();
words
.iter_elements_mut()
.zip(registers.chunks(Self::NUMBER_OF_REGISTERS_IN_WORD))
.for_each(|(word, word_registers)| {
for (i, register) in word_registers.iter().copied().enumerate() {
debug_assert!(
register <= Self::LOWER_REGISTER_MASK,
"Register value {} is too large for the given number of bits {}",
register,
BITS
);
multiplicities[register as usize] += P::NumberOfZeros::ONE;
*word |= register << (i * BITS);
}
});
Self {
words,
multiplicities,
}
}
#[inline(always)]
pub fn insert<T: Hash>(&mut self, rhs: T) -> bool {
let (mut hash, index) = self.get_hash_and_index::<T>(&rhs);
if BITS < 6 {
hash |= 1 << (64 - ((1 << BITS) - 1));
} else {
hash |= 1 << (P::EXPONENT - 1);
}
let number_of_zeros: u32 = 1 + hash.leading_zeros();
debug_assert!(
number_of_zeros < (1 << BITS),
concat!(
"The number of leading zeros {} must be less than the number of bits {}. ",
"You have obtained this values starting from the hash {:064b} and the precision {}."
),
number_of_zeros,
1 << BITS,
hash,
P::EXPONENT
);
let word_position = index / Self::NUMBER_OF_REGISTERS_IN_WORD;
let register_position_in_u32 = index - word_position * Self::NUMBER_OF_REGISTERS_IN_WORD;
debug_assert!(
word_position < self.words.len(),
concat!(
"The word_position {} must be less than the number of words {}. ",
"You have obtained this values starting from the index {} and the number of registers in word {}. ",
"We currently have {} registers. Currently using precision {} and number of bits {}."
),
word_position,
self.words.len(),
index,
Self::NUMBER_OF_REGISTERS_IN_WORD,
P::NUMBER_OF_REGISTERS,
P::EXPONENT,
BITS
);
let register_value: u32 = (self.words[word_position] >> (register_position_in_u32 * BITS))
& Self::LOWER_REGISTER_MASK;
if number_of_zeros > register_value {
debug_assert!(self.multiplicities[register_value as usize] > P::NumberOfZeros::ZERO,);
self.multiplicities[register_value as usize] -= P::NumberOfZeros::ONE;
self.multiplicities[number_of_zeros as usize] += P::NumberOfZeros::ONE;
self.words[word_position] &=
!(Self::LOWER_REGISTER_MASK << (register_position_in_u32 * BITS));
self.words[word_position] |= number_of_zeros << (register_position_in_u32 * BITS);
debug_assert!(
self.words[word_position] & Self::PADDING_BITS_MASK == 0,
concat!(
"The padding bits of the word {} must be all zeros. ",
"We have obtained {} instead."
),
self.words[word_position],
self.words[word_position] & Self::PADDING_BITS_MASK
);
debug_assert!(
index != P::NUMBER_OF_REGISTERS - 1
|| self.words[word_position] & Self::LAST_WORD_PADDING_BITS_MASK == 0,
concat!(
"The padding bits of the last word {} must be all zeros. ",
"We have obtained {} instead. The last word padding bits mask is, ",
"when represented in binary, {:#034b}.\n ",
"The word in binary is {:#034b}. ",
"The current case is using precision {} and bits {}. As such, ",
"we expect to have {} padding registers in the last word."
),
self.words[word_position],
self.words[word_position] & Self::LAST_WORD_PADDING_BITS_MASK,
Self::LAST_WORD_PADDING_BITS_MASK,
self.words[word_position],
P::EXPONENT,
BITS,
Self::get_number_of_padding_registers()
);
true
} else {
false
}
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize> Default
for HyperLogLogWithMultiplicities<P, BITS>
{
fn default() -> Self {
Self::new()
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize> From<HyperLogLogWithMultiplicities<P, BITS>>
for HyperLogLog<P, BITS>
{
fn from(hll: HyperLogLogWithMultiplicities<P, BITS>) -> Self {
Self::from_words(hll.get_words())
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize> From<HyperLogLog<P, BITS>>
for HyperLogLogWithMultiplicities<P, BITS>
{
fn from(hll: HyperLogLog<P, BITS>) -> Self {
Self::from_words(hll.get_words())
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize> HyperLogLogTrait<P, BITS>
for HyperLogLogWithMultiplicities<P, BITS>
{
#[inline(always)]
fn estimate_cardinality(&self) -> f32 {
Self::estimate_cardinality_from_multiplicities(&self.multiplicities)
}
fn get_words(&self) -> &P::Words {
&self.words
}
#[inline(always)]
fn get_number_of_zero_registers(&self) -> usize {
self.multiplicities[0].convert()
}
}
impl<P: Precision + WordType<BITS>, const BITS: usize, A: Hash> core::iter::FromIterator<A>
for HyperLogLogWithMultiplicities<P, BITS>
{
#[inline(always)]
fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self {
let mut hll = Self::default();
for item in iter {
hll.insert(item);
}
hll
}
}