use thiserror::Error;
use crate::utils::divide_by_8_round_up;
const MINIMUM_LOG_2M_PARAM: u32 = 4;
const MAXIMUM_LOG_2M_PARAM: u32 = 31;
const MINIMUM_REG_WIDTH_PARAM: u32 = 1;
const MAXIMUM_REG_WIDTH_PARAM: u32 = 8;
const MINIMUM_EXPTHRESH_PARAM: i32 = -1;
const MAXIMUM_EXPTHRESH_PARAM: i32 = 18;
const MAXIMUM_EXPLICIT_THRESHOLD: u32 = 1 << (MAXIMUM_EXPTHRESH_PARAM - 1);
const AUTO_EXPLICIT_THRESHOLD: i32 = -1;
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct Settings {
pub(crate) log_2m: u32,
pub(crate) reg_width: u32,
pub(crate) explicit_threshold: i32,
pub(crate) sparse_threshold: Option<i32>,
pub(crate) pw_max_mask: u64,
pub(crate) m_bits_mask: u64,
pub(crate) alpha_msquared: f64,
pub(crate) small_estimator_cutoff: f64,
pub(crate) large_estimator_cutoff: f64,
pub(crate) two_to_l: f64,
}
#[derive(Clone, Debug, Error)]
pub enum SettingsError {
#[error("log_2m must be between {MINIMUM_LOG_2M_PARAM}, {MAXIMUM_LOG_2M_PARAM}")]
Log2m,
#[error("reg_width must be between {MINIMUM_REG_WIDTH_PARAM}, {MAXIMUM_REG_WIDTH_PARAM}")]
RegWidth,
#[error("Threshold must be between {MINIMUM_EXPTHRESH_PARAM}, {MAXIMUM_EXPTHRESH_PARAM}")]
Threshold,
#[error("config mismatch. log_2m and reg_width must match when combining hll's")]
MisMatch,
}
impl Settings {
pub fn new(
log_2m: u32,
reg_width: u32,
explicit_threshold: i32,
sparse_enabled: bool,
) -> Result<Self, SettingsError> {
let sparse_threshold = match sparse_enabled {
true => Some(Self::calculate_sparse_threshold(log_2m, reg_width)),
false => None,
};
let settings = Self {
log_2m,
reg_width,
explicit_threshold,
sparse_threshold,
pw_max_mask: Settings::pw_max_mask(reg_width),
m_bits_mask: ((1 << log_2m) - 1),
alpha_msquared: Settings::alpha_m_squared(log_2m),
small_estimator_cutoff: Settings::small_estimator_cutoff(1 << log_2m),
large_estimator_cutoff: Settings::large_estimator_cutoff(Settings::two_to_l(
log_2m, reg_width,
)),
two_to_l: Settings::two_to_l(log_2m, reg_width),
};
settings.validate()?;
Ok(settings)
}
pub fn validate(&self) -> Result<(), SettingsError> {
if !(MINIMUM_LOG_2M_PARAM..=MAXIMUM_LOG_2M_PARAM).contains(&self.log_2m) {
return Err(SettingsError::Log2m);
}
if !(MINIMUM_REG_WIDTH_PARAM..=MAXIMUM_REG_WIDTH_PARAM).contains(&self.reg_width) {
return Err(SettingsError::RegWidth);
}
Ok(())
}
pub fn settings_check(&self, other: &Self) -> Result<(), SettingsError> {
if self.log_2m == other.log_2m && self.reg_width == other.reg_width {
return Ok(());
}
Err(SettingsError::MisMatch)
}
pub fn explicit_threshold(&self) -> u32 {
match self.explicit_threshold {
AUTO_EXPLICIT_THRESHOLD => {
Self::calculate_explicit_threshold(self.log_2m, self.reg_width)
}
_ => self.explicit_threshold as u32,
}
}
pub fn calculate_explicit_threshold(log_2m: u32, reg_width: u32) -> u32 {
let m = 1 << log_2m;
let full_representation_size = divide_by_8_round_up(reg_width * m);
let num_longs = full_representation_size / 8;
if num_longs > MAXIMUM_EXPLICIT_THRESHOLD {
return MAXIMUM_EXPLICIT_THRESHOLD;
}
num_longs
}
fn calculate_sparse_threshold(log_2m: u32, reg_width: u32) -> i32 {
let m = 1 << log_2m;
let short_word_length: f64 = (log_2m + reg_width).into();
let reg_bits: f64 = (m * reg_width).into();
let largest_pow2_less_than_cutoff: u32 = (reg_bits / short_word_length).log2() as u32;
1 << largest_pow2_less_than_cutoff
}
pub(crate) fn pw_max_mask(reg_width: u32) -> u64 {
let shift: u64 = (((1u64 << reg_width) - 1) - 1) % (u64::BITS as u64);
!((1u64 << shift) - 1)
}
pub(crate) fn alpha_m_squared(log_2m: u32) -> f64 {
let m: f64 = (1 << log_2m).into();
match log_2m {
4 => 0.673 * m * m,
5 => 0.697 * m * m,
6 => 0.709 * m * m,
_ => (0.7213 / (1.0 + 1.079 / m)) * m * m,
}
}
pub(crate) fn small_estimator_cutoff(m: u32) -> f64 {
let m: f64 = m.into();
(m * 5.0) / 2.0
}
pub(crate) fn large_estimator_cutoff(two_to_l: f64) -> f64 {
two_to_l / 30.0
}
pub(crate) fn two_to_l(log_2m: u32, reg_width: u32) -> f64 {
let max_register_value = (1 << reg_width) - 1;
let pw_bits = max_register_value - 1;
let total_bits = pw_bits + log_2m;
2_f64.powf(total_bits.into())
}
pub(crate) fn pack_cutoff_byte(&self) -> u8 {
let threshold = if self.explicit_threshold == AUTO_EXPLICIT_THRESHOLD {
63
} else if self.explicit_threshold == 0 {
0
} else {
u32::BITS - (self.explicit_threshold as u32).leading_zeros() - 1
};
let mut res = threshold;
if self.sparse_threshold.is_some() {
res |= 1 << 6
}
res as u8
}
pub(crate) fn unpack_cutoff_byte(b: u8) -> (bool, i32) {
let sparse_enabled = b >> 6 == 1;
let threshold = b & 0x3F;
if threshold == 0 {
return (sparse_enabled, 0);
}
if threshold == 63 {
return (sparse_enabled, -1);
}
(sparse_enabled, 1 << (threshold - 1))
}
}
#[cfg(test)]
mod test {
use super::Settings;
#[test]
fn pw() {
let settings = Settings::new(10, 5, 0, false);
println!("{:?}", settings);
}
#[test]
fn left_shift() {
assert_eq!(1 << 0, 1);
}
}