use std::sync::LazyLock;
use super::bucket_span::BucketSpan;
use super::log_scale_config::LogScaleConfig;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LogScale {
config: LogScaleConfig,
pub(crate) bucket_min_values: Vec<u64>,
small_value_buckets: Vec<u8>,
}
const MAX_WIDTH: usize = 16;
static LOG_SCALES: LazyLock<Vec<LogScale>> = LazyLock::new(|| (1..=MAX_WIDTH).map(LogScale::new).collect());
impl LogScale {
pub const DEFAULT_WIDTH: usize = 3;
pub fn get(width: usize) -> &'static LogScale {
assert!(
(1..=MAX_WIDTH).contains(&width),
"width must be 1..={MAX_WIDTH}, got {width}"
);
&LOG_SCALES[width - 1]
}
pub fn new(width: usize) -> Self {
let mut s = Self {
config: LogScaleConfig::new(width),
bucket_min_values: Vec::new(),
small_value_buckets: Vec::new(),
};
s.bucket_min_values = (0..s.config.buckets()).map(|i| s.compute_bucket_min_value(i)).collect();
s.small_value_buckets = Vec::with_capacity(s.config.small_value_cache_size());
for v in 0..s.config.small_value_cache_size() {
let bucket = s.calculate_bucket_uncached(v as u64);
let Ok(bucket) = u8::try_from(bucket) else {
break;
};
s.small_value_buckets.push(bucket);
}
s
}
pub fn config(&self) -> &LogScaleConfig {
&self.config
}
#[inline]
pub fn num_buckets(&self) -> usize {
self.config.buckets()
}
pub fn bucket_span(&self, index: usize) -> BucketSpan<'_> {
BucketSpan::new(self, index)
}
#[inline]
pub fn calculate_bucket(&self, value: u64) -> usize {
if value < self.small_value_buckets.len() as u64 {
return self.small_value_buckets[value as usize] as usize;
}
self.calculate_bucket_uncached(value)
}
pub fn calculate_bucket_uncached(&self, value: u64) -> usize {
let g_size = self.config.group_size();
if value < g_size as u64 {
return value as usize;
}
let bits_upto_msb = (u64::BITS - value.leading_zeros()) as usize;
let group_index = bits_upto_msb - self.config.width();
let offset_in_group = ((value >> group_index) & self.config.mask()) as usize;
g_size + group_index * g_size + offset_in_group
}
fn compute_bucket_min_value(&self, bucket: usize) -> u64 {
let g_size = self.config.group_size();
if bucket < g_size {
return bucket as u64;
}
let group_index = (bucket - g_size) / g_size;
let offset_in_group = (bucket - g_size) % g_size;
((offset_in_group | g_size) << group_index) as u64
}
}
impl Default for LogScale {
fn default() -> Self {
Self::new(Self::DEFAULT_WIDTH)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_default() {
let scale = LogScale::default();
assert_eq!(
scale.num_buckets(),
LogScale::get(LogScale::DEFAULT_WIDTH).num_buckets()
);
}
#[test]
fn test_num_buckets() {
assert_eq!(LogScale::get(3).num_buckets(), 252);
}
#[test]
fn test_calculate_bucket_group_0() {
assert_eq!(LogScale::get(3).calculate_bucket_uncached(0), 0);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(1), 1);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(2), 2);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(3), 3);
}
#[test]
fn test_calculate_bucket_group_1() {
assert_eq!(LogScale::get(3).calculate_bucket_uncached(4), 4);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(5), 5);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(6), 6);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(7), 7);
}
#[test]
fn test_calculate_bucket_group_2() {
assert_eq!(LogScale::get(3).calculate_bucket_uncached(8), 8);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(10), 9);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(12), 10);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(14), 11);
}
#[test]
fn test_calculate_bucket_group_3() {
assert_eq!(LogScale::get(3).calculate_bucket_uncached(16), 12);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(20), 13);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(24), 14);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(28), 15);
}
#[test]
fn test_calculate_bucket_group_4() {
assert_eq!(LogScale::get(3).calculate_bucket_uncached(32), 16);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(40), 17);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(48), 18);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(56), 19);
}
#[test]
fn test_reasonable_bucket_ranges() {
assert_eq!(LogScale::get(3).calculate_bucket_uncached(1024), 36);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(2048), 40);
assert_eq!(LogScale::get(3).calculate_bucket_uncached(4096), 44);
let million = 1_048_576;
let million_bucket = LogScale::get(3).calculate_bucket_uncached(million);
assert!(million_bucket < 80);
let billion = 1_073_741_824;
let billion_bucket = LogScale::get(3).calculate_bucket_uncached(billion);
assert!(billion_bucket < 120);
}
#[test]
fn test_bucket_min_values_lookup_table() {
let s = LogScale::get(3);
assert_eq!(s.bucket_span(0).left(), 0);
assert_eq!(s.bucket_span(1).left(), 1);
assert_eq!(s.bucket_span(2).left(), 2);
assert_eq!(s.bucket_span(3).left(), 3);
assert_eq!(s.bucket_span(4).left(), 4);
assert_eq!(s.bucket_span(5).left(), 5);
assert_eq!(s.bucket_span(6).left(), 6);
assert_eq!(s.bucket_span(7).left(), 7);
assert_eq!(s.bucket_span(8).left(), 8);
assert_eq!(s.bucket_span(9).left(), 10);
assert_eq!(s.bucket_span(10).left(), 12);
assert_eq!(s.bucket_span(11).left(), 14);
assert_eq!(s.bucket_span(12).left(), 16);
assert_eq!(s.bucket_span(13).left(), 20);
assert_eq!(s.bucket_span(14).left(), 24);
assert_eq!(s.bucket_span(15).left(), 28);
}
#[test]
fn test_bucket_width() {
let s = LogScale::get(3);
assert_eq!(s.bucket_span(0).width(), 1); assert_eq!(s.bucket_span(1).width(), 1); assert_eq!(s.bucket_span(3).width(), 1);
assert_eq!(s.bucket_span(4).width(), 1); assert_eq!(s.bucket_span(7).width(), 1);
assert_eq!(s.bucket_span(8).width(), 2); assert_eq!(s.bucket_span(11).width(), 2);
assert_eq!(s.bucket_span(12).width(), 4); assert_eq!(s.bucket_span(15).width(), 4);
assert_eq!(s.bucket_span(16).width(), 8);
assert_eq!(s.bucket_span(251).width(), u64::MAX - (0b111 << 61));
assert_eq!(s.bucket_span(250).width(), 1 << 61);
}
#[test]
fn test_bucket_span() {
let b = LogScale::get(3).bucket_span(0);
assert_eq!(b.index(), 0);
assert_eq!(b.left(), 0);
assert_eq!(b.right(), 1);
assert_eq!(b.width(), 1);
assert_eq!(b.midpoint(), 0);
let b = LogScale::get(3).bucket_span(8);
assert_eq!(b.index(), 8);
assert_eq!(b.left(), 8);
assert_eq!(b.right(), 10);
assert_eq!(b.width(), 2);
assert_eq!(b.midpoint(), 9);
let b = LogScale::get(3).bucket_span(12);
assert_eq!(b.index(), 12);
assert_eq!(b.left(), 16);
assert_eq!(b.right(), 20);
assert_eq!(b.width(), 4);
assert_eq!(b.midpoint(), 18);
let b = LogScale::get(3).bucket_span(251);
assert_eq!(b.left(), 0b111 << 61);
assert_eq!(b.right(), u64::MAX);
assert_eq!(b.width(), u64::MAX - (0b111 << 61));
}
#[test]
fn test_cached_bucket_matches_uncached() {
let test_values: Vec<usize> =
(0..100).chain((100..1000).step_by(10)).chain((1000..4096).step_by(100)).collect();
for v in test_values {
let cached = LogScale::get(3).calculate_bucket(v as u64);
let uncached = LogScale::get(3).calculate_bucket_uncached(v as u64);
assert_eq!(cached, uncached, "Mismatch at value {v}");
}
}
#[test]
fn test_cached_bucket_boundary() {
let last_cached = (LogScale::get(3).config().small_value_cache_size() - 1) as u64;
let first_uncached = LogScale::get(3).config().small_value_cache_size() as u64;
assert_eq!(
LogScale::get(3).calculate_bucket(last_cached),
LogScale::get(3).calculate_bucket_uncached(last_cached)
);
assert_eq!(
LogScale::get(3).calculate_bucket(first_uncached),
LogScale::get(3).calculate_bucket_uncached(first_uncached)
);
}
#[test]
fn test_cached_bucket_large_values() {
let large_values = [4096, 10000, 100000, 1_000_000, u64::MAX];
for &v in &large_values {
assert_eq!(
LogScale::get(3).calculate_bucket(v),
LogScale::get(3).calculate_bucket_uncached(v),
"Mismatch at value {v}"
);
}
}
#[test]
fn test_new_reduces_small_value_cache_when_bucket_exceeds_u8() {
let log_scale = LogScale::new(7);
assert_eq!(log_scale.small_value_buckets.len(), 512);
assert_eq!(
log_scale.calculate_bucket(511),
log_scale.calculate_bucket_uncached(511)
);
assert_eq!(
log_scale.calculate_bucket(512),
log_scale.calculate_bucket_uncached(512)
);
}
}