use crate::runtime::metrics::batch::duration_as_u64;
use std::cmp;
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::time::Duration;
const DEFAULT_MIN_VALUE: Duration = Duration::from_nanos(100);
const DEFAULT_MAX_VALUE: Duration = Duration::from_secs(60);
const DEFAULT_PRECISION: u32 = 2;
const MAX_PRECISION: u32 = 10;
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct LogHistogram {
pub(crate) num_buckets: usize,
pub(crate) p: u32,
pub(crate) bucket_offset: usize,
}
impl Default for LogHistogram {
fn default() -> Self {
LogHistogramBuilder::default().build()
}
}
impl LogHistogram {
fn from_n_p(n: u32, p: u32, bucket_offset: usize) -> Self {
assert!(n >= p, "{n} (n) must be at least as large as {p} (p)");
let num_buckets = ((n - p + 1) << p) as usize - bucket_offset;
Self {
num_buckets,
p,
bucket_offset,
}
}
fn truncate_to_max_value(&self, max_value: u64) -> LogHistogram {
let mut hist = *self;
while hist.max_value() >= max_value {
hist.num_buckets -= 1;
}
hist.num_buckets += 1;
hist
}
pub fn builder() -> LogHistogramBuilder {
LogHistogramBuilder::default()
}
pub fn max_value(&self) -> u64 {
self.bucket_range(self.num_buckets - 2).end
}
pub(crate) fn value_to_bucket(&self, value: u64) -> usize {
let index = bucket_index(value, self.p);
let offset_bucket = index.saturating_sub(self.bucket_offset as u64);
let max = self.num_buckets - 1;
offset_bucket.min(max as u64) as usize
}
pub(crate) fn bucket_range(&self, bucket: usize) -> std::ops::Range<u64> {
let LogHistogram {
p,
bucket_offset,
num_buckets,
} = self;
let input_bucket = bucket;
let bucket = bucket + bucket_offset;
let range_start_0th_bucket = match input_bucket {
0 => Some(0_u64),
_ => None,
};
let range_end_last_bucket = match input_bucket {
n if n == num_buckets - 1 => Some(u64::MAX),
_ => None,
};
if bucket < 1 << p {
let bucket = bucket as u64;
range_start_0th_bucket.unwrap_or(bucket)..range_end_last_bucket.unwrap_or(bucket + 1)
} else {
let bucket = bucket as u64;
let p = *p as u64;
let w = (bucket >> p) - 1;
let base_bucket = (w + 1) * (1_u64 << p);
let offset = bucket - base_bucket;
let s = 1_u64 << (w + p);
let start = s + (offset << w);
let end = s + ((offset + 1) << w);
range_start_0th_bucket.unwrap_or(start)..range_end_last_bucket.unwrap_or(end)
}
}
}
#[derive(Default, Debug, Copy, Clone)]
pub struct LogHistogramBuilder {
max_value: Option<Duration>,
min_value: Option<Duration>,
precision: Option<u32>,
}
impl From<LogHistogramBuilder> for LogHistogram {
fn from(value: LogHistogramBuilder) -> Self {
value.build()
}
}
impl LogHistogramBuilder {
pub fn max_error(mut self, max_error: f64) -> Self {
assert!(max_error > 0.0, "max_error must be greater than 0");
assert!(max_error < 1.0, "max_error must be less than 1");
let mut p = 2;
while 2_f64.powf(-(p as f64)) > max_error && p <= MAX_PRECISION {
p += 1;
}
self.precision = Some(p);
self
}
pub fn precision_exact(mut self, p: u32) -> Self {
assert!(p <= MAX_PRECISION, "precision must be <= {MAX_PRECISION}");
self.precision = Some(p);
self
}
pub fn min_value(mut self, duration: Duration) -> Self {
self.min_value = Some(duration);
self
}
pub fn max_value(mut self, duration: Duration) -> Self {
if duration.is_zero() {
panic!("max value must be greater than 0");
}
self.max_value = Some(duration);
self
}
pub fn max_buckets(
&mut self,
max_buckets: usize,
) -> Result<LogHistogram, InvalidHistogramConfiguration> {
let histogram = self.build();
if histogram.num_buckets > max_buckets {
return Err(InvalidHistogramConfiguration::TooManyBuckets {
required_bucket_count: histogram.num_buckets,
});
}
Ok(histogram)
}
pub fn build(&self) -> LogHistogram {
let requested_max_value = duration_as_u64(self.max_value.unwrap_or(DEFAULT_MAX_VALUE));
let max_value = requested_max_value.next_power_of_two();
let min_value = duration_as_u64(self.min_value.unwrap_or(DEFAULT_MIN_VALUE));
let p = self.precision.unwrap_or(DEFAULT_PRECISION);
let bucket_offset = cmp::max(bucket_index(min_value, p), 1) - 1;
let n = max_value.ilog2().max(p) + 1;
LogHistogram::from_n_p(n, p, bucket_offset as usize)
.truncate_to_max_value(requested_max_value)
}
}
#[derive(Debug)]
pub enum InvalidHistogramConfiguration {
TooManyBuckets {
required_bucket_count: usize,
},
}
impl Display for InvalidHistogramConfiguration {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
InvalidHistogramConfiguration::TooManyBuckets { required_bucket_count } =>
write!(f, "The configuration for this histogram would have required {required_bucket_count} buckets")
}
}
}
impl Error for InvalidHistogramConfiguration {}
fn bucket_index(value: u64, p: u32) -> u64 {
if value == 0 {
return 0;
}
let h = 63 - value.leading_zeros();
if h <= p {
value
} else {
let w = h - p;
((w + 1) * (1_u32 << p)) as u64 + ((value - (1_u64 << h)) >> w)
}
}
#[cfg(test)]
mod test {
use super::InvalidHistogramConfiguration;
use crate::runtime::metrics::histogram::h2_histogram::LogHistogram;
use crate::runtime::metrics::histogram::HistogramType;
#[cfg(not(target_family = "wasm"))]
mod proptests {
use super::*;
use crate::runtime::metrics::batch::duration_as_u64;
use crate::runtime::metrics::histogram::h2_histogram::MAX_PRECISION;
use proptest::prelude::*;
use std::time::Duration;
fn valid_log_histogram_strategy() -> impl Strategy<Value = LogHistogram> {
(1..=50u32, 0..=MAX_PRECISION, 0..100usize).prop_map(|(n, p, bucket_offset)| {
let p = p.min(n);
let base = LogHistogram::from_n_p(n, p, 0);
LogHistogram::from_n_p(n, p, bucket_offset.min(base.num_buckets - 1))
})
}
fn log_histogram_settings() -> impl Strategy<Value = (u64, u64, u32)> {
(
duration_as_u64(Duration::from_nanos(1))..duration_as_u64(Duration::from_secs(20)),
duration_as_u64(Duration::from_secs(1))..duration_as_u64(Duration::from_secs(1000)),
0..MAX_PRECISION,
)
}
proptest! {
#[test]
fn log_histogram_settings_maintain_invariants((min_value, max_value, p) in log_histogram_settings()) {
if max_value < min_value {
return Ok(())
}
let (min_value, max_value) = (Duration::from_nanos(min_value), Duration::from_nanos(max_value));
let histogram = LogHistogram::builder().min_value(min_value).max_value(max_value).precision_exact(p).build();
let first_bucket_end = Duration::from_nanos(histogram.bucket_range(0).end);
let last_bucket_start = Duration::from_nanos(histogram.bucket_range(histogram.num_buckets - 1).start);
let second_last_bucket_start = Duration::from_nanos(histogram.bucket_range(histogram.num_buckets - 2).start);
prop_assert!(
first_bucket_end <= min_value,
"first bucket {first_bucket_end:?} must be less than {min_value:?}"
);
prop_assert!(
last_bucket_start > max_value,
"last bucket start ({last_bucket_start:?} must be at least as big as `max_value` ({max_value:?})"
);
prop_assert!(
second_last_bucket_start < max_value,
"second last bucket end ({second_last_bucket_start:?} must be at least as big as `max_value` ({max_value:?})"
);
}
#[test]
fn proptest_log_histogram_invariants(histogram in valid_log_histogram_strategy()) {
let first_range = histogram.bucket_range(0);
prop_assert_eq!(first_range.start, 0, "First bucket doesn't start at 0");
let mut prev_end = 0;
let mut prev_size = 0;
for bucket in 0..histogram.num_buckets {
let range = histogram.bucket_range(bucket);
prop_assert_eq!(range.start, prev_end, "Bucket ranges are not contiguous");
prop_assert!(range.start < range.end, "Bucket range is empty or reversed");
let size = range.end - range.start;
if bucket > 0 && bucket < histogram.num_buckets - 1 {
prop_assert!(size.is_power_of_two(), "Bucket size is not a power of 2");
}
if bucket > 1 {
prop_assert!(size >= prev_size, "Bucket sizes are not monotonically increasing: This size {size} (previous: {prev_size}). Bucket: {bucket}");
}
if bucket > 0 && bucket < histogram.num_buckets - 1 {
let p = histogram.p as f64;
let error_bound = 2.0_f64.powf(-p);
let relative_error = ((size as f64 - 1.0) / 2.0) / range.start as f64;
prop_assert!(
relative_error <= error_bound,
"Bucket size error exceeds bound: {:?} > {:?} ({range:?})",
relative_error,
error_bound
);
}
prev_end = range.end;
prev_size = size;
}
prop_assert_eq!(prev_end, u64::MAX, "Last bucket should end at u64::MAX");
for bucket in 0..histogram.num_buckets {
let range = histogram.bucket_range(bucket);
for value in [range.start, range.end - 1] {
prop_assert_eq!(
histogram.value_to_bucket(value),
bucket,
"value_to_bucket is not consistent with bucket_range"
);
}
}
}
}
}
#[test]
fn bucket_ranges_are_correct() {
let p = 2;
let config = HistogramType::H2(LogHistogram {
num_buckets: 1024,
p,
bucket_offset: 0,
});
for i in 0..2_usize.pow(p + 1) {
assert_eq!(
config.value_to_bucket(i as u64),
i,
"{i} should be in bucket {i}"
);
}
let mut value = 2_usize.pow(p + 1);
let current_bucket = value;
while value < current_bucket * 2 {
assert_eq!(
config.value_to_bucket(value as u64),
current_bucket + ((value - current_bucket) / 2),
"bucket for {value}"
);
value += 1;
}
}
#[test]
fn bucket_computation_spot_check() {
let p = 9;
let config = HistogramType::H2(LogHistogram {
num_buckets: 4096,
p,
bucket_offset: 0,
});
struct T {
v: u64,
bucket: usize,
}
let tests = [
T { v: 1, bucket: 1 },
T {
v: 1023,
bucket: 1023,
},
T {
v: 1024,
bucket: 1024,
},
T {
v: 2048,
bucket: 1536,
},
T {
v: 2052,
bucket: 1537,
},
];
for test in tests {
assert_eq!(config.value_to_bucket(test.v), test.bucket);
}
}
#[test]
fn last_bucket_goes_to_infinity() {
let conf = HistogramType::H2(LogHistogram::from_n_p(16, 3, 10));
assert_eq!(conf.bucket_range(conf.num_buckets() - 1).end, u64::MAX);
}
#[test]
fn bucket_offset() {
let conf = HistogramType::H2(LogHistogram::from_n_p(16, 3, 10));
for i in 0..10 {
assert_eq!(conf.value_to_bucket(i), 0);
}
assert_eq!(conf.value_to_bucket(10), 0);
assert_eq!(conf.value_to_bucket(16), 6);
assert_eq!(conf.value_to_bucket(17), 6);
assert_eq!(conf.bucket_range(6), 16..18);
}
#[test]
fn max_buckets_enforcement() {
let error = LogHistogram::builder()
.max_error(0.001)
.max_buckets(5)
.expect_err("this produces way more than 5 buckets");
let num_buckets = match error {
InvalidHistogramConfiguration::TooManyBuckets {
required_bucket_count,
} => required_bucket_count,
};
assert_eq!(num_buckets, 27291);
}
#[test]
fn default_configuration_size() {
let conf = LogHistogram::builder().build();
assert_eq!(conf.num_buckets, 119);
}
}