#![deny(missing_docs, trivial_casts, trivial_numeric_casts, unused_extern_crates,
unused_import_braces, unused_results, variant_size_differences, warnings)]
#![cfg_attr(all(test, feature = "bench_private"), feature(test))]
extern crate num_traits as num;
#[cfg(feature = "serialization")]
#[macro_use]
extern crate nom;
use std::borrow::Borrow;
use std::cmp;
use std::ops::{AddAssign, SubAssign};
use num::ToPrimitive;
use iterators::HistogramIterator;
const ORIGINAL_MIN: u64 = (-1_i64 >> 63) as u64;
const ORIGINAL_MAX: u64 = 0;
#[derive(Debug)]
pub struct Histogram<T: Counter> {
auto_resize: bool,
highest_trackable_value: u64,
lowest_discernible_value: u64,
significant_value_digits: u8,
bucket_count: u8,
sub_bucket_count: u32,
sub_bucket_half_count: u32,
sub_bucket_half_count_magnitude: u8,
sub_bucket_mask: u64,
leading_zero_count_base: u8,
unit_magnitude: u8,
unit_magnitude_mask: u64,
max_value: u64,
min_non_zero_value: u64,
total_count: u64,
counts: Vec<T>,
}
pub mod iterators;
impl<T: Counter> Histogram<T> {
pub fn distinct_values(&self) -> usize {
self.counts.len()
}
pub fn low(&self) -> u64 {
self.lowest_discernible_value
}
pub fn high(&self) -> u64 {
self.highest_trackable_value
}
pub fn sigfig(&self) -> u8 {
self.significant_value_digits
}
#[deprecated(since = "6.0.0", note = "use `len` instead")]
pub fn count(&self) -> u64 {
self.total_count
}
pub fn len(&self) -> u64 {
self.total_count
}
pub fn is_empty(&self) -> bool {
self.total_count == 0
}
pub fn buckets(&self) -> u8 {
self.bucket_count
}
fn index_for(&self, value: u64) -> Option<usize> {
let bucket_index = self.bucket_for(value);
let sub_bucket_index = self.sub_bucket_for(value, bucket_index);
debug_assert!(sub_bucket_index < self.sub_bucket_count);
debug_assert!(bucket_index == 0 || (sub_bucket_index >= self.sub_bucket_half_count));
let bucket_base_index =
(i32::from(bucket_index) + 1) << self.sub_bucket_half_count_magnitude;
let offset_in_bucket = sub_bucket_index as i32 - self.sub_bucket_half_count as i32;
let index = bucket_base_index + offset_in_bucket;
debug_assert!(index >= 0);
index.to_usize()
}
fn index_for_or_last(&self, value: u64) -> usize {
self.index_for(value)
.map_or(self.last_index(), |i| cmp::min(i, self.last_index()))
}
fn mut_at(&mut self, value: u64) -> Option<&mut T> {
self.index_for(value)
.and_then(move |i| self.counts.get_mut(i))
}
fn last_index(&self) -> usize {
self.distinct_values()
.checked_sub(1)
.expect("Empty counts array?")
}
pub fn clone_correct(&self, interval: u64) -> Histogram<T> {
let mut h = Histogram::new_from(self);
for v in self.iter_recorded() {
h.record_n_correct(v.value_iterated_to(), v.count_at_value(), interval)
.expect("Same dimensions; all values should be representable");
}
h
}
pub fn set_to<B: Borrow<Histogram<T>>>(&mut self, source: B) -> Result<(), AdditionError> {
self.reset();
self.add(source.borrow())
}
pub fn set_to_corrected<B: Borrow<Histogram<T>>>(
&mut self,
source: B,
interval: u64,
) -> Result<(), RecordError> {
self.reset();
self.add_correct(source, interval)
}
pub fn add<B: Borrow<Histogram<T>>>(&mut self, source: B) -> Result<(), AdditionError> {
let source = source.borrow();
let top = self.highest_equivalent(self.value_for(self.last_index()));
if top < source.max() {
if !self.auto_resize {
return Err(AdditionError::OtherAddendValueExceedsRange);
}
self.resize(source.max())
.map_err(|_| AdditionError::ResizeFailedUsizeTypeTooSmall)?;
}
if self.bucket_count == source.bucket_count
&& self.sub_bucket_count == source.sub_bucket_count
&& self.unit_magnitude == source.unit_magnitude
{
let mut observed_other_total_count: u64 = 0;
for i in 0..source.distinct_values() {
let other_count = source
.count_at_index(i)
.expect("iterating inside source length");
if other_count != T::zero() {
self.counts[i] = self.counts[i].saturating_add(other_count);
observed_other_total_count =
observed_other_total_count.saturating_add(other_count.as_u64());
}
}
self.total_count = self.total_count.saturating_add(observed_other_total_count);
let mx = source.max();
if mx > self.max() {
self.update_max(mx);
}
let mn = source.min_nz();
if mn < self.min_nz() {
self.update_min(mn);
}
} else {
let other_max_index = source
.index_for(source.max())
.expect("Index for max value must exist");
let other_count = source
.count_at_index(other_max_index)
.expect("max's index must exist");
self.record_n(source.value_for(other_max_index), other_count)
.expect("Record must succeed; already resized for max value");
for i in 0..other_max_index {
let other_count = source
.count_at_index(i)
.expect("index before max must exist");
if other_count != T::zero() {
self.record_n(source.value_for(i), other_count)
.expect("Record must succeed; already recorded max value");
}
}
}
Ok(())
}
pub fn add_correct<B: Borrow<Histogram<T>>>(
&mut self,
source: B,
interval: u64,
) -> Result<(), RecordError> {
let source = source.borrow();
for v in source.iter_recorded() {
self.record_n_correct(v.value_iterated_to(), v.count_at_value(), interval)?;
}
Ok(())
}
pub fn subtract<B: Borrow<Histogram<T>>>(
&mut self,
subtrahend: B,
) -> Result<(), SubtractionError> {
let subtrahend = subtrahend.borrow();
let top = self.highest_equivalent(self.value_for(self.last_index()));
if top < self.highest_equivalent(subtrahend.max()) {
return Err(SubtractionError::SubtrahendValueExceedsMinuendRange);
}
let old_min_highest_equiv = self.highest_equivalent(self.min());
let old_max_lowest_equiv = self.lowest_equivalent(self.max());
let mut needs_restat = self.total_count == u64::max_value();
for i in 0..subtrahend.distinct_values() {
let other_count = subtrahend
.count_at_index(i)
.expect("index inside subtrahend len must exist");
if other_count != T::zero() {
let other_value = subtrahend.value_for(i);
{
let mut_count = self.mut_at(other_value);
if let Some(c) = mut_count {
*c = (*c).checked_sub(&other_count)
.ok_or(SubtractionError::SubtrahendCountExceedsMinuendCount)?;
} else {
panic!("Tried to subtract value outside of range: {}", other_value);
}
}
if other_value <= old_min_highest_equiv || other_value >= old_max_lowest_equiv {
needs_restat = true;
}
if !needs_restat {
self.total_count = self.total_count
.checked_sub(other_count.as_u64())
.expect("total count underflow on subtraction");
}
}
}
if needs_restat {
let l = self.distinct_values();
self.restat(l);
}
Ok(())
}
pub fn clear(&mut self) {
for c in &mut self.counts {
*c = T::zero();
}
self.total_count = 0;
}
pub fn reset(&mut self) {
self.clear();
self.reset_max(ORIGINAL_MAX);
self.reset_min(ORIGINAL_MIN);
}
pub fn auto(&mut self, enabled: bool) {
self.auto_resize = enabled;
}
pub fn new(sigfig: u8) -> Result<Histogram<T>, CreationError> {
let mut h = Self::new_with_bounds(1, 2, sigfig);
if let Ok(ref mut h) = h {
h.auto_resize = true;
}
h
}
pub fn new_with_max(high: u64, sigfig: u8) -> Result<Histogram<T>, CreationError> {
Self::new_with_bounds(1, high, sigfig)
}
pub fn new_with_bounds(low: u64, high: u64, sigfig: u8) -> Result<Histogram<T>, CreationError> {
if low < 1 {
return Err(CreationError::LowIsZero);
}
if low > u64::max_value() / 2 {
return Err(CreationError::LowExceedsMax);
}
if high < 2 * low {
return Err(CreationError::HighLessThanTwiceLow);
}
if sigfig > 5 {
return Err(CreationError::SigFigExceedsMax);
}
let largest = 2 * 10_u32.pow(u32::from(sigfig));
let unit_magnitude = (low as f64).log2().floor() as u8;
let unit_magnitude_mask = (1 << unit_magnitude) - 1;
let sub_bucket_count_magnitude = (f64::from(largest)).log2().ceil() as u8;
let sub_bucket_half_count_magnitude = sub_bucket_count_magnitude - 1;
let sub_bucket_count = 1_u32 << u32::from(sub_bucket_count_magnitude);
if unit_magnitude + sub_bucket_count_magnitude > 63 {
return Err(CreationError::CannotRepresentSigFigBeyondLow);
};
let sub_bucket_half_count = sub_bucket_count / 2;
let sub_bucket_mask = (u64::from(sub_bucket_count) - 1) << unit_magnitude;
let mut h = Histogram {
auto_resize: false,
highest_trackable_value: high,
lowest_discernible_value: low,
significant_value_digits: sigfig,
bucket_count: 0,
sub_bucket_count,
leading_zero_count_base: 64 - unit_magnitude - sub_bucket_count_magnitude,
sub_bucket_half_count_magnitude,
unit_magnitude,
sub_bucket_half_count,
sub_bucket_mask,
unit_magnitude_mask,
max_value: ORIGINAL_MAX,
min_non_zero_value: ORIGINAL_MIN,
total_count: 0,
counts: Vec::new(),
};
h.resize(high)
.map_err(|_| CreationError::UsizeTypeTooSmall)?;
Ok(h)
}
pub fn new_from<F: Counter>(source: &Histogram<F>) -> Histogram<T> {
let mut h = Self::new_with_bounds(
source.lowest_discernible_value,
source.highest_trackable_value,
source.significant_value_digits,
).expect("Using another histogram's parameters failed");
h.auto_resize = source.auto_resize;
h.counts.resize(source.distinct_values(), T::zero());
h
}
pub fn record(&mut self, value: u64) -> Result<(), RecordError> {
self.record_n(value, T::one())
}
pub fn saturating_record(&mut self, value: u64) {
self.saturating_record_n(value, T::one())
}
pub fn record_n(&mut self, value: u64, count: T) -> Result<(), RecordError> {
self.record_n_inner(value, count, false)
}
pub fn saturating_record_n(&mut self, value: u64, count: T) {
self.record_n_inner(value, count, true).unwrap()
}
fn record_n_inner(&mut self, mut value: u64, count: T, clamp: bool) -> Result<(), RecordError> {
let recorded_without_resize = if let Some(c) = self.mut_at(value) {
*c = (*c).saturating_add(count);
true
} else {
false
};
if !recorded_without_resize {
if clamp {
value = if value > self.highest_trackable_value {
self.highest_trackable_value
} else {
self.lowest_discernible_value
};
let c = self.mut_at(value)
.expect("unwrap must succeed since low and high are always representable");
*c = c.saturating_add(count);
} else if !self.auto_resize {
return Err(RecordError::ValueOutOfRangeResizeDisabled);
} else {
self.resize(value)
.map_err(|_| RecordError::ResizeFailedUsizeTypeTooSmall)?;
self.highest_trackable_value =
self.highest_equivalent(self.value_for(self.last_index()));
{
let c = self.mut_at(value).expect("value should fit after resize");
*c = (*c).checked_add(&count)
.expect("count overflow after resize");
}
}
}
self.update_min_max(value);
self.total_count = self.total_count.saturating_add(count.as_u64());
Ok(())
}
pub fn record_correct(&mut self, value: u64, interval: u64) -> Result<(), RecordError> {
self.record_n_correct(value, T::one(), interval)
}
pub fn record_n_correct(
&mut self,
value: u64,
count: T,
interval: u64,
) -> Result<(), RecordError> {
self.record_n(value, count)?;
if interval == 0 {
return Ok(());
}
if value > interval {
let mut missing_value = value - interval;
while missing_value >= interval {
self.record_n_inner(missing_value, count, false)?;
missing_value -= interval;
}
}
Ok(())
}
pub fn iter_quantiles(
&self,
ticks_per_half_distance: u32,
) -> HistogramIterator<T, iterators::quantile::Iter<T>> {
iterators::quantile::Iter::new(self, ticks_per_half_distance)
}
pub fn iter_linear(&self, step: u64) -> HistogramIterator<T, iterators::linear::Iter<T>> {
iterators::linear::Iter::new(self, step)
}
pub fn iter_log(&self, start: u64, exp: f64) -> HistogramIterator<T, iterators::log::Iter<T>> {
iterators::log::Iter::new(self, start, exp)
}
pub fn iter_recorded(&self) -> HistogramIterator<T, iterators::recorded::Iter> {
iterators::recorded::Iter::new(self)
}
pub fn iter_all(&self) -> HistogramIterator<T, iterators::all::Iter> {
iterators::all::Iter::new(self)
}
pub fn min(&self) -> u64 {
if self.total_count == 0
|| self.count_at_index(0)
.expect("counts array must be non-empty") != T::zero()
{
0
} else {
self.min_nz()
}
}
pub fn max(&self) -> u64 {
if self.max_value == ORIGINAL_MAX {
ORIGINAL_MAX
} else {
self.highest_equivalent(self.max_value)
}
}
pub fn min_nz(&self) -> u64 {
if self.min_non_zero_value == ORIGINAL_MIN {
ORIGINAL_MIN
} else {
self.lowest_equivalent(self.min_non_zero_value)
}
}
pub fn equivalent(&self, value1: u64, value2: u64) -> bool {
self.lowest_equivalent(value1) == self.lowest_equivalent(value2)
}
pub fn mean(&self) -> f64 {
if self.total_count == 0 {
return 0.0;
}
self.iter_recorded().fold(0.0_f64, |total, v| {
total
+ self.median_equivalent(v.value_iterated_to()) as f64 * v.count_at_value().as_f64()
/ self.total_count as f64
})
}
pub fn stdev(&self) -> f64 {
if self.total_count == 0 {
return 0.0;
}
let mean = self.mean();
let geom_dev_tot = self.iter_recorded().fold(0.0_f64, |gdt, v| {
let dev = self.median_equivalent(v.value_iterated_to()) as f64 - mean;
gdt + (dev * dev) * v.count_since_last_iteration() as f64
});
(geom_dev_tot / self.total_count as f64).sqrt()
}
pub fn value_at_percentile(&self, percentile: f64) -> u64 {
self.value_at_quantile(percentile / 100.0)
}
pub fn value_at_quantile(&self, quantile: f64) -> u64 {
let quantile = if quantile > 1.0 { 1.0 } else { quantile };
let fractional_count = quantile * self.total_count as f64;
let mut count_at_quantile = fractional_count.ceil() as u64;
if count_at_quantile == 0 {
count_at_quantile = 1;
}
let mut total_to_current_index: u64 = 0;
for i in 0..self.counts.len() {
total_to_current_index += self.counts[i].as_u64();
if total_to_current_index >= count_at_quantile {
let value_at_index = self.value_for(i);
return if quantile == 0.0 {
self.lowest_equivalent(value_at_index)
} else {
self.highest_equivalent(value_at_index)
};
}
}
0
}
pub fn percentile_below(&self, value: u64) -> f64 {
self.quantile_below(value) * 100.0
}
pub fn quantile_below(&self, value: u64) -> f64 {
if self.total_count == 0 {
return 1.0;
}
let target_index = self.index_for_or_last(value);
let total_to_current_index = (0..target_index.checked_add(1).expect("usize overflow"))
.map(|i| self.count_at_index(i).expect("index is <= last_index()"))
.fold(0_u64, |t, v| t.saturating_add(v.as_u64()));
total_to_current_index.as_f64() / self.total_count as f64
}
pub fn count_between(&self, low: u64, high: u64) -> u64 {
let low_index = self.index_for_or_last(low);
let high_index = self.index_for_or_last(high);
(low_index..high_index.checked_add(1).expect("usize overflow"))
.map(|i| self.count_at_index(i).expect("index is <= last_index()"))
.fold(0_u64, |t, v| t.saturating_add(v.as_u64()))
}
pub fn count_at(&self, value: u64) -> T {
self.count_at_index(self.index_for_or_last(value))
.expect("index is <= last_index()")
}
pub fn lowest_equivalent(&self, value: u64) -> u64 {
let bucket_index = self.bucket_for(value);
let sub_bucket_index = self.sub_bucket_for(value, bucket_index);
self.value_from_loc(bucket_index, sub_bucket_index)
}
pub fn highest_equivalent(&self, value: u64) -> u64 {
if value == u64::max_value() {
u64::max_value()
} else {
self.next_non_equivalent(value) - 1
}
}
pub fn median_equivalent(&self, value: u64) -> u64 {
self.lowest_equivalent(value)
.checked_add(self.equivalent_range(value) >> 1)
.expect("median equivalent should not overflow")
}
pub fn next_non_equivalent(&self, value: u64) -> u64 {
self.lowest_equivalent(value)
.saturating_add(self.equivalent_range(value))
}
pub fn equivalent_range(&self, value: u64) -> u64 {
let bucket_index = self.bucket_for(value);
1_u64 << (self.unit_magnitude + bucket_index)
}
fn value_for(&self, index: usize) -> u64 {
let mut bucket_index = (index >> self.sub_bucket_half_count_magnitude) as isize - 1;
let mut sub_bucket_index = ((index.to_u32().expect("index must fit in u32"))
& (self.sub_bucket_half_count - 1))
+ self.sub_bucket_half_count;
if bucket_index < 0 {
sub_bucket_index -= self.sub_bucket_half_count;
bucket_index = 0;
}
self.value_from_loc(bucket_index as u8, sub_bucket_index)
}
fn count_at_index(&self, index: usize) -> Option<T> {
self.counts.get(index).cloned()
}
#[cfg(feature = "serialization")]
fn set_count_at_index(&mut self, index: usize, count: T) -> Result<(), ()> {
let r = self.counts.get_mut(index).ok_or(())?;
*r = count;
Ok(())
}
#[inline]
fn bucket_for(&self, value: u64) -> u8 {
self.leading_zero_count_base - (value | self.sub_bucket_mask).leading_zeros() as u8
}
#[inline]
fn sub_bucket_for(&self, value: u64, bucket_index: u8) -> u32 {
(value >> (bucket_index + self.unit_magnitude)) as u32
}
#[inline]
fn value_from_loc(&self, bucket_index: u8, sub_bucket_index: u32) -> u64 {
u64::from(sub_bucket_index) << (bucket_index + self.unit_magnitude)
}
fn buckets_to_cover(&self, value: u64) -> u8 {
let mut smallest_untrackable_value =
u64::from(self.sub_bucket_count) << self.unit_magnitude;
let mut buckets_needed = 1;
while smallest_untrackable_value <= value {
if smallest_untrackable_value > u64::max_value() / 2 {
return buckets_needed + 1;
}
smallest_untrackable_value <<= 1;
buckets_needed += 1;
}
buckets_needed
}
fn num_bins(&self, number_of_buckets: u8) -> u32 {
(u32::from(number_of_buckets) + 1) * (self.sub_bucket_half_count)
}
fn resize(&mut self, high: u64) -> Result<(), UsizeTypeTooSmall> {
assert!(
high >= 2 * self.lowest_discernible_value,
"highest trackable value must be >= (2 * lowest discernible value)"
);
let buckets_needed = self.buckets_to_cover(high);
let len = self.num_bins(buckets_needed)
.to_usize()
.ok_or(UsizeTypeTooSmall)?;
self.bucket_count = buckets_needed;
self.highest_trackable_value = high;
self.counts.resize(len, T::zero());
Ok(())
}
fn update_max(&mut self, value: u64) {
let internal_value = value | self.unit_magnitude_mask; if internal_value > self.max_value {
self.max_value = internal_value;
}
}
fn update_min(&mut self, value: u64) {
if value <= self.unit_magnitude_mask {
return; }
let internal_value = value & !self.unit_magnitude_mask; if internal_value < self.min_non_zero_value {
self.min_non_zero_value = internal_value;
}
}
fn update_min_max(&mut self, value: u64) {
if value > self.max_value {
self.update_max(value);
}
if value < self.min_non_zero_value && value != 0 {
self.update_min(value);
}
}
fn reset_max(&mut self, max: u64) {
self.max_value = max | self.unit_magnitude_mask; }
fn reset_min(&mut self, min: u64) {
let internal_value = min & !self.unit_magnitude_mask; self.min_non_zero_value = if min == u64::max_value() {
min
} else {
internal_value
};
}
fn restat(&mut self, length_to_scan: usize) {
self.reset_max(ORIGINAL_MAX);
self.reset_min(ORIGINAL_MIN);
let mut restat_state = RestatState::new();
assert!(length_to_scan <= self.counts.len());
for i in 0..length_to_scan {
let count = self.counts[i];
if count != T::zero() {
restat_state.on_nonzero_count(i, count);
}
}
restat_state.update_histogram(self);
}
}
struct RestatState<T: Counter> {
max_index: Option<usize>,
min_index: Option<usize>,
total_count: u64,
phantom: std::marker::PhantomData<T>,
}
impl<T: Counter> RestatState<T> {
fn new() -> RestatState<T> {
RestatState {
max_index: None,
min_index: None,
total_count: 0,
phantom: std::marker::PhantomData,
}
}
#[inline]
fn on_nonzero_count(&mut self, index: usize, count: T) {
self.total_count = self.total_count.saturating_add(count.as_u64());
self.max_index = Some(index);
if self.min_index.is_none() && index != 0 {
self.min_index = Some(index);
}
}
fn update_histogram(self, h: &mut Histogram<T>) {
if let Some(max_i) = self.max_index {
let max = h.highest_equivalent(h.value_for(max_i));
h.update_max(max);
}
if let Some(min_i) = self.min_index {
let min = h.value_for(min_i);
h.update_min(min);
}
h.total_count = self.total_count;
}
}
impl<T: Counter> Clone for Histogram<T> {
fn clone(&self) -> Self {
let mut h = Histogram::new_from(self);
h += self;
h
}
}
impl<'a, T: Counter> AddAssign<&'a Histogram<T>> for Histogram<T> {
fn add_assign(&mut self, source: &'a Histogram<T>) {
self.add(source).unwrap();
}
}
impl<'a, T: Counter> SubAssign<&'a Histogram<T>> for Histogram<T> {
fn sub_assign(&mut self, other: &'a Histogram<T>) {
self.subtract(other).unwrap();
}
}
impl<T: Counter> AddAssign<u64> for Histogram<T> {
fn add_assign(&mut self, value: u64) {
self.record(value).unwrap();
}
}
impl<T: Counter, F: Counter> PartialEq<Histogram<F>> for Histogram<T>
where
T: PartialEq<F>,
{
fn eq(&self, other: &Histogram<F>) -> bool {
if self.lowest_discernible_value != other.lowest_discernible_value
|| self.significant_value_digits != other.significant_value_digits
{
return false;
}
if self.total_count != other.total_count {
return false;
}
if self.max() != other.max() {
return false;
}
if self.min_nz() != other.min_nz() {
return false;
}
(0..self.counts.len()).all(|i| {
self.counts[i] == match other.count_at_index(i) {
Some(c) => c,
None => return false,
}
})
}
}
#[path = "tests/tests.rs"]
#[cfg(test)]
mod tests;
#[cfg(feature = "serialization")]
pub mod serialization;
mod core;
pub mod errors;
pub use errors::*;
pub use core::counter::*;