#![deny(missing_docs)]
#![deny(missing_debug_implementations)]
#![deny(unsafe_code)]
#[cfg(all(test, feature = "quickcheck"))]
#[macro_use]
extern crate quickcheck;
extern crate stats;
use std::cmp;
use std::fmt;
use std::collections::BTreeMap;
use std::collections::btree_map::Range;
#[derive(Debug, Clone)]
pub struct Histogram {
num_buckets: u64,
samples: BTreeMap<u64, u64>,
stats: stats::OnlineStats,
minmax: stats::MinMax<u64>,
}
impl Histogram {
pub fn with_buckets(num_buckets: u64) -> Histogram {
assert!(num_buckets > 0);
Histogram {
num_buckets,
samples: Default::default(),
stats: Default::default(),
minmax: Default::default(),
}
}
pub fn add(&mut self, sample: u64) {
*self.samples.entry(sample).or_insert(0) += 1;
self.minmax.add(sample);
self.stats.add(sample);
}
pub fn buckets(&self) -> Buckets {
Buckets {
histogram: self,
index: 0,
}
}
}
impl fmt::Display for Histogram {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use std::fmt::Write;
let num_samples: u64 = self.samples.values().sum();
writeln!(f, "# Number of samples = {}", num_samples)?;
if num_samples == 0 {
return Ok(());
}
let min = self.minmax.min().unwrap();
let max = self.minmax.max().unwrap();
writeln!(f, "# Min = {}", min)?;
writeln!(f, "# Max = {}", max)?;
writeln!(f, "#")?;
let mean = self.stats.mean();
let dev = self.stats.stddev();
let var = self.stats.variance();
writeln!(f, "# Mean = {}", mean)?;
writeln!(f, "# Standard deviation = {}", dev)?;
writeln!(f, "# Variance = {}", var)?;
writeln!(f, "#")?;
let max_bucket_count = self.buckets().map(|b| b.count()).fold(0, cmp::max);
const WIDTH: u64 = 50;
let count_per_char = cmp::max(max_bucket_count / WIDTH, 1);
writeln!(f, "# Each ∎ is a count of {}", count_per_char)?;
writeln!(f, "#")?;
let mut count_str = String::new();
let widest_count = self.buckets().fold(0, |n, b| {
count_str.clear();
write!(&mut count_str, "{}", b.count()).unwrap();
cmp::max(n, count_str.len())
});
let mut end_str = String::new();
let widest_range = self.buckets().fold(0, |n, b| {
end_str.clear();
write!(&mut end_str, "{}", b.end()).unwrap();
cmp::max(n, end_str.len())
});
let mut start_str = String::with_capacity(widest_range);
for bucket in self.buckets() {
start_str.clear();
write!(&mut start_str, "{}", bucket.start()).unwrap();
for _ in 0..widest_range - start_str.len() {
start_str.insert(0, ' ');
}
end_str.clear();
write!(&mut end_str, "{}", bucket.end()).unwrap();
for _ in 0..widest_range - end_str.len() {
end_str.insert(0, ' ');
}
count_str.clear();
write!(&mut count_str, "{}", bucket.count()).unwrap();
for _ in 0..widest_count - count_str.len() {
count_str.insert(0, ' ');
}
write!(f, "{} .. {} [ {} ]: ", start_str, end_str, count_str)?;
for _ in 0..bucket.count() / count_per_char {
write!(f, "∎")?;
}
writeln!(f)?;
}
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct Buckets<'a> {
histogram: &'a Histogram,
index: u64,
}
impl<'a> Iterator for Buckets<'a> {
type Item = Bucket<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.histogram.num_buckets {
return None;
}
let (min, max) = match (self.histogram.minmax.min(), self.histogram.minmax.max()) {
(Some(&min), Some(&max)) => (min, max),
_ => return None,
};
let range = max - min;
let range = range + (range % self.histogram.num_buckets);
let bucket_size = range / self.histogram.num_buckets;
let bucket_size = cmp::max(1, bucket_size);
let start = min + self.index * bucket_size;
let end = min + (self.index + 1) * bucket_size;
self.index += 1;
Some(Bucket {
start,
end,
range: if self.index == self.histogram.num_buckets {
self.histogram.samples.range(start..)
} else {
self.histogram.samples.range(start..end)
},
})
}
}
#[derive(Clone)]
pub struct Bucket<'a> {
start: u64,
end: u64,
range: Range<'a, u64, u64>,
}
impl<'a> Bucket<'a> {
pub fn count(&self) -> u64 {
self.range.clone().map(|(_, count)| count).sum()
}
pub fn start(&self) -> u64 {
self.start
}
pub fn end(&self) -> u64 {
self.end
}
}
impl<'a> fmt::Debug for Bucket<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Bucket {{ {}..{} }}", self.start, self.end)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn num_buckets() {
let mut histo = Histogram::with_buckets(10);
for i in 0..100 {
histo.add(i);
}
assert_eq!(histo.buckets().count(), 10);
}
#[test]
fn bucket_count() {
let mut histo = Histogram::with_buckets(1);
for i in 0..10 {
histo.add(i);
}
assert_eq!(histo.buckets().count(), 1);
assert_eq!(histo.buckets().next().unwrap().count(), 10);
}
#[test]
fn bucket_count_one() {
let mut histo = Histogram::with_buckets(1);
histo.add(1);
assert_eq!(histo.buckets().count(), 1);
assert_eq!(histo.buckets().next().unwrap().count(), 1);
}
#[test]
fn overflow_panic() {
use std::string::ToString;
let mut histo = Histogram::with_buckets(1);
histo.add(99);
histo.to_string();
}
}
#[cfg(all(test, feature = "quickcheck"))]
mod quickchecks {
use super::*;
use std::cmp;
quickcheck! {
fn sum_of_bucket_counts_is_total_count(buckets: u64, samples: Vec<u64>) -> () {
if buckets == 0 {
return;
}
let len = samples.len();
let mut histo = Histogram::with_buckets(buckets);
for s in samples {
histo.add(s);
}
assert_eq!(len, histo.stats.len(), "stats.len() should be correct");
assert_eq!(len, histo.minmax.len(), "minmax.len() should be correct");
assert_eq!(len as u64,
histo.samples.values().cloned().sum::<u64>(),
"samples.values() count should be correct");
assert_eq!(len as u64,
histo.buckets().map(|b| b.count()).sum::<u64>(),
"sum of buckets counts should be correct");
}
fn actual_buckets_should_be_less_than_or_equal_num_buckets(
buckets: u64,
samples: Vec<u64>
) -> () {
if buckets == 0 {
return;
}
let mut histo = Histogram::with_buckets(buckets);
for s in samples {
histo.add(s);
}
assert!(histo.buckets().count() as u64 <= buckets,
"should never have more than expected number of buckets");
}
fn bucket_ranges_should_be_correct(buckets: u64, samples: Vec<u64>) -> () {
if buckets == 0 {
return;
}
let mut histo = Histogram::with_buckets(buckets);
for s in samples {
histo.add(s);
}
histo.buckets()
.fold(None, |minmax, bucket| {
let bucket_range = bucket.end() - bucket.start();
let (min, max) = minmax.unwrap_or((bucket_range, bucket_range));
let min = cmp::min(min, bucket_range);
let max = cmp::max(max, bucket_range);
assert!(max - min <= 1);
Some((min, max))
});
}
fn formatting_should_never_panic(buckets: u64, samples: Vec<u64>) -> () {
use std::string::ToString;
if buckets == 0 {
return;
}
let mut histo = Histogram::with_buckets(buckets);
for s in samples {
histo.add(s);
}
histo.to_string();
}
}
}