#![deny(missing_docs)]
#![deny(missing_debug_implementations)]
#![deny(unsafe_code)]
#[cfg(all(test, feature = "quickcheck"))]
#[macro_use]
extern crate quickcheck;
extern crate stats;
extern crate rand;
use std::cmp;
use std::fmt;
use std::collections::BTreeMap;
use std::collections::btree_map::Range;
use noisy_float::prelude::*;
#[derive(Debug, Clone)]
pub struct Histogram {
num_buckets: u64,
precision: u32,
float_samples: BTreeMap<R64, u64>,
minmax: stats::MinMax<f64>,
stats: stats::OnlineStats,
}
impl Histogram {
pub fn with_buckets(num_buckets: u64, precision: Option<u32>) -> Histogram {
assert!(num_buckets > 0);
Histogram {
num_buckets,
precision: precision.unwrap_or(2),
float_samples: Default::default(),
stats: Default::default(),
minmax: Default::default()
}
}
pub fn add (&mut self, sample: f64) {
let safe_sample = r64(sample);
*self.float_samples.entry(safe_sample.clone()).or_insert(0) += 1;
self.minmax.add(sample.clone());
self.stats.add(sample);
}
pub fn buckets(&self) -> Buckets {
Buckets {
histogram: self,
index: 0,
}
}
}
impl fmt::Display for Histogram {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use std::fmt::Write;
let num_samples: u64 = self.float_samples.values().sum();
writeln!(f, "# Number of samples = {}", num_samples)?;
if num_samples == 0 {
return Ok(());
}
let min = self.minmax.min().unwrap();
let max = self.minmax.max().unwrap();
writeln!(f, "# Min = {}", min)?;
writeln!(f, "# Max = {}", max)?;
writeln!(f, "#")?;
let mean = self.stats.mean();
let dev = self.stats.stddev();
let var = self.stats.variance();
writeln!(f, "# Mean = {}", mean)?;
writeln!(f, "# Standard deviation = {}", dev)?;
writeln!(f, "# Variance = {}", var)?;
writeln!(f, "#")?;
let max_bucket_count = self.buckets().map(|b| b.count()).fold(0, cmp::max);
const WIDTH: u64 = 50;
let count_per_char = cmp::max(max_bucket_count / WIDTH, 1);
writeln!(f, "# Each ∎ is a count of {}", count_per_char)?;
writeln!(f, "#")?;
let mut count_str = String::new();
let widest_count = self.buckets().fold(0, |n, b| {
count_str.clear();
write!(&mut count_str, "{}", b.count()).unwrap();
cmp::max(n, count_str.len())
});
let mut end_str = String::new();
let widest_range = self.buckets().fold(0, |n, b| {
end_str.clear();
write!(&mut end_str, "{}", b.end()).unwrap();
cmp::max(n, end_str.len())
});
let mut start_str = String::with_capacity(widest_range);
for bucket in self.buckets() {
start_str.clear();
write!(&mut start_str, "{}", bucket.start()).unwrap();
for _ in 0..widest_range - start_str.len() {
start_str.insert(0, ' ');
}
end_str.clear();
write!(&mut end_str, "{}", bucket.end()).unwrap();
for _ in 0..widest_range - end_str.len() {
end_str.insert(0, ' ');
}
count_str.clear();
write!(&mut count_str, "{}", bucket.count()).unwrap();
for _ in 0..widest_count - count_str.len() {
count_str.insert(0, ' ');
}
write!(f, "{} .. {} [ {} ]: ", start_str, end_str, count_str)?;
for _ in 0..bucket.count() / count_per_char {
write!(f, "∎")?;
}
writeln!(f)?;
}
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct Buckets<'a> {
histogram: &'a Histogram,
index: u64,
}
impl<'a> Iterator for Buckets<'a> {
type Item = Bucket<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.histogram.num_buckets {
return None;
}
let (min, max) = match (self.histogram.minmax.min(), self.histogram.minmax.max()) {
(Some(&min), Some(&max)) => (min, max),
_ => return None,
};
let range = (max - min) as f64;
let range = range + (range as u64 % self.histogram.num_buckets) as f64;
let bucket_size = range / self.histogram.num_buckets as f64;
let start = min + (self.index as f64 * bucket_size) as f64;
let end = min + ((self.index + 1) as f64 * bucket_size) as f64;
self.index += 1;
Some(Bucket {
start,
end,
range: if self.index == self.histogram.num_buckets {
self.histogram.float_samples.range(r64(start) ..)
} else {
self.histogram.float_samples.range(r64(start).. r64(end))
},
})
}
}
#[derive(Clone)]
pub struct Bucket<'a> {
start: f64,
end: f64,
range: Range<'a, R64, u64>
}
impl<'a> Bucket<'a> {
pub fn count(&self) -> u64 {
self.range.clone().map(|(_, count)| count).sum()
}
pub fn start(&self) -> f64 {
self.start
}
pub fn end(&self) -> f64 {
self.end
}
}
impl<'a> fmt::Debug for Bucket<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Bucket {{ {}..{} }}", self.start, self.end)
}
}
#[cfg(test)]
mod tests {
use super::*;
use rand::prelude::*;
#[test]
fn num_buckets() {
let mut histo = Histogram::with_buckets(10, None);
for i in 0..100 {
histo.add(i as f64);
}
assert_eq!(histo.buckets().count(), 10);
}
#[test]
fn bucket_count() {
let mut histo = Histogram::with_buckets(1, None);
for i in 0..10 {
histo.add(i as f64);
}
assert_eq!(histo.buckets().count(), 1);
assert_eq!(histo.buckets().next().unwrap().count(), 10);
}
#[test]
fn bucket_count_one() {
let mut histo = Histogram::with_buckets(1, None);
histo.add(1.0);
assert_eq!(histo.buckets().count(), 1);
assert_eq!(histo.buckets().next().unwrap().count(), 1);
}
#[test]
fn overflow_panic() {
use std::string::ToString;
let mut histo = Histogram::with_buckets(1, None);
histo.add(99f64);
histo.to_string();
}
#[test]
fn add() {
let mut histo = Histogram::with_buckets(5, Some(2));
let numsamples = 100;
let mut rng = rand::thread_rng();
for i in 0..numsamples {
let sample: f64 = rng.gen::<f64>() * i as f64;
histo.add(sample);
}
for (i, bucket) in histo.buckets().enumerate() {
println!("bucket {} from:{} to:{} count:{}", i, bucket.start(), bucket.end(), bucket.count());
}
println!("stats: {:?}", histo.stats);
println!("min,max: {:?}", histo.minmax);
println!("samples: {:?}", histo.float_samples);
println!("histo: {}", histo);
assert_eq!(numsamples, histo.stats.len(), "stats.len() should be correct");
assert_eq!(numsamples, histo.minmax.len(), "minmax.len() should be correct");
}
}
#[cfg(all(test, feature = "quickcheck"))]
mod quickchecks {
use super::*;
use std::cmp;
quickcheck! {
fn sum_of_bucket_counts_is_total_count(buckets: u64, samples: Vec<u64>) -> () {
if buckets == 0 {
return;
}
let len = samples.len();
let mut histo = Histogram::with_buckets(buckets, None);
for s in samples {
histo.add(s as f64);
}
assert_eq!(len, histo.stats.len(), "stats.len() should be correct");
assert_eq!(len, histo.minmax.len(), "minmax.len() should be correct");
assert_eq!(len as u64,
histo.float_samples.values().cloned().sum::<u64>(),
"samples.values() count should be correct");
assert_eq!(len as u64,
histo.buckets().map(|b| b.count()).sum::<u64>(),
"sum of buckets counts should be correct");
}
fn actual_buckets_should_be_less_than_or_equal_num_buckets(
buckets: u64,
samples: Vec<u64>
) -> () {
if buckets == 0 {
return;
}
let mut histo = Histogram::with_buckets(buckets, None);
for s in samples {
histo.add(s as f64);
}
assert!(histo.buckets().count() as u64 <= buckets,
"should never have more than expected number of buckets");
}
fn bucket_ranges_should_be_correct(buckets: u64, samples: Vec<u64>) -> () {
if buckets == 0 {
return;
}
let mut histo = Histogram::with_buckets(buckets, None);
for s in samples {
histo.add(s as f64);
}
histo.buckets()
.fold(None, |minmax, bucket| {
let bucket_range = bucket.end() - bucket.start();
let (min, max) = minmax.unwrap_or((bucket_range, bucket_range));
let min = cmp::min(min, bucket_range);
let max = cmp::max(max, bucket_range);
assert!(max - min <= 1f64);
Some((min, max))
});
}
fn formatting_should_never_panic(buckets: u64, samples: Vec<u64>) -> () {
use std::string::ToString;
if buckets == 0 {
return;
}
let mut histo = Histogram::with_buckets(buckets, None);
for s in samples {
histo.add(s as f64);
}
histo.to_string();
}
}
}