use std::cmp;
use std::u64;
#[derive(Clone)]
pub struct Histogram {
lowest_trackable: i64,
highest_trackable: i64,
unit_mag: i32,
sigfig: u32,
sub_bucket_half_count_mag: i32,
sub_bucket_half_count: i32,
sub_bucket_mask: u64,
sub_bucket_count: isize,
bucket_count: isize,
min_val: u64,
max_val: u64,
norm_index_off: isize,
conversion_ratio: f64,
total_count: u64,
counts: Vec<u64>,
}
fn ctlz64(x: u64) -> u64 {
let mut x = x;
let mut r = 0;
while (x & 0x8000000000000000) == 0 && r < 64 {
r += 1;
x <<= 1;
};
r
}
impl Histogram {
pub fn init(lowest_trackable: i64, highest_trackable: i64, sigfig: u32) -> Result<Histogram, HistoErr> {
let cfg = try!(HistoBucketCfg::new(lowest_trackable, highest_trackable, sigfig));
Ok(Histogram {
lowest_trackable: cfg.lowest_trackable,
highest_trackable: cfg.highest_trackable,
unit_mag: cfg.unit_mag,
sigfig: cfg.sigfig,
sub_bucket_half_count_mag: cfg.sub_bucket_half_count_mag,
sub_bucket_half_count: cfg.sub_bucket_half_count,
sub_bucket_mask: cfg.sub_bucket_mask,
sub_bucket_count: cfg.sub_bucket_count,
bucket_count: cfg.bucket_count,
min_val: u64::max_value(),
max_val: 0,
norm_index_off: 0,
conversion_ratio: 1.0,
total_count: 0,
counts: (0..cfg.counts_len).map(|_| 0).collect(),
})
}
fn bucket_count(&self) -> isize { self.counts.len() as isize }
fn get_bucket_index(&self, value: u64) -> isize {
let pow2ceil : isize = 64 - (ctlz64(value | self.sub_bucket_mask) as isize);
pow2ceil - (self.unit_mag as isize) - (self.sub_bucket_half_count_mag + 1) as isize
}
fn get_sub_bucket_index(&self, value: u64, bucket_index: isize) -> isize {
(value >> (bucket_index + self.unit_mag as isize)) as isize
}
fn counts_index(&self, bucket_index: isize, sub_bucket_index: isize) -> isize {
let bucket_base_index = (bucket_index + 1) << self.sub_bucket_half_count_mag;
let offset_in_bucket = sub_bucket_index - self.sub_bucket_half_count as isize;
bucket_base_index + offset_in_bucket
}
fn counts_index_for(&self, value: u64) -> isize {
let bucket_index = self.get_bucket_index(value);
let sub_bucket_index = self.get_sub_bucket_index(value, bucket_index);
self.counts_index(bucket_index, sub_bucket_index)
}
fn normalize_index(&self, idx: isize) -> usize {
if self.norm_index_off == 0 { return idx as usize }
let normidx = idx - self.norm_index_off;
let ilen = self.counts.len() as isize;
let adjustment = if normidx < 0 { ilen }
else if normidx >= ilen { -ilen }
else { 0 };
(normidx + adjustment) as usize
}
fn counts_inc_normalized(&mut self, idx: isize, count: u64) {
let idx = self.normalize_index(idx);
self.counts[idx] += count;
self.total_count += count;
}
fn update_min_max(&mut self, value: u64) {
self.min_val = cmp::min(self.min_val, value);
self.max_val = cmp::max(self.max_val, value);
}
pub fn record_value(&mut self, value: i64) -> bool { self.record_values(value, 1) }
pub fn record_values(&mut self, value: i64, count: u64) -> bool {
if value < 0 { return false }
let value = value as u64;
let idx = self.counts_index_for(value);
self.counts_inc_normalized(idx, count);
self.update_min_max(value);
true
}
pub fn value_from_index(&self, bucket_index: isize, sub_bucket_index: isize) -> i64 {
(sub_bucket_index as i64) << (bucket_index + self.unit_mag as isize)
}
pub fn lowest_equivalent_value(&self, v: i64) -> i64 {
let bucket_index = self.get_bucket_index(v as u64);
let sub_bucket_index = self.get_sub_bucket_index(v as u64, bucket_index);
self.value_from_index(bucket_index, sub_bucket_index)
}
pub fn values_are_equivalent(&self, a: i64, b: i64) -> bool {
self.lowest_equivalent_value(a) == self.lowest_equivalent_value(b)
}
}
struct BaseIter<'a> {
histo: &'a Histogram,
bucket_index: isize,
sub_bucket_index: isize,
count_at_index: u64,
count_to_index: u64,
value_from_index: u64,
highest_equiv_value: u64,
}
impl<'a> BaseIter<'a> {
fn new(h: &'a Histogram) -> BaseIter<'a> {
BaseIter {
histo: h,
bucket_index: 0,
sub_bucket_index: -1,
count_at_index: 0,
count_to_index: 0,
value_from_index: 0,
highest_equiv_value: 0,
}
}
fn has_next(&self) -> bool {
self.count_to_index < self.histo.total_count
}
fn has_buckets(&self) -> bool {
self.bucket_index < self.histo.bucket_count()
}
fn increment_bucket(&self, bucket_index: &mut isize, sub_bucket_index: &mut isize) {
*sub_bucket_index += 1;
if sub_bucket_index >= self.histo.sub_bucket_count {
sub_bucket_index = self.histo.sub_bucket_half_count;
bucket_index += 1;
}
}
fn move_next(&mut self) -> bool {
self.increment_bucket(&self.bucket_index, &self.sub_bucket_index);
if !self.has_buckets() { return false };
self.count_at_index = self.histo.get_count_at_index(self.bucket_index, self.sub_bucket_index);
self.count_to_index += self.count_at_index;
self.value_from_index = self.histo.value_from_index(self.bucket_index, self.sub_bucket_index);
self.highest_equivalent_value = self.histo.highest_equivalent_value(self.value_from_index);
true
}
fn peek_next_value_from_index(&self) -> i64 {
let mut bucket_index = self.bucket_index;
let mut sub_bucket_index = self.sub_bucket_index;
self.increment_bucket(&bucket_index, &sub_bucket_index);
self.histo.value_from_index(bucket_index, sub_bucket_index)
}
}
pub struct IterItem {
count_to_index: u64,
count_at_index: u64,
value_from_index: u64,
highest_equiv_value: u64,
}
impl IterItem {
fn count_to_index(&self) -> u64 { self.count_to_index }
fn count_at_index(&self) -> u64 { self.count_at_index }
fn value_from_index(&self) -> u64 { self.value_from_index }
fn highest_equiv_value(&self) -> u64 { self.highest_equiv_value }
}
struct LinearIter<'a> {
base: BaseIter<'a>,
value_units_per_bucket: u64,
count_added_in_this_iteration_step: u64,
next_value_reporting_level: u64,
next_value_reporting_level_lowest_equiv: u64,
}
impl<'a> LinearIter<'a> {
fn new(h: &'a Histogram, value_units_per_bucket: u64) -> LinearIter<'a> {
LinearIter {
base: BaseIter::new(h),
value_units_per_bucket: value_units_per_bucket,
count_added_in_this_iteration_step: 0,
next_value_reporting_level: 0,
next_value_reporting_level_lowest_equiv: 0,
}
}
}
impl<'a> Iterator for LinearIter<'a> {
type Item = IterItem;
fn next(&mut self) -> Option<IterItem> {
if !self.base.has_next() ||
self.base.peek_next_value_from_index() <= self.next_value_reporting_level_lowest_equivalent {
return None
}
loop {
if self.base.value_from_idx >= self.next_value_reporting_level_lowest_equivalent {
self.next_value_reporting_level += self.value_units_per_bucket;
self.next_value_reporting_level_lowest_equivalent =
self.h.lowest_equivalent_value(self.next_value_reporting_level);
break
}
if !self.move_next() { return None }
self.count_added_in_this_iteration_step += self.count_at_index;
}
Some(IterItem {
count_at_index: self.base.count_at_index,
count_to_index: self.base.count_to_index,
value_from_index: self.base.value_from_index,
highest_equivalent_value: self.base.highest_equivalent_value,
})
}
}
#[derive(Debug)]
pub struct HistoErr;
impl HistoErr {
fn new() -> HistoErr { HistoErr }
}
struct HistoBucketCfg {
lowest_trackable: i64,
highest_trackable: i64,
unit_mag: i32,
sigfig: u32,
sub_bucket_half_count_mag: i32,
sub_bucket_half_count: i32,
sub_bucket_mask: u64,
sub_bucket_count: isize,
bucket_count: isize,
counts_len: isize,
}
impl HistoBucketCfg {
fn buckets_needed_to_cover_value(value: i64, sub_bucket_count: isize, unit_mag: i32) -> isize {
let mut smallest_untrackable_value = (sub_bucket_count as i64) << unit_mag;
let mut buckets_needed = 1;
while smallest_untrackable_value <= value {
if smallest_untrackable_value > i64::max_value() / 2 {
return buckets_needed + 1
}
smallest_untrackable_value <<= 1;
buckets_needed += 1;
}
buckets_needed
}
fn new(lowest_trackable: i64, highest_trackable: i64, sigfig: u32) -> Result<HistoBucketCfg, HistoErr> {
if lowest_trackable < 1 || sigfig < 1 || 5 < sigfig {
return Err(HistoErr::new())
}
if lowest_trackable * 2 > highest_trackable {
return Err(HistoErr::new())
}
let largest_with_unit_res = 2.0 * 10.0_f64.powi(sigfig as i32);
let sub_bucket_count_mag = largest_with_unit_res.log2().ceil() as i32;
let sub_bucket_half_count_mag = cmp::max(sub_bucket_count_mag, 1) - 1;
let sub_bucket_count = 2_f64.powf((sub_bucket_half_count_mag + 1) as f64) as isize;
let unit_mag = (lowest_trackable as f64).log2().floor() as i32;
let bucket_count = HistoBucketCfg::buckets_needed_to_cover_value(highest_trackable, sub_bucket_count, unit_mag);
let r = HistoBucketCfg {
lowest_trackable: lowest_trackable,
highest_trackable: highest_trackable,
sigfig: sigfig,
sub_bucket_half_count_mag: sub_bucket_half_count_mag,
unit_mag: unit_mag,
sub_bucket_count: sub_bucket_count,
sub_bucket_half_count: (2_f64.powf((sub_bucket_half_count_mag + 1) as f64) / 2_f64) as i32,
sub_bucket_mask: (sub_bucket_count as u64 - 1) << unit_mag,
bucket_count: bucket_count,
counts_len: (bucket_count + 1) * (sub_bucket_count / 2),
};
Ok(r)
}
}
#[cfg(test)]
mod test {
use super::Histogram;
use super::HistoBucketCfg;
#[test]
fn test_create() {
let h = Histogram::init(1, 3600000000, 3).unwrap();
assert_eq!(h.counts.len(), 23552);
}
#[test]
fn test_invalid_init() {
assert!(Histogram::init(0, 6481024, 2).is_err());
assert!(Histogram::init(80, 110, 5).is_err());
}
#[test]
fn test_create_with_large_values() {
let h = Histogram::init(20000000, 100000000, 5).unwrap();
h.record_value(100000000);
h.record_value(20000000);
h.record_value(30000000);
assert!(h.values_are_equivalent(20000000, h.value_at_percentile(50.0)));
assert!(h.values_are_equivalent(30000000, h.value_at_percentile(83.33)));
assert!(h.values_are_equivalent(100000000, h.value_at_percentile(83.34)));
assert!(h.values_are_equivalent(100000000, h.value_at_percentile(99.0)));
}
}