use std::collections::{HashSet, HashMap};
use crate::standard::{Histogram, HistogramParams};
#[cfg(test)]
mod compact_testing {
use crate::c2::new_compact_params;
#[test]
fn init_params(){
assert!(new_compact_params(0.5, 8, 7).is_none());
assert!(new_compact_params(1.0, 8, 7).is_none());
assert!(new_compact_params(4.0, 0, 7).is_none());
assert!(new_compact_params(4.0, 8, 0).is_none());
assert!(new_compact_params(3.0, 9, 7).is_some());
}
#[test]
fn ranges(){
let cp = new_compact_params(3.0, 9, 7).unwrap();
assert_eq!(cp.range(0), Some((1, 3)));
assert_eq!(cp.range(1), Some((4, 9)));
assert_eq!(cp.range(2), Some((10, 27)));
assert_eq!(cp.range(3), Some((28, 81)));
assert!(cp.range(10).is_none());
let cp = new_compact_params(3.5, 9, 7).unwrap();
assert_eq!(cp.range(0), Some((1, 3)));
assert_eq!(cp.range(1), Some((4, 12)));
assert_eq!(cp.range(2), Some((13, 42)));
assert_eq!(cp.range(3), Some((43, 150)));
}
#[test]
fn sub_ranges(){
let cp = new_compact_params(3.0, 9, 7).unwrap();
assert_eq!(cp.get_kth_sub_range(0, 0), Some((1,1)));
assert_eq!(cp.get_kth_sub_range(0, 1), Some((2,2)));
assert_eq!(cp.get_kth_sub_range(0, 2), Some((3,3)));
assert!(cp.get_kth_sub_range(0, 3).is_none());
let cp = new_compact_params(3.5, 9, 7).unwrap();
assert_eq!(cp.get_kth_sub_range(2, 0), Some((13, 17)));
assert_eq!(cp.get_kth_sub_range(2, 1), Some((17, 21)));
assert_eq!(cp.get_kth_sub_range(2, 2), Some((21, 25)));
assert_eq!(cp.get_kth_sub_range(2, 3), Some((25, 29)));
assert_eq!(cp.get_kth_sub_range(2, 4), Some((29, 33)));
assert_eq!(cp.get_kth_sub_range(2, 5), Some((33, 37)));
assert_eq!(cp.get_kth_sub_range(2, 6), Some((37, 42)));
assert!(cp.get_kth_sub_range(2, 7).is_none());
assert_eq!(cp.get_kth_sub_range(3, 0), Some((43, 58)));
assert_eq!(cp.get_kth_sub_range(3, 1), Some((58, 73)));
assert_eq!(cp.get_kth_sub_range(3, 2), Some((73, 88)));
assert_eq!(cp.get_kth_sub_range(3, 3), Some((88, 104)));
assert_eq!(cp.get_kth_sub_range(3, 4), Some((104, 119)));
assert_eq!(cp.get_kth_sub_range(3, 5), Some((119, 134)));
assert_eq!(cp.get_kth_sub_range(3, 6), Some((134, 150)));
}
#[test]
fn pos_leases(){
let cp = new_compact_params(2.0, 3, 2).unwrap();
assert_eq!(cp.possible_leases(), vec![1,2,3,4,6,8]);
let cp = new_compact_params(3.0, 3, 2).unwrap();
assert_eq!(cp.possible_leases(), vec![1,2,6,9,18,27]);
let cp = new_compact_params(4.0, 4, 2).unwrap();
assert_eq!(cp.possible_leases(), vec![2,4,10,16,40,64,160,256]);
let cp = new_compact_params(4.0, 4, 3).unwrap();
assert_eq!(cp.possible_leases(), vec![1,2,3,8,12,16,32,48,64,128,192,256]);
}
#[test]
fn index_buckets(){
let cp = new_compact_params(2.0, 5, 4).unwrap();
assert_eq!(cp.bucket_index(1), Some(0));
assert_eq!(cp.bucket_index(2), Some(0));
assert_eq!(cp.bucket_index(3), Some(1));
assert_eq!(cp.bucket_index(4), Some(1));
assert_eq!(cp.bucket_index(5), Some(2));
assert_eq!(cp.bucket_index(7), Some(2));
assert_eq!(cp.bucket_index(8), Some(2));
assert_eq!(cp.bucket_index(9), Some(3));
assert_eq!(cp.bucket_index(32), Some(4));
assert!(cp.bucket_index(33).is_none());
let cp = new_compact_params(3.7, 5, 4).unwrap();
assert_eq!(cp.bucket_index(1), Some(0));
assert_eq!(cp.bucket_index(2), Some(0));
assert_eq!(cp.bucket_index(3), Some(0));
assert_eq!(cp.bucket_index(4), Some(1));
assert_eq!(cp.bucket_index(5), Some(1));
assert_eq!(cp.bucket_index(13), Some(1));
assert_eq!(cp.bucket_index(14), Some(2));
assert_eq!(cp.bucket_index(50), Some(2));
assert_eq!(cp.bucket_index(51), Some(3));
}
}
#[derive(Debug, Clone)]
pub struct CompactParams {
base : f64,
max : f64,
num_buckets : usize,
k : usize
}
impl CompactParams {
pub fn max(&self) -> f64 { self.max }
pub fn base(&self) -> f64 { self.base }
pub fn k(&self) -> usize { self.k }
pub fn num_buckets(&self) -> usize { self.num_buckets }
pub fn possible_leases(&self) -> Vec<usize> {
let mut output = vec![];
let mut counter = 0;
output.reserve(self.num_buckets * self.k);
for b in 0..self.num_buckets {
for k_i in 0..self.k {
if let Some((_min, max)) = self.get_kth_sub_range(b, k_i) {
output.insert(counter, max as usize);
counter += 1;
}
}
}
output
}
pub fn get_kth_sub_range(&self, bucket_index :usize, k_i :usize) -> Option<(usize, usize)> {
if k_i >= self.k { return None };
if let Some((r_min, r_max)) = self.range(bucket_index) {
let range = r_max - r_min;
if self.k >= range {
if k_i > range { return None; }
return Some((k_i + r_min, k_i + r_min));
}
let sub_range = range as f64 / self.k as f64;
let lower = r_min + (sub_range * k_i as f64) as usize;
let higher = r_min + (sub_range * (k_i + 1) as f64) as usize;
return Some((lower, higher));
}
None
}
pub(crate) fn range(&self, bucket_index : usize) -> Option<(usize, usize)> {
if bucket_index >= self.num_buckets() {
None
} else if bucket_index == 0 {
Some((1, self.base as usize))
} else {
let lower = self.base.powi(bucket_index as i32) as usize + 1;
let higher = self.base.powi((bucket_index+1) as i32) as usize;
Some((lower, higher))
}
}
pub fn bucket_index(&self, ri : usize) -> Option<usize> {
if ri == 1 {
Some(0)
} else if ri as f64 > self.max || ri == 0 { None
} else {
let bi = (ri as f64).log(self.base);
if bi.fract() == 0.0 {
if bi == 1.0 {
Some(0)
} else {
Some(bi as usize - 1)
}
} else { Some(((ri as f64 - 0.1).log(self.base).floor() + 0.1) as usize)
}
}
}
}
pub fn new_compact_params(b: f64, n: usize, k: usize) -> Option<CompactParams> {
if b <= 1.0 || k == 0 || n == 0 {
None
} else {
let max = b.powi(n as i32);
Some(CompactParams { base: b, max, num_buckets: n, k })
}
}
impl HistogramParams for CompactParams {}
pub struct CompactHistogram {
pub(crate) counters : Vec<usize>,
pub(crate) sub_range_averages : Vec<usize>,
pub(crate) overflowed : usize }
impl Histogram<CompactParams, usize, usize> for CompactHistogram {
fn labels(&self, params : &CompactParams) -> Option<Box<dyn Iterator<Item=usize>>>{
let mut v = Vec::new();
let mut count = 0;
for i in 0..params.num_buckets {
if let Some((_min, max)) = params.get_kth_sub_range(
i, self.sub_range_averages[i]) {
if !v.contains(&max) {
v.insert(count, max);
count += 1;
}
}
}
Some(Box::new(v.into_iter()))
}
fn frequency(&self, label : usize, params : &CompactParams) -> Option<usize> {
if let Some(bi) = params.bucket_index(label) {
if let Some(&s) = self.counters.get(bi) {
Some(s)
} else { None }
} else { Some(0) }
}
fn total(&self) -> usize {
let mut sum: usize = 0;
for i in 0..self.counters.len() {
sum += self.counters[i];
}
return sum + self.overflowed as usize;
}
}
#[cfg(test)]
mod compress_testing {
use crate::c2::{new_compressed_params};
#[test]
fn init_params(){
assert!(new_compressed_params(2.0, 5).is_some());
assert!(new_compressed_params(5.0, 51).is_some());
assert!(new_compressed_params(6.8, 15).is_some());
assert!(new_compressed_params(1.5, 34).is_some());
assert!(new_compressed_params(1.0, 21).is_none());
assert!(new_compressed_params(-0.5, 12).is_none());
assert!(new_compressed_params(2.0, 0).is_none());
}
#[test]
fn compression(){
let cp = new_compressed_params(2.0, 5).unwrap();
assert_eq!(cp.compress_count(0), Some(0));
assert_eq!(cp.compress_count(2), Some(1));
assert_eq!(cp.compress_count(3), Some(2));
assert_eq!(cp.compress_count(4), Some(2));
assert_eq!(cp.compress_count(5), Some(3));
assert_eq!(cp.compress_count(7), Some(3));
assert_eq!(cp.compress_count(8), Some(3));
assert_eq!(cp.compress_count(9), Some(4));
assert_eq!(cp.compress_count(31), Some(5));
assert_eq!(cp.compress_count(35), Some(5));
assert_eq!(cp.compress_count(300), Some(5));
let cp = new_compressed_params(1.5, 15).unwrap();
assert_eq!(cp.compress_count(0), Some(0));
assert_eq!(cp.compress_count(1), Some(1));
assert_eq!(cp.compress_count(2), Some(2));
assert_eq!(cp.compress_count(3), Some(3));
assert_eq!(cp.compress_count(4), Some(4));
assert_eq!(cp.compress_count(5), Some(4));
assert_eq!(cp.compress_count(6), Some(5));
assert_eq!(cp.compress_count(7), Some(5));
assert_eq!(cp.compress_count(8), Some(6));
assert_eq!(cp.compress_count(9), Some(6));
assert_eq!(cp.compress_count(10), Some(6));
assert_eq!(cp.compress_count(11), Some(6));
assert_eq!(cp.compress_count(12), Some(7));
assert_eq!(cp.compress_count(13), Some(7));
assert_eq!(cp.compress_count(300), Some(15));
assert_eq!(cp.compress_count(500), Some(15));
assert_eq!(cp.compress_count(5000), Some(15));
}
#[test]
fn decompression(){
let cp = new_compressed_params(2.0, 5).unwrap();
assert_eq!(cp.decompress_count(0), Some(0));
assert_eq!(cp.decompress_count(1), Some(2));
assert_eq!(cp.decompress_count(2), Some(4));
assert_eq!(cp.decompress_count(3), Some(8));
assert_eq!(cp.decompress_count(4), Some(16));
assert_eq!(cp.decompress_count(5), Some(32));
assert!(cp.decompress_count(6).is_none());
let cp = new_compressed_params(1.5, 15).unwrap();
assert_eq!(cp.decompress_count(0), Some(0));
assert_eq!(cp.decompress_count(1), Some(1));
assert_eq!(cp.decompress_count(2), Some(2));
assert_eq!(cp.decompress_count(3), Some(3));
assert_eq!(cp.decompress_count(4), Some(5));
assert_eq!(cp.decompress_count(5), Some(7));
assert_eq!(cp.decompress_count(9), Some(38));
assert_eq!(cp.decompress_count(10), Some(57));
assert_eq!(cp.decompress_count(11), Some(86));
assert_eq!(cp.decompress_count(12), Some(129));
assert_eq!(cp.decompress_count(13), Some(194));
assert!(cp.decompress_count(16).is_none());
}
#[test]
fn decomp_comp(){
let cp = new_compressed_params(2.0, 5).unwrap();
assert_eq!(cp.decompress_count(cp.compress_count(1).unwrap()), Some(2));
assert_eq!(cp.decompress_count(cp.compress_count(2).unwrap()), Some(2));
assert_eq!(cp.decompress_count(cp.compress_count(3).unwrap()), Some(4));
assert_eq!(cp.decompress_count(cp.compress_count(4).unwrap()), Some(4));
assert_eq!(cp.decompress_count(cp.compress_count(5).unwrap()), Some(8));
assert_eq!(cp.decompress_count(cp.compress_count(6).unwrap()), Some(8));
assert_eq!(cp.decompress_count(cp.compress_count(7).unwrap()), Some(8));
assert_eq!(cp.decompress_count(cp.compress_count(8).unwrap()), Some(8));
assert_eq!(cp.decompress_count(cp.compress_count(9).unwrap()), Some(16));
assert_eq!(cp.decompress_count(cp.compress_count(10).unwrap()), Some(16));
assert_eq!(cp.decompress_count(cp.compress_count(11).unwrap()), Some(16));
assert_eq!(cp.decompress_count(cp.compress_count(12).unwrap()), Some(16));
assert_eq!(cp.decompress_count(cp.compress_count(13).unwrap()), Some(16));
assert_eq!(cp.decompress_count(cp.compress_count(14).unwrap()), Some(16));
assert_eq!(cp.decompress_count(cp.compress_count(15).unwrap()), Some(16));
}
}
#[derive(Debug, Clone)]
pub struct CompressedParams {
base : f64,
max_exp : usize,
table: Vec<usize>
}
impl CompressedParams {
pub fn base(&self) -> f64 { self.base }
pub fn max_exp(&self) -> usize { self.max_exp }
}
pub fn new_compressed_params(base : f64, max_exp : usize) -> Option<CompressedParams> {
if base <= 1.01 || max_exp == 0 {
None
} else {
let mut table = Vec::new();
table.reserve_exact(max_exp + 1);
table.insert(0, 0);
for i in 1..(max_exp+1) {
let mut expanded = base.powi(i as i32);
if expanded > usize::MAX as f64 / (base * base) {
for j in i..(max_exp+1) {
table.insert(j, expanded as usize);
}
break;
}
while expanded as usize <= table[i-1] {
expanded = expanded * base;
}
let expanded = expanded as usize;
table.insert(i, expanded);
}
Some(CompressedParams { base, max_exp, table })
}
}
impl HistogramParams for CompressedParams {}
impl CompressedParams {
pub(crate) fn compress_count(&self, count : usize) -> Option<u16> {
if count == 0 { Some(0)
} else if (count as f64) < self.base {
Some(1)
} else if count > self.table[self.max_exp]{
Some(self.max_exp as u16)
} else {
let mut min = 0;
let mut max = self.max_exp+1;
let mut mid;
loop {
mid = (min + max) / 2;
if self.table[mid] >= count && self.table[mid - 1] < count {
break;
}
else if self.table[mid] > count {
max = mid;
}
else if self.table[mid] < count {
min = mid;
}
}
Some(mid as u16)
}
}
pub(crate) fn decompress_count(&self, compressed_count : u16) -> Option<usize> {
if compressed_count > self.max_exp as u16 {
None
} else {
Some(self.table[compressed_count as usize])
}
}
}
#[allow(dead_code)]
pub struct CompressedHistogram {
pub(crate) data : HashMap<usize, u16>,
pub(crate) total : usize,
pub(crate) orig_total : usize
}
impl Histogram<CompressedParams, usize, usize> for CompressedHistogram {
fn labels(&self, _params: &CompressedParams) -> Option<Box<dyn Iterator<Item=usize>>> {
let mut out : Vec<usize> = vec![];
let mut counter : usize = 0;
for k in self.data.keys(){
out.insert(counter, *k);
counter += 1;
}
Some(Box::new(out.into_iter()))
}
fn frequency(&self, label: usize, params: &CompressedParams) -> Option<usize> {
if let Some(c) = self.data.get(&label) {
params.decompress_count(*c)
} else {
None
}
}
fn total(&self) -> usize {
if self.total > self.orig_total {
self.total
} else {
self.orig_total
}
}
}
#[derive(Debug, Clone)]
pub struct C2Params {
compact_params : CompactParams,
compressed_params : CompressedParams
}
impl C2Params {
pub fn compact_ps(&self) -> &CompactParams { &self.compact_params }
pub fn compressed_ps(&self) -> &CompressedParams { &self.compressed_params }
}
impl HistogramParams for C2Params {}
pub fn new_c2_params(compacts : CompactParams, compresseds : CompressedParams) -> C2Params {
C2Params { compact_params : compacts, compressed_params : compresseds }
}
#[allow(dead_code)]
pub struct C2Histogram {
pub(crate) counters : Vec<u16>,
pub(crate) sub_range_averages : Vec<usize>,
pub(crate) orig_total : usize,
pub(crate) total : usize,
pub(crate) overflowed: usize
}
impl Histogram<C2Params, usize, usize> for C2Histogram {
fn labels(&self, params: &C2Params) -> Option<Box<dyn Iterator<Item=usize>>> {
let params = params.compact_ps();
let mut v = HashSet::new();
for i in 0..params.num_buckets() {
if let Some((_min, max)) = params.get_kth_sub_range(
i, self.sub_range_averages[i]) {
v.insert(max as usize);
}
}
Some(Box::new(v.into_iter()))
}
fn frequency(&self, label: usize, params: &C2Params) -> Option<usize> {
if let Some(bi) = params.compact_params.bucket_index(label) {
if let Some(&s) = self.counters.get(bi) {
params.compressed_params.decompress_count(s)
} else { None }
} else { Some(0) }
}
fn total(&self) -> usize {
self.total + self.overflowed
}
}