c2_histograms/
c2.rs

1use std::collections::{HashSet, HashMap};
2use crate::standard::{Histogram, HistogramParams};
3
4/// This module contains Histogram implementations that simulate memory saving
5/// optimizations. Note that this is only for evaluation purposes, it does not actually
6/// perform the space saving operations.
7
8// -----------------------------------------------------------------------------------
9// --- COMPACT HISTOGRAM -------------------------------------------------------------
10// -----------------------------------------------------------------------------------
11#[cfg(test)]
12mod compact_testing {
13    use crate::c2::new_compact_params;
14
15    #[test]
16    fn init_params(){
17        assert!(new_compact_params(0.5, 8, 7).is_none());
18        assert!(new_compact_params(1.0, 8, 7).is_none());
19        assert!(new_compact_params(4.0, 0, 7).is_none());
20        assert!(new_compact_params(4.0, 8, 0).is_none());
21
22        assert!(new_compact_params(3.0, 9, 7).is_some());
23    }
24
25    #[test]
26    fn ranges(){
27        let cp = new_compact_params(3.0, 9, 7).unwrap();
28
29        assert_eq!(cp.range(0), Some((1, 3)));
30        assert_eq!(cp.range(1), Some((4, 9)));
31        assert_eq!(cp.range(2), Some((10, 27)));
32        assert_eq!(cp.range(3), Some((28, 81)));
33
34        assert!(cp.range(10).is_none());
35
36        let cp = new_compact_params(3.5, 9, 7).unwrap();
37
38        assert_eq!(cp.range(0), Some((1, 3)));
39        assert_eq!(cp.range(1), Some((4, 12)));
40        assert_eq!(cp.range(2), Some((13, 42)));
41        assert_eq!(cp.range(3), Some((43, 150)));
42
43    }
44
45    #[test]
46    fn sub_ranges(){
47        let cp = new_compact_params(3.0, 9, 7).unwrap();
48
49        assert_eq!(cp.get_kth_sub_range(0, 0), Some((1,1)));
50        assert_eq!(cp.get_kth_sub_range(0, 1), Some((2,2)));
51        assert_eq!(cp.get_kth_sub_range(0, 2), Some((3,3)));
52
53        assert!(cp.get_kth_sub_range(0, 3).is_none());
54
55        let cp = new_compact_params(3.5, 9, 7).unwrap();
56
57        assert_eq!(cp.get_kth_sub_range(2, 0), Some((13, 17)));
58        assert_eq!(cp.get_kth_sub_range(2, 1), Some((17, 21)));
59        assert_eq!(cp.get_kth_sub_range(2, 2), Some((21, 25)));
60        assert_eq!(cp.get_kth_sub_range(2, 3), Some((25, 29)));
61        assert_eq!(cp.get_kth_sub_range(2, 4), Some((29, 33)));
62        assert_eq!(cp.get_kth_sub_range(2, 5), Some((33, 37)));
63        assert_eq!(cp.get_kth_sub_range(2, 6), Some((37, 42)));
64        assert!(cp.get_kth_sub_range(2, 7).is_none());
65
66        assert_eq!(cp.get_kth_sub_range(3, 0), Some((43, 58)));
67        assert_eq!(cp.get_kth_sub_range(3, 1), Some((58, 73)));
68        assert_eq!(cp.get_kth_sub_range(3, 2), Some((73, 88)));
69        assert_eq!(cp.get_kth_sub_range(3, 3), Some((88, 104)));
70        assert_eq!(cp.get_kth_sub_range(3, 4), Some((104, 119)));
71        assert_eq!(cp.get_kth_sub_range(3, 5), Some((119, 134)));
72        assert_eq!(cp.get_kth_sub_range(3, 6), Some((134, 150)));
73    }
74
75    #[test]
76    fn pos_leases(){
77        let cp = new_compact_params(2.0, 3, 2).unwrap();
78        assert_eq!(cp.possible_leases(), vec![1,2,3,4,6,8]);
79        let cp = new_compact_params(3.0, 3, 2).unwrap();
80        assert_eq!(cp.possible_leases(), vec![1,2,6,9,18,27]);
81        let cp = new_compact_params(4.0, 4, 2).unwrap();
82        assert_eq!(cp.possible_leases(), vec![2,4,10,16,40,64,160,256]);
83        let cp = new_compact_params(4.0, 4, 3).unwrap();
84        assert_eq!(cp.possible_leases(), vec![1,2,3,8,12,16,32,48,64,128,192,256]);
85    }
86
87    #[test]
88    fn index_buckets(){
89        let cp = new_compact_params(2.0, 5, 4).unwrap();
90        assert_eq!(cp.bucket_index(1), Some(0));
91        assert_eq!(cp.bucket_index(2), Some(0));
92        assert_eq!(cp.bucket_index(3), Some(1));
93        assert_eq!(cp.bucket_index(4), Some(1));
94        assert_eq!(cp.bucket_index(5), Some(2));
95        assert_eq!(cp.bucket_index(7), Some(2));
96        assert_eq!(cp.bucket_index(8), Some(2));
97        assert_eq!(cp.bucket_index(9), Some(3));
98        assert_eq!(cp.bucket_index(32), Some(4));
99        assert!(cp.bucket_index(33).is_none());
100
101        let cp = new_compact_params(3.7, 5, 4).unwrap();
102        assert_eq!(cp.bucket_index(1), Some(0));
103        assert_eq!(cp.bucket_index(2), Some(0));
104        assert_eq!(cp.bucket_index(3), Some(0));
105        assert_eq!(cp.bucket_index(4), Some(1));
106        assert_eq!(cp.bucket_index(5), Some(1));
107        assert_eq!(cp.bucket_index(13), Some(1));
108        assert_eq!(cp.bucket_index(14), Some(2));
109        assert_eq!(cp.bucket_index(50), Some(2));
110        assert_eq!(cp.bucket_index(51), Some(3));
111    }
112
113
114}
115
116// contains the parameters for compressing and reinterpreting the histograms
117// this should never be built except by cloning and the new_grid_params function below
118/// `CompactParams` contains 3 critical parameters determining how a [`CompactHistogram`]
119/// is stored and interpreted. See [`CompactHistogram`] for a full explanation.
120///
121/// # Example
122/// To create your own parameters, use the constructor function
123/// ```
124/// # use c2_histograms::c2::{new_compact_params};
125/// if let Some(cp) = new_compact_params(4.5, 12, 9) {
126///     // do something...
127/// } else { }
128/// ```
129///
130/// [`CompactHistogram`]: struct.CompactHistogram.html
131#[derive(Debug, Clone)]
132pub struct CompactParams {
133    // the logarithmic base
134    base : f64,
135
136    // the maximum RI to have a bucket
137    max : f64,
138
139    // the number of buckets
140    num_buckets : usize,
141
142    // the number of sub-ranges
143    k : usize
144}
145
146impl CompactParams {
147
148    // accessors
149    pub fn max(&self) -> f64 { self.max }
150    pub fn base(&self) -> f64 { self.base }
151    pub fn k(&self) -> usize { self.k }
152    pub fn num_buckets(&self) -> usize { self.num_buckets }
153
154    /* for all practical purposes this is irrelevant, just confuses things (also deprecated)
155    pub fn bucket_labels(&self) -> Vec<usize> {
156        let nb = self.num_buckets();
157        let mut output = vec![];
158        output.reserve(nb+1);
159        for i in 0..nb+1 {
160            output.insert(i,(self.base.powi(i as i32).floor() + 0.1) as usize);
161        }
162        output
163    }*/
164
165    // definitely not the most efficient way to do it
166    // compute all possible leases that this grid/ch can represent
167    pub fn possible_leases(&self) -> Vec<usize> {
168        let mut output = vec![];
169        let mut counter = 0;
170        output.reserve(self.num_buckets * self.k);
171        for b in 0..self.num_buckets {
172            for k_i in 0..self.k {
173                if let Some((_min, max)) = self.get_kth_sub_range(b, k_i) {
174                    output.insert(counter, max as usize);
175                    counter += 1;
176                }
177            }
178        }
179        output
180    }
181
182    // returns a the kth sub-range in the bth bucket
183    // k_i must be less than k
184    // if k is larger than the length of the bucket, the reuse interval is completely
185    // determined, it is min + k_i, and k_i <= min - max
186    pub fn get_kth_sub_range(&self, bucket_index :usize, k_i :usize) -> Option<(usize, usize)> {
187        if k_i >= self.k { return None };
188
189        // get the overall range of the bucket
190        if let Some((r_min, r_max)) = self.range(bucket_index) {
191
192            // compute the difference
193            let range = r_max - r_min;
194
195            // directly placed item special case
196            if self.k >= range {
197                if k_i > range { return None; }
198                return Some((k_i + r_min, k_i + r_min));
199            }
200
201            // compute the lower and upper bounds on the sub-range and return
202            let sub_range = range as f64 / self.k as f64;
203            let lower = r_min + (sub_range * k_i as f64) as usize;
204            let higher = r_min + (sub_range * (k_i + 1) as f64) as usize;
205
206            return Some((lower, higher));
207        }
208
209        None
210    }
211
212    // returns the range of the ith bucket under these parameters
213    // [1..base][base+1..base^2]..[base^(i)+1,base^(i+1)]
214    pub(crate) fn range(&self, bucket_index : usize) -> Option<(usize, usize)> {
215        // if it's not a valid bucket, return nothing
216        if bucket_index >= self.num_buckets() {
217            None
218        } else if bucket_index == 0 {
219            Some((1, self.base as usize))
220        } else {
221            let lower = self.base.powi(bucket_index as i32) as usize + 1;
222            let higher = self.base.powi((bucket_index+1) as i32) as usize;
223            Some((lower, higher))
224        }
225    }
226
227    // returns the bucket index that this RI would be placed in under these params
228    pub fn bucket_index(&self, ri : usize) -> Option<usize> {
229        // this is frequent and always the same regardless of base
230        if ri == 1 {
231            Some(0)
232
233            // if we're overflowing the compressed histogram / grid, return none
234        } else if ri as f64 > self.max || ri == 0 { // ri should never be zero but it's good to check
235            None
236        } else {
237            let bi = (ri as f64).log(self.base);
238
239            // if the r_i is a perfect power of the base:
240            if bi.fract() == 0.0 {
241                if bi == 1.0 {
242                    Some(0)
243                } else {
244                    Some(bi as usize - 1)
245                }
246            } else {  // otherwise:
247                Some(((ri as f64 - 0.1).log(self.base).floor() + 0.1) as usize)
248            }
249        }
250    }
251
252}
253
254/// ensures that the parameters are all valid and if so creates a new `CompactParams` struct
255///
256/// For [`CompactParams`], b must be greater than or equal to 1.0, and n, k > 0.
257///
258/// [`CompactParams`]: struct.CompactParams.html
259pub fn new_compact_params(b: f64, n: usize, k: usize) -> Option<CompactParams> {
260    if b <= 1.0 || k == 0 || n == 0 {
261        None
262    } else {
263        let max = b.powi(n as i32);
264        Some(CompactParams { base: b, max, num_buckets: n, k })
265    }
266}
267
268impl HistogramParams for CompactParams {}
269
270/// `CompactHistogram` is a fixed-size representation of a histogram. The set of possible
271/// labels are partitioned into logarithmically scaled ranges, then further subdivided into
272/// equally sized sub-ranges (equally sized within a given range).
273/// A `CompactHistogram` stores a counter for each of the larger logarithmic ranges and which
274/// of the equally sized sub-ranges contains the frequency-weighted average of the labels.
275/// It is associated with [`CompactParams`].
276///
277/// There are 3 parameters that govern this process:
278/// - n -> the number of logarithmically sized ranges to represent
279/// - k -> the number of equally sized sub-ranges that each larger range is divided into
280/// - b -> a floating point number that determines the size of each of the logarithmically
281///         scaled buckets. The ranges are [1..b], [b+1..b^2], ..., [b^(n)+1,b^(n+1)].
282///
283/// Each compact histogram can be stored in n * (c + log_2(k)) + c bits, where c is the number of
284/// bits dedicated to a counter. Note that this value is independent of b.
285///
286/// This histogram also keeps track of how many labels "overflow", i.e. are too large to fit
287/// into any of the ranges. The labels themselves are lost, but the frequencies are recorded and
288/// included in total.
289///
290/// A `CompactHistogram` must be derived from a [`StandardHistogram`] using the [`to_compact`] function.
291///
292/// [`CompactParams`]: struct.CompactParams.html
293/// [`StandardHistogram`]: ../standard/struct.StandardHistogram.html
294/// [`to_compact`]: ../standard/struct.StandardHistogram.html#method.to_compact
295pub struct CompactHistogram {
296    // the counters for the total # ri's in a bucket
297    pub(crate) counters : Vec<usize>,
298
299    // the sub-range into which the average ri falls
300    pub(crate) sub_range_averages : Vec<usize>, // range from 0 - k-1
301
302    pub(crate) overflowed : usize // the number of addresses that overflowed
303}
304
305impl Histogram<CompactParams, usize, usize> for CompactHistogram {
306    // this is the same as in c2, make sure to update both...
307    // code copying isn't great but no better approach in this case
308    fn labels(&self, params : &CompactParams) -> Option<Box<dyn Iterator<Item=usize>>>{
309        let mut v = Vec::new();
310        let mut count = 0;
311        for i in 0..params.num_buckets {
312            if let Some((_min, max)) = params.get_kth_sub_range(
313                i, self.sub_range_averages[i]) {
314                if !v.contains(&max) {
315                    v.insert(count, max);
316                    count += 1;
317                }
318            }
319        }
320        Some(Box::new(v.into_iter()))
321    }
322
323    fn frequency(&self, label : usize, params : &CompactParams) -> Option<usize> {
324        if let Some(bi) = params.bucket_index(label) {
325            if let Some(&s) = self.counters.get(bi) {
326                Some(s)
327            } else { None }
328        } else { Some(0) }
329    }
330
331    fn total(&self) -> usize {
332        let mut sum: usize = 0;
333        for i in 0..self.counters.len() {
334            sum += self.counters[i];
335        }
336        return sum + self.overflowed as usize;
337    }
338}
339
340
341// -----------------------------------------------------------------------------------
342// --- COMPRESSED HISTOGRAM ----------------------------------------------------------
343// -----------------------------------------------------------------------------------
344#[cfg(test)]
345mod compress_testing {
346    use crate::c2::{new_compressed_params};
347
348    #[test]
349    fn init_params(){
350        assert!(new_compressed_params(2.0, 5).is_some());
351        assert!(new_compressed_params(5.0, 51).is_some());
352        assert!(new_compressed_params(6.8, 15).is_some());
353        assert!(new_compressed_params(1.5, 34).is_some());
354        assert!(new_compressed_params(1.0, 21).is_none());
355        assert!(new_compressed_params(-0.5, 12).is_none());
356        assert!(new_compressed_params(2.0, 0).is_none());
357    }
358
359    #[test]
360    fn compression(){
361        let cp = new_compressed_params(2.0, 5).unwrap();
362        assert_eq!(cp.compress_count(0), Some(0));
363        assert_eq!(cp.compress_count(2), Some(1));
364        assert_eq!(cp.compress_count(3), Some(2));
365        assert_eq!(cp.compress_count(4), Some(2));
366        assert_eq!(cp.compress_count(5), Some(3));
367        assert_eq!(cp.compress_count(7), Some(3));
368        assert_eq!(cp.compress_count(8), Some(3));
369        assert_eq!(cp.compress_count(9), Some(4));
370        assert_eq!(cp.compress_count(31), Some(5));
371        assert_eq!(cp.compress_count(35), Some(5));
372        assert_eq!(cp.compress_count(300), Some(5));
373
374        let cp = new_compressed_params(1.5, 15).unwrap();
375
376        assert_eq!(cp.compress_count(0), Some(0));
377        assert_eq!(cp.compress_count(1), Some(1));
378        assert_eq!(cp.compress_count(2), Some(2));
379        assert_eq!(cp.compress_count(3), Some(3));
380        assert_eq!(cp.compress_count(4), Some(4));
381        assert_eq!(cp.compress_count(5), Some(4));
382        assert_eq!(cp.compress_count(6), Some(5));
383        assert_eq!(cp.compress_count(7), Some(5));
384        assert_eq!(cp.compress_count(8), Some(6));
385        assert_eq!(cp.compress_count(9), Some(6));
386        assert_eq!(cp.compress_count(10), Some(6));
387        assert_eq!(cp.compress_count(11), Some(6));
388        assert_eq!(cp.compress_count(12), Some(7));
389        assert_eq!(cp.compress_count(13), Some(7));
390        assert_eq!(cp.compress_count(300), Some(15));
391        assert_eq!(cp.compress_count(500), Some(15));
392        assert_eq!(cp.compress_count(5000), Some(15));
393    }
394
395    #[test]
396    fn decompression(){
397        let cp = new_compressed_params(2.0, 5).unwrap();
398        assert_eq!(cp.decompress_count(0), Some(0));
399        assert_eq!(cp.decompress_count(1), Some(2));
400        assert_eq!(cp.decompress_count(2), Some(4));
401        assert_eq!(cp.decompress_count(3), Some(8));
402        assert_eq!(cp.decompress_count(4), Some(16));
403        assert_eq!(cp.decompress_count(5), Some(32));
404        assert!(cp.decompress_count(6).is_none());
405
406        let cp = new_compressed_params(1.5, 15).unwrap();
407        assert_eq!(cp.decompress_count(0), Some(0));
408        assert_eq!(cp.decompress_count(1), Some(1));
409        assert_eq!(cp.decompress_count(2), Some(2));
410        assert_eq!(cp.decompress_count(3), Some(3));
411        assert_eq!(cp.decompress_count(4), Some(5));
412        assert_eq!(cp.decompress_count(5), Some(7));
413        assert_eq!(cp.decompress_count(9), Some(38));
414        assert_eq!(cp.decompress_count(10), Some(57));
415        assert_eq!(cp.decompress_count(11), Some(86));
416        assert_eq!(cp.decompress_count(12), Some(129));
417        assert_eq!(cp.decompress_count(13), Some(194));
418        assert!(cp.decompress_count(16).is_none());
419    }
420
421    #[test]
422    fn decomp_comp(){
423        let cp = new_compressed_params(2.0, 5).unwrap();
424        assert_eq!(cp.decompress_count(cp.compress_count(1).unwrap()), Some(2));
425        assert_eq!(cp.decompress_count(cp.compress_count(2).unwrap()), Some(2));
426        assert_eq!(cp.decompress_count(cp.compress_count(3).unwrap()), Some(4));
427        assert_eq!(cp.decompress_count(cp.compress_count(4).unwrap()), Some(4));
428        assert_eq!(cp.decompress_count(cp.compress_count(5).unwrap()), Some(8));
429        assert_eq!(cp.decompress_count(cp.compress_count(6).unwrap()), Some(8));
430        assert_eq!(cp.decompress_count(cp.compress_count(7).unwrap()), Some(8));
431        assert_eq!(cp.decompress_count(cp.compress_count(8).unwrap()), Some(8));
432        assert_eq!(cp.decompress_count(cp.compress_count(9).unwrap()), Some(16));
433        assert_eq!(cp.decompress_count(cp.compress_count(10).unwrap()), Some(16));
434        assert_eq!(cp.decompress_count(cp.compress_count(11).unwrap()), Some(16));
435        assert_eq!(cp.decompress_count(cp.compress_count(12).unwrap()), Some(16));
436        assert_eq!(cp.decompress_count(cp.compress_count(13).unwrap()), Some(16));
437        assert_eq!(cp.decompress_count(cp.compress_count(14).unwrap()), Some(16));
438        assert_eq!(cp.decompress_count(cp.compress_count(15).unwrap()), Some(16));
439    }
440
441}
442
443#[derive(Debug, Clone)]
444/// `CompactParams` contains 2 parameters determining how a [`CompressedHistogram`]
445/// is stored and interpreted. See [`CompressedHistogram`] for a full explanation.
446///
447/// # Example
448/// To create your own parameters, use the constructor function
449/// ```
450/// # use c2_histograms::c2::{new_compressed_params};
451/// if let Some(cp) = new_compressed_params(2.0, 15) {
452///     // do something...
453/// } else { }
454/// ```
455/// [`CompressedHistogram`]: struct.CompressedHistogram.html
456pub struct CompressedParams {
457    base : f64,
458    max_exp : usize,
459
460    // lookup table for faster calculation of
461    table:  Vec<usize>
462}
463
464impl CompressedParams {
465    pub fn base(&self) -> f64 { self.base }
466    pub fn max_exp(&self) -> usize { self.max_exp }
467}
468
469/// checks parameters for validity and if so creates a new `CompressedParams` struct
470///
471/// For [`CompressedParams`], the base must be greater than 1.0, and the max_exp must be
472/// nonzero.
473///
474/// [`CompressedParams`]: struct.CompressedParams.html
475pub fn new_compressed_params(base : f64, max_exp : usize) -> Option<CompressedParams> {
476    if base <= 1.01 || max_exp == 0 {
477        None
478    } else {
479        let mut table = Vec::new();
480        table.reserve_exact(max_exp + 1);
481
482
483        // this is ideal for compressedHistogram, but for C2, 0 -> 0
484        // or else it breaks
485        //table.insert(0, 1);
486        table.insert(0, 0);
487
488        for i in 1..(max_exp+1) {
489            let mut expanded = base.powi(i as i32);
490
491            // make sure we don't get too big
492            if expanded > usize::MAX as f64 / (base * base) {
493                for j in i..(max_exp+1) {
494                    table.insert(j, expanded as usize);
495                }
496                break;
497            }
498
499            // continue until we get a new value
500            while expanded as usize <= table[i-1] {
501                expanded = expanded * base;
502            }
503
504            let expanded = expanded as usize;
505
506            table.insert(i, expanded);
507        }
508
509        Some(CompressedParams { base, max_exp, table })
510    }
511}
512
513impl HistogramParams for CompressedParams {}
514
515impl CompressedParams {
516    pub(crate) fn compress_count(&self, count : usize) -> Option<u16> {
517        if count == 0 { // this should only happen in C2Histograms
518            Some(0)
519        } else if (count as f64) < self.base {
520            Some(1)
521        } else if count > self.table[self.max_exp]{
522            Some(self.max_exp as u16)
523        } else {
524            // we need to binary search the table
525            // for the index of the element which is just greater than count
526            let mut min = 0;
527            let mut max = self.max_exp+1;
528            let mut mid;
529
530            loop {
531                mid = (min + max) / 2;
532                if self.table[mid] >= count && self.table[mid - 1] < count {
533                    break;
534                }
535                else if self.table[mid] > count {
536                    max = mid;
537                }
538                else if self.table[mid] < count {
539                    min = mid;
540                }
541            }
542
543            Some(mid as u16)
544
545        }
546    }
547
548    // IN ORDER FOR C2 TO FUNCTION PROPERLY, 0 -> 0
549    pub(crate) fn decompress_count(&self, compressed_count : u16) -> Option<usize> {
550        if compressed_count > self.max_exp as u16 {
551            None
552        } else {
553            Some(self.table[compressed_count as usize])
554        }
555    }
556}
557
558#[allow(dead_code)]
559/// Though not fixed, `CompressedHistogram` encodes histograms using less space than a `StandardHistogram`.
560/// All of the frequencies for each label are approximated by a next greatest exponent.
561/// `CompressedHistogram` uses a library HashMap to store the label-frequency pairs, so it is
562/// unbounded in terms of space. It is associated with [`CompressedParams`]
563///
564/// There are 2 parameters:
565/// - b -> This floating point number is the base of the approximating exponent.
566/// - max_exp -> This is the number of distinct values that can potentially be stored.
567///
568/// A `CompressedHistogram` must be derived from a [`StandardHistogram`] using the [`to_compressed`]
569/// function.
570///
571/// [`CompressedParams`]: struct.CompressedParams.html
572/// [`StandardHistogram`]: ../standard/struct.StandardHistogram.html
573/// [`to_compressed`]: ../standard/struct.StandardHistogram.html#method.to_compressed
574///
575pub struct CompressedHistogram {
576    // maps from ris to compressed counters
577    pub(crate) data : HashMap<usize, u16>,
578    pub(crate) total : usize,
579    pub(crate) orig_total : usize
580}
581
582impl Histogram<CompressedParams, usize, usize> for CompressedHistogram {
583    fn labels(&self, _params: &CompressedParams) -> Option<Box<dyn Iterator<Item=usize>>> {
584        let mut out : Vec<usize> = vec![];
585        let mut counter : usize = 0;
586        for k in self.data.keys(){
587            out.insert(counter, *k);
588            counter += 1;
589        }
590        Some(Box::new(out.into_iter()))
591    }
592
593    fn frequency(&self, label: usize, params: &CompressedParams) -> Option<usize> {
594        if let Some(c) = self.data.get(&label) {
595            params.decompress_count(*c)
596        } else {
597            None
598        }
599    }
600
601    /// returns whichever is greater, the compressed or decompressed size
602    fn total(&self) -> usize {
603        if self.total > self.orig_total {
604            self.total
605        } else {
606            self.orig_total
607        }
608    }
609}
610
611
612// -----------------------------------------------------------------------------------
613// --- C2 HISTOGRAM ------------------------------------------------------------------
614// -----------------------------------------------------------------------------------
615
616#[derive(Debug, Clone)]
617/// `C2Params` are a wrapper struct for a set of [`CompactParams`] and [`CompressedParams`].
618///
619/// # Example
620/// To create your own parameters, use the constructor function
621/// ```
622/// # use c2_histograms::c2::{new_compressed_params, new_compact_params, new_c2_params};
623/// if let Some(cp1) = new_compact_params(2.0, 15, 9) {
624///     if let Some(cp2) = new_compressed_params(1.4, 31) {
625///         let cp = new_c2_params(cp1, cp2);
626///         // do something
627///     } else { }
628/// } else { }
629/// ```
630///
631/// [`CompactParams`]: struct.CompactParams.html
632/// [`CompressedParams`]: struct.CompressedParams.html
633pub struct C2Params {
634    compact_params : CompactParams,
635    compressed_params : CompressedParams
636}
637
638impl C2Params {
639    /// returns a reference to the underlying CompactParameters
640    pub fn compact_ps(&self) -> &CompactParams { &self.compact_params }
641
642    /// returns a reference to the underlying CompressedParameters
643    pub fn compressed_ps(&self) -> &CompressedParams { &self.compressed_params }
644}
645
646impl HistogramParams for C2Params {}
647
648/// Wraps [`CompactParams`] and [`CompressedParams`].
649///
650/// [`CompactParams`]: struct.CompactParams.html
651/// [`CompressedParams`]: struct.CompressedParams.html
652// this doesn't really need any tests
653// this set of parameters really doesn't need it's own set of functions either (just accessors),
654// it can piggy-back off of the other ones
655pub fn new_c2_params(compacts : CompactParams, compresseds : CompressedParams) -> C2Params {
656    C2Params { compact_params : compacts, compressed_params : compresseds }
657}
658
659#[allow(dead_code)]
660/// `C2Histogram` is a combination of [`CompressedHistogram`] and [`CompactHistogram`], performing
661/// the memory saving operations from each of them. It is associated with [`C2Params`].
662///
663/// Each c2 histogram can be stored in n * (c + log2(k)) + c bits, where c = log2(max_exp)
664///
665/// A `C2Histogram` must be derrived from a [`StandardHistogram`] using the [`to_c2`] function.
666///
667/// [`CompressedHistogram`]: struct.CompressedHistogram.html
668/// [`CompactHistogram`]: struct.CompactHistogram.html
669/// [`C2Params`]: struct.C2Params.html
670/// [`StandardHistogram`]: ../standard/struct.StandardHistogram.html
671/// [`to_c2`]: ../standard/struct.StandardHistogram.html#method.to_c2
672///
673pub struct C2Histogram {
674    // compressed counters
675    pub(crate) counters : Vec<u16>,
676
677    // sub-range averages
678    pub(crate) sub_range_averages : Vec<usize>,
679
680    // the pre-compression total
681    // may be useful in saving memory?
682    pub(crate) orig_total : usize,
683
684    pub(crate) total : usize,
685
686    // the amount of labels that overflowed the compaction process
687    pub(crate) overflowed: usize
688
689}
690
691impl Histogram<C2Params, usize, usize> for C2Histogram {
692    // this is exactly the same as compact
693    fn labels(&self, params: &C2Params) -> Option<Box<dyn Iterator<Item=usize>>> {
694        let params = params.compact_ps();
695        let mut v = HashSet::new();
696        for i in 0..params.num_buckets() {
697            if let Some((_min, max)) = params.get_kth_sub_range(
698                i, self.sub_range_averages[i]) {
699                v.insert(max as usize);
700            }
701        }
702        Some(Box::new(v.into_iter()))
703    }
704
705    // this is pretty much a combination of the two
706    fn frequency(&self, label: usize, params: &C2Params) -> Option<usize> {
707        if let Some(bi) = params.compact_params.bucket_index(label) {
708            if let Some(&s) = self.counters.get(bi) {
709                params.compressed_params.decompress_count(s)
710            } else { None }
711        } else { Some(0) }
712    }
713
714    fn total(&self) -> usize {
715        self.total + self.overflowed
716    }
717}