c2_histograms/
standard.rs

1use std::collections::HashMap;
2use crate::c2::{CompressedParams, CompressedHistogram, CompactHistogram, CompactParams, C2Params, C2Histogram};
3
4// -----------------------------------------------------------------------------------------
5// --- GENERAL TRAITS ----------------------------------------------------------------------
6// -----------------------------------------------------------------------------------------
7
8/// For many of the histogram space optimizations, a set of parameters are needed to
9/// interpret the data in memory as a histogram. These parameters are often needed to create
10/// a more complex kind of histogram, and they always must be passed in the histogram's
11/// [`labels`] and [`frequency`] functions.
12///
13/// [`labels`]: trait.Histogram.html#tymethod.labels
14/// [`frequency`]: trait.Histogram.html#tymethod.frequency
15pub trait HistogramParams {}
16
17/// Specifies the behaviors that are necessary to be a histogram. It is generic
18/// and parameterized by `P`, the type of parameters necessary, `L`, the type
19/// of the labels, and `F`, the type that stores the frequency.  Both the labels
20/// and the frequencies need to be some sort of number.
21pub trait Histogram<P, L, F> where P: HistogramParams {
22    /// If possible, creates and returns an iterator for the labels for which
23    /// the histogram has frequencies stored.
24    fn labels(&self, params : &P) -> Option<Box<dyn Iterator<Item=L>>>;
25
26    /// If possible, returns the frequency of a particular label.
27    /// The behavior need only be defined if the label is in the
28    /// set of values returned from labels.
29    fn frequency(&self, label : L, params : &P) -> Option<F>;
30
31    /// the total number of data-points in the histogram
32    fn total(&self) -> F;
33}
34
35// -----------------------------------------------------------------------------------------
36// --- TESTING FOR STANDARD HISTOGRAM ------------------------------------------------------
37// -----------------------------------------------------------------------------------------
38
39#[cfg(test)]
40mod standard_testing {
41    // test the general behavior as well as compacting them
42    // TODO set up tests for compression as well, and c2
43
44    use std::collections::HashMap;
45    use crate::standard::{create_standard_histogram, NullParams, StandardHistogram, Histogram};
46    use crate::c2::{new_compressed_params, new_compact_params, new_c2_params};
47
48    #[test]
49    fn init() {
50        let mut d = HashMap::new();
51        d.insert(1, 40);
52        d.insert(3, 15);
53        d.insert(50, 1);
54
55        let hist = create_standard_histogram(d);
56        let np = NullParams {};
57
58        assert_eq!(hist.total(), 56);
59        assert_eq!(hist.frequency(1, &np), Some(40));
60        assert_eq!(hist.frequency(3, &np), Some(15));
61        assert_eq!(hist.frequency(50, &np), Some(1));
62        assert_eq!(hist.frequency(4, &np), None);
63        for i in hist.labels(&np).unwrap(){
64            assert!(i == 1 || i == 3 || i == 50)
65        }
66    }
67
68    fn get_sample_n_hist() -> StandardHistogram {
69        let mut d = HashMap::new();
70        d.insert(1, 40);
71        d.insert(3, 15);
72        d.insert(50, 1);
73
74        create_standard_histogram(d)
75    }
76
77    #[test]
78    fn compaction1(){
79        let cp = new_compact_params(5.0, 6, 7).unwrap();
80        let hist = get_sample_n_hist().to_compact(&cp).unwrap();
81
82        assert_eq!(hist.total(), 56);
83        assert_eq!(hist.frequency(1, &cp), Some(55));
84        assert_eq!(hist.frequency(3, &cp), Some(55));
85        assert_eq!(hist.frequency(50, &cp), Some(1));
86        assert_eq!(hist.frequency(4, &cp), Some(55));
87        assert_eq!(hist.frequency(126, &cp), Some(0));
88
89    }
90
91    #[test]
92    fn compaction2(){
93        let cp = new_compact_params(2.0, 9, 7).unwrap();
94        let hist = get_sample_n_hist().to_compact(&cp).unwrap();
95
96        assert_eq!(hist.total(), 56);
97        assert_eq!(hist.frequency(1, &cp), Some(40));
98        assert_eq!(hist.frequency(3, &cp), Some(15));
99        assert_eq!(hist.frequency(50, &cp), Some(1));
100        assert_eq!(hist.frequency(126, &cp), Some(0));
101
102    }
103
104    #[test]
105    fn compression1() {
106        let cp = new_compressed_params(2.0, 9).unwrap();
107        let hist = get_sample_n_hist().to_compressed(&cp).unwrap();
108
109        assert_eq!(hist.total(), 82);
110        assert_eq!(hist.frequency(1, &cp), Some(64));
111        assert_eq!(hist.frequency(3, &cp), Some(16));
112        assert_eq!(hist.frequency(50, &cp), Some(2));
113        assert!(hist.frequency(126, &cp).is_none());
114    }
115
116    #[test]
117    fn compression2() {
118        let cp = new_compressed_params(1.5, 30).unwrap();
119        let hist = get_sample_n_hist().to_compressed(&cp).unwrap();
120
121        assert_eq!(hist.total(), 75);
122        assert_eq!(hist.frequency(1, &cp), Some(57));
123        assert_eq!(hist.frequency(3, &cp), Some(17));
124        assert_eq!(hist.frequency(50, &cp), Some(1));
125        assert!(hist.frequency(126, &cp).is_none());
126    }
127
128    #[test]
129    fn c2_1(){
130        let cp = new_compressed_params(1.3, 63).unwrap();
131        let cp2 = new_compact_params(14.0, 8, 15).unwrap();
132        let cp = new_c2_params(cp2, cp);
133
134        let hist = get_sample_n_hist().to_c2(&cp).unwrap();
135
136        assert_eq!(hist.total(), 67);
137        assert_eq!(hist.frequency(1, &cp), Some(66));
138        assert_eq!(hist.frequency(3, &cp), Some(66));
139        assert_eq!(hist.frequency(50, &cp), Some(1));
140
141    }
142
143}
144
145// -----------------------------------------------------------------------------------------
146// --- DEFINE STANDARD HISTOGRAM -----------------------------------------------------------
147// -----------------------------------------------------------------------------------------
148
149/// These are empty parameters, standard histograms don't need any extra information.
150///
151/// # Example
152/// Basic Initialization is sufficient
153/// ```
154/// # use c2_histograms::standard::NullParams;
155/// let np = NullParams {};
156/// ```
157pub struct NullParams {}
158
159impl HistogramParams for NullParams {}
160
161/// `StandardHistogram` is a naive implementation of a histogram with no optimizations.
162/// It has optimal performance since it stores an exact count for exact labels, but it can take
163/// an arbitrary amount of space. It uses a library HashMap to store label-frequency pairs, and
164/// can be instantiated with one using the [`create_standard_histogram`] function.
165/// `StandardHistogram` is associated with NullParams.
166///
167/// # Example
168/// To create, pass a HashMap to the constructor function:
169/// ```
170/// # use c2_histograms::standard::{StandardHistogram, create_standard_histogram};
171/// # use std::collections::HashMap;
172/// let mut data = HashMap::new();
173/// data.insert(1, 40);
174/// data.insert(3, 15);
175/// data.insert(50, 1);
176///
177/// let histogram :StandardHistogram = create_standard_histogram(data);
178/// ```
179///
180/// [`create_standard_histogram`]: fn.create_standard_histogram.html
181pub struct StandardHistogram {
182    // maps from reuse interval to frequency
183    data : HashMap<usize, usize>,
184    total : usize
185}
186
187impl StandardHistogram {
188    pub(crate) fn data(&self) -> &HashMap<usize, usize> { &self.data }
189}
190
191/// Constructs a `StandardHistogram` from the given HashMap
192pub fn create_standard_histogram(data : HashMap<usize, usize>) -> StandardHistogram {
193    let total : usize = data.values().sum();
194    StandardHistogram { data, total }
195}
196
197impl Histogram<NullParams, usize, usize> for StandardHistogram {
198    fn labels(&self, _params: &NullParams) -> Option<Box<dyn Iterator<Item=usize>>> {
199        let mut out : Vec<usize> = vec![];
200        let mut counter : usize = 0;
201        for k in self.data.keys(){
202            out.insert(counter, *k);
203            counter += 1;
204        }
205        Some(Box::new(out.into_iter()))
206    }
207
208    fn frequency(&self, label: usize, _params: &NullParams) -> Option<usize> {
209        if let Some(t) = self.data.get(&label) {
210            Some(*t)
211        }
212        else{
213            None
214        }
215    }
216
217    fn total(&self) -> usize {
218        self.total
219    }
220}
221
222// ----------------------------------------------------------------------------------
223// --- TRANSFORMATION FUNCTIONS -----------------------------------------------------
224// ----------------------------------------------------------------------------------
225
226impl StandardHistogram {
227    /// If possible, creates a `CompactHistogram` of this histogram.
228    ///
229    /// # Example
230    /// ```
231    /// # use c2_histograms::standard::{StandardHistogram, create_standard_histogram};
232    /// # use std::collections::HashMap;
233    /// use c2_histograms::c2::{CompactHistogram, new_compact_params, CompactParams};
234    /// # let mut data = HashMap::new();
235    /// # data.insert(1, 40);
236    /// # data.insert(3, 15);
237    /// # data.insert(50, 1);
238    ///
239    /// let histogram : StandardHistogram = create_standard_histogram(data);
240    /// let cp : CompactParams = new_compact_params(3.5, 8, 8).unwrap();
241    ///
242    /// let compact_histogram : CompactHistogram = histogram.to_compact(&cp).unwrap();
243    /// ```
244    ///
245    pub fn to_compact(&self, cp : &CompactParams) -> Option<CompactHistogram> {
246        let mut overflow = 0;
247        let nb = cp.num_buckets();
248        let mut counters :Vec<usize> = vec![];
249        let mut sums :Vec<usize> = vec![];
250        let mut sub_range_averages :Vec<usize> = vec![];
251        for i in 0..nb {
252            counters.insert(i, 0);
253            sums.insert(i, 0);
254            sub_range_averages.insert(i, 0);
255        }
256
257        for lab in self.labels(&NullParams {} ).unwrap() {
258            if let Some(bi) = cp.bucket_index(lab) {
259                counters[bi] += *self.data().get(&lab).unwrap();
260                let sum : usize = self.data().values().sum();
261                sums[bi] += sum;
262            } else {
263                overflow += *self.data().get(&lab).unwrap();
264            }
265        }
266
267        for i in 0..nb {
268            let (min, max) = cp.range(i).unwrap();
269            let avg = (sums[i] as f64) / (counters[i] as f64);
270
271            let range = (max - min) as f64;
272            // if k is greater than range, do the additive thing
273            if cp.k() as f64 > range {
274                sub_range_averages.insert(i, ((avg - min as f64) / range).floor() as usize);
275            } else { // otherwise do proportional thing
276                sub_range_averages.insert(i, (((avg - min as f64) / range) * cp.k() as f64).floor() as usize);
277            }
278        }
279        if counters.iter().min() == counters.iter().max() {
280            None
281        } else {
282            Some(CompactHistogram { counters, sub_range_averages, overflowed : overflow  } )
283        }
284    }
285
286    /// If possible, creates a `CompressedHistogram` of this histogram.
287    ///
288    /// # Example
289    /// ```
290    /// # use c2_histograms::standard::{StandardHistogram, create_standard_histogram};
291    /// # use std::collections::HashMap;
292    /// use c2_histograms::c2::{CompressedHistogram, new_compressed_params, CompressedParams};
293    /// # let mut data = HashMap::new();
294    /// # data.insert(1, 40);
295    /// # data.insert(3, 15);
296    /// # data.insert(50, 1);
297    ///
298    /// let histogram : StandardHistogram = create_standard_histogram(data);
299    /// let cp : CompressedParams = new_compressed_params(1.5, 255).unwrap();
300    ///
301    /// let compact_histogram : CompressedHistogram = histogram.to_compressed(&cp).unwrap();
302    /// ```
303    ///
304    pub fn to_compressed(&self, cp : &CompressedParams) -> Option<CompressedHistogram> {
305        // we just need to transform the data into compressed form
306        let mut outdata = HashMap::new();
307        let mut tot = 0;
308        let mut sum = 0;
309
310        for k in self.data.keys() {
311            let count = *self.data.get(k).unwrap();
312            let comp = cp.compress_count(count).unwrap();
313            outdata.insert(*k, comp);
314            sum += cp.decompress_count(comp).unwrap();
315            tot += count;
316        }
317
318        Some(CompressedHistogram { data: outdata, total: sum, orig_total: tot } )
319    }
320
321    /// If possible, creates a `C2Histogram` of this histogram
322    /// # Example
323    /// ```
324    /// # use c2_histograms::standard::{StandardHistogram, create_standard_histogram};
325    /// # use std::collections::HashMap;
326    /// use c2_histograms::c2::{CompactHistogram, new_compact_params, CompactParams, new_c2_params, C2Params, C2Histogram};
327    /// use c2_histograms::c2::{CompressedHistogram, new_compressed_params, CompressedParams};
328    /// # let mut data = HashMap::new();
329    /// # data.insert(1, 40);
330    /// # data.insert(3, 15);
331    /// # data.insert(50, 1);
332    ///
333    /// let histogram : StandardHistogram = create_standard_histogram(data);
334    ///
335    /// let cp1 : CompactParams = new_compact_params(3.5, 8, 8).unwrap();
336    /// let cp2 : CompressedParams = new_compressed_params(1.5, 255).unwrap();
337    ///
338    /// let cp : C2Params = new_c2_params(cp1, cp2);
339    ///
340    /// let compact_histogram : C2Histogram = histogram.to_c2(&cp).unwrap();
341    /// ```
342    ///
343    pub fn to_c2(&self, c2p : &C2Params) -> Option<C2Histogram> {
344        let pressed_ps = c2p.compressed_ps();
345        let pact_ps = c2p.compact_ps();
346        if let Some(compact) = self.to_compact(&pact_ps) {
347            let orig_total = compact.total() - compact.overflowed;
348            let mut n_total = 0;
349            let mut counters = vec![];
350
351            let mut b = 0;
352            for lab in compact.labels(&pact_ps).unwrap() {
353                let comp_count = compact.frequency(lab, &pact_ps).unwrap();
354                if comp_count == 0 {
355                    counters.insert(b, comp_count as u16);
356                    b += 1;
357                    continue;
358                }
359                let comp_count = pressed_ps.compress_count(comp_count).unwrap();
360                counters.insert(b, comp_count);
361                n_total += pressed_ps.decompress_count(comp_count).unwrap();
362                b += 1;
363            }
364
365            Some(C2Histogram { counters, sub_range_averages: compact.sub_range_averages, orig_total,
366                                total : n_total,
367                                overflowed : compact.overflowed})
368        } else {
369            None
370        }
371    }
372}