c2_histograms/standard.rs
1use std::collections::HashMap;
2use crate::c2::{CompressedParams, CompressedHistogram, CompactHistogram, CompactParams, C2Params, C2Histogram};
3
4// -----------------------------------------------------------------------------------------
5// --- GENERAL TRAITS ----------------------------------------------------------------------
6// -----------------------------------------------------------------------------------------
7
8/// For many of the histogram space optimizations, a set of parameters are needed to
9/// interpret the data in memory as a histogram. These parameters are often needed to create
10/// a more complex kind of histogram, and they always must be passed in the histogram's
11/// [`labels`] and [`frequency`] functions.
12///
13/// [`labels`]: trait.Histogram.html#tymethod.labels
14/// [`frequency`]: trait.Histogram.html#tymethod.frequency
15pub trait HistogramParams {}
16
17/// Specifies the behaviors that are necessary to be a histogram. It is generic
18/// and parameterized by `P`, the type of parameters necessary, `L`, the type
19/// of the labels, and `F`, the type that stores the frequency. Both the labels
20/// and the frequencies need to be some sort of number.
21pub trait Histogram<P, L, F> where P: HistogramParams {
22 /// If possible, creates and returns an iterator for the labels for which
23 /// the histogram has frequencies stored.
24 fn labels(&self, params : &P) -> Option<Box<dyn Iterator<Item=L>>>;
25
26 /// If possible, returns the frequency of a particular label.
27 /// The behavior need only be defined if the label is in the
28 /// set of values returned from labels.
29 fn frequency(&self, label : L, params : &P) -> Option<F>;
30
31 /// the total number of data-points in the histogram
32 fn total(&self) -> F;
33}
34
35// -----------------------------------------------------------------------------------------
36// --- TESTING FOR STANDARD HISTOGRAM ------------------------------------------------------
37// -----------------------------------------------------------------------------------------
38
39#[cfg(test)]
40mod standard_testing {
41 // test the general behavior as well as compacting them
42 // TODO set up tests for compression as well, and c2
43
44 use std::collections::HashMap;
45 use crate::standard::{create_standard_histogram, NullParams, StandardHistogram, Histogram};
46 use crate::c2::{new_compressed_params, new_compact_params, new_c2_params};
47
48 #[test]
49 fn init() {
50 let mut d = HashMap::new();
51 d.insert(1, 40);
52 d.insert(3, 15);
53 d.insert(50, 1);
54
55 let hist = create_standard_histogram(d);
56 let np = NullParams {};
57
58 assert_eq!(hist.total(), 56);
59 assert_eq!(hist.frequency(1, &np), Some(40));
60 assert_eq!(hist.frequency(3, &np), Some(15));
61 assert_eq!(hist.frequency(50, &np), Some(1));
62 assert_eq!(hist.frequency(4, &np), None);
63 for i in hist.labels(&np).unwrap(){
64 assert!(i == 1 || i == 3 || i == 50)
65 }
66 }
67
68 fn get_sample_n_hist() -> StandardHistogram {
69 let mut d = HashMap::new();
70 d.insert(1, 40);
71 d.insert(3, 15);
72 d.insert(50, 1);
73
74 create_standard_histogram(d)
75 }
76
77 #[test]
78 fn compaction1(){
79 let cp = new_compact_params(5.0, 6, 7).unwrap();
80 let hist = get_sample_n_hist().to_compact(&cp).unwrap();
81
82 assert_eq!(hist.total(), 56);
83 assert_eq!(hist.frequency(1, &cp), Some(55));
84 assert_eq!(hist.frequency(3, &cp), Some(55));
85 assert_eq!(hist.frequency(50, &cp), Some(1));
86 assert_eq!(hist.frequency(4, &cp), Some(55));
87 assert_eq!(hist.frequency(126, &cp), Some(0));
88
89 }
90
91 #[test]
92 fn compaction2(){
93 let cp = new_compact_params(2.0, 9, 7).unwrap();
94 let hist = get_sample_n_hist().to_compact(&cp).unwrap();
95
96 assert_eq!(hist.total(), 56);
97 assert_eq!(hist.frequency(1, &cp), Some(40));
98 assert_eq!(hist.frequency(3, &cp), Some(15));
99 assert_eq!(hist.frequency(50, &cp), Some(1));
100 assert_eq!(hist.frequency(126, &cp), Some(0));
101
102 }
103
104 #[test]
105 fn compression1() {
106 let cp = new_compressed_params(2.0, 9).unwrap();
107 let hist = get_sample_n_hist().to_compressed(&cp).unwrap();
108
109 assert_eq!(hist.total(), 82);
110 assert_eq!(hist.frequency(1, &cp), Some(64));
111 assert_eq!(hist.frequency(3, &cp), Some(16));
112 assert_eq!(hist.frequency(50, &cp), Some(2));
113 assert!(hist.frequency(126, &cp).is_none());
114 }
115
116 #[test]
117 fn compression2() {
118 let cp = new_compressed_params(1.5, 30).unwrap();
119 let hist = get_sample_n_hist().to_compressed(&cp).unwrap();
120
121 assert_eq!(hist.total(), 75);
122 assert_eq!(hist.frequency(1, &cp), Some(57));
123 assert_eq!(hist.frequency(3, &cp), Some(17));
124 assert_eq!(hist.frequency(50, &cp), Some(1));
125 assert!(hist.frequency(126, &cp).is_none());
126 }
127
128 #[test]
129 fn c2_1(){
130 let cp = new_compressed_params(1.3, 63).unwrap();
131 let cp2 = new_compact_params(14.0, 8, 15).unwrap();
132 let cp = new_c2_params(cp2, cp);
133
134 let hist = get_sample_n_hist().to_c2(&cp).unwrap();
135
136 assert_eq!(hist.total(), 67);
137 assert_eq!(hist.frequency(1, &cp), Some(66));
138 assert_eq!(hist.frequency(3, &cp), Some(66));
139 assert_eq!(hist.frequency(50, &cp), Some(1));
140
141 }
142
143}
144
145// -----------------------------------------------------------------------------------------
146// --- DEFINE STANDARD HISTOGRAM -----------------------------------------------------------
147// -----------------------------------------------------------------------------------------
148
149/// These are empty parameters, standard histograms don't need any extra information.
150///
151/// # Example
152/// Basic Initialization is sufficient
153/// ```
154/// # use c2_histograms::standard::NullParams;
155/// let np = NullParams {};
156/// ```
157pub struct NullParams {}
158
159impl HistogramParams for NullParams {}
160
161/// `StandardHistogram` is a naive implementation of a histogram with no optimizations.
162/// It has optimal performance since it stores an exact count for exact labels, but it can take
163/// an arbitrary amount of space. It uses a library HashMap to store label-frequency pairs, and
164/// can be instantiated with one using the [`create_standard_histogram`] function.
165/// `StandardHistogram` is associated with NullParams.
166///
167/// # Example
168/// To create, pass a HashMap to the constructor function:
169/// ```
170/// # use c2_histograms::standard::{StandardHistogram, create_standard_histogram};
171/// # use std::collections::HashMap;
172/// let mut data = HashMap::new();
173/// data.insert(1, 40);
174/// data.insert(3, 15);
175/// data.insert(50, 1);
176///
177/// let histogram :StandardHistogram = create_standard_histogram(data);
178/// ```
179///
180/// [`create_standard_histogram`]: fn.create_standard_histogram.html
181pub struct StandardHistogram {
182 // maps from reuse interval to frequency
183 data : HashMap<usize, usize>,
184 total : usize
185}
186
187impl StandardHistogram {
188 pub(crate) fn data(&self) -> &HashMap<usize, usize> { &self.data }
189}
190
191/// Constructs a `StandardHistogram` from the given HashMap
192pub fn create_standard_histogram(data : HashMap<usize, usize>) -> StandardHistogram {
193 let total : usize = data.values().sum();
194 StandardHistogram { data, total }
195}
196
197impl Histogram<NullParams, usize, usize> for StandardHistogram {
198 fn labels(&self, _params: &NullParams) -> Option<Box<dyn Iterator<Item=usize>>> {
199 let mut out : Vec<usize> = vec![];
200 let mut counter : usize = 0;
201 for k in self.data.keys(){
202 out.insert(counter, *k);
203 counter += 1;
204 }
205 Some(Box::new(out.into_iter()))
206 }
207
208 fn frequency(&self, label: usize, _params: &NullParams) -> Option<usize> {
209 if let Some(t) = self.data.get(&label) {
210 Some(*t)
211 }
212 else{
213 None
214 }
215 }
216
217 fn total(&self) -> usize {
218 self.total
219 }
220}
221
222// ----------------------------------------------------------------------------------
223// --- TRANSFORMATION FUNCTIONS -----------------------------------------------------
224// ----------------------------------------------------------------------------------
225
226impl StandardHistogram {
227 /// If possible, creates a `CompactHistogram` of this histogram.
228 ///
229 /// # Example
230 /// ```
231 /// # use c2_histograms::standard::{StandardHistogram, create_standard_histogram};
232 /// # use std::collections::HashMap;
233 /// use c2_histograms::c2::{CompactHistogram, new_compact_params, CompactParams};
234 /// # let mut data = HashMap::new();
235 /// # data.insert(1, 40);
236 /// # data.insert(3, 15);
237 /// # data.insert(50, 1);
238 ///
239 /// let histogram : StandardHistogram = create_standard_histogram(data);
240 /// let cp : CompactParams = new_compact_params(3.5, 8, 8).unwrap();
241 ///
242 /// let compact_histogram : CompactHistogram = histogram.to_compact(&cp).unwrap();
243 /// ```
244 ///
245 pub fn to_compact(&self, cp : &CompactParams) -> Option<CompactHistogram> {
246 let mut overflow = 0;
247 let nb = cp.num_buckets();
248 let mut counters :Vec<usize> = vec![];
249 let mut sums :Vec<usize> = vec![];
250 let mut sub_range_averages :Vec<usize> = vec![];
251 for i in 0..nb {
252 counters.insert(i, 0);
253 sums.insert(i, 0);
254 sub_range_averages.insert(i, 0);
255 }
256
257 for lab in self.labels(&NullParams {} ).unwrap() {
258 if let Some(bi) = cp.bucket_index(lab) {
259 counters[bi] += *self.data().get(&lab).unwrap();
260 let sum : usize = self.data().values().sum();
261 sums[bi] += sum;
262 } else {
263 overflow += *self.data().get(&lab).unwrap();
264 }
265 }
266
267 for i in 0..nb {
268 let (min, max) = cp.range(i).unwrap();
269 let avg = (sums[i] as f64) / (counters[i] as f64);
270
271 let range = (max - min) as f64;
272 // if k is greater than range, do the additive thing
273 if cp.k() as f64 > range {
274 sub_range_averages.insert(i, ((avg - min as f64) / range).floor() as usize);
275 } else { // otherwise do proportional thing
276 sub_range_averages.insert(i, (((avg - min as f64) / range) * cp.k() as f64).floor() as usize);
277 }
278 }
279 if counters.iter().min() == counters.iter().max() {
280 None
281 } else {
282 Some(CompactHistogram { counters, sub_range_averages, overflowed : overflow } )
283 }
284 }
285
286 /// If possible, creates a `CompressedHistogram` of this histogram.
287 ///
288 /// # Example
289 /// ```
290 /// # use c2_histograms::standard::{StandardHistogram, create_standard_histogram};
291 /// # use std::collections::HashMap;
292 /// use c2_histograms::c2::{CompressedHistogram, new_compressed_params, CompressedParams};
293 /// # let mut data = HashMap::new();
294 /// # data.insert(1, 40);
295 /// # data.insert(3, 15);
296 /// # data.insert(50, 1);
297 ///
298 /// let histogram : StandardHistogram = create_standard_histogram(data);
299 /// let cp : CompressedParams = new_compressed_params(1.5, 255).unwrap();
300 ///
301 /// let compact_histogram : CompressedHistogram = histogram.to_compressed(&cp).unwrap();
302 /// ```
303 ///
304 pub fn to_compressed(&self, cp : &CompressedParams) -> Option<CompressedHistogram> {
305 // we just need to transform the data into compressed form
306 let mut outdata = HashMap::new();
307 let mut tot = 0;
308 let mut sum = 0;
309
310 for k in self.data.keys() {
311 let count = *self.data.get(k).unwrap();
312 let comp = cp.compress_count(count).unwrap();
313 outdata.insert(*k, comp);
314 sum += cp.decompress_count(comp).unwrap();
315 tot += count;
316 }
317
318 Some(CompressedHistogram { data: outdata, total: sum, orig_total: tot } )
319 }
320
321 /// If possible, creates a `C2Histogram` of this histogram
322 /// # Example
323 /// ```
324 /// # use c2_histograms::standard::{StandardHistogram, create_standard_histogram};
325 /// # use std::collections::HashMap;
326 /// use c2_histograms::c2::{CompactHistogram, new_compact_params, CompactParams, new_c2_params, C2Params, C2Histogram};
327 /// use c2_histograms::c2::{CompressedHistogram, new_compressed_params, CompressedParams};
328 /// # let mut data = HashMap::new();
329 /// # data.insert(1, 40);
330 /// # data.insert(3, 15);
331 /// # data.insert(50, 1);
332 ///
333 /// let histogram : StandardHistogram = create_standard_histogram(data);
334 ///
335 /// let cp1 : CompactParams = new_compact_params(3.5, 8, 8).unwrap();
336 /// let cp2 : CompressedParams = new_compressed_params(1.5, 255).unwrap();
337 ///
338 /// let cp : C2Params = new_c2_params(cp1, cp2);
339 ///
340 /// let compact_histogram : C2Histogram = histogram.to_c2(&cp).unwrap();
341 /// ```
342 ///
343 pub fn to_c2(&self, c2p : &C2Params) -> Option<C2Histogram> {
344 let pressed_ps = c2p.compressed_ps();
345 let pact_ps = c2p.compact_ps();
346 if let Some(compact) = self.to_compact(&pact_ps) {
347 let orig_total = compact.total() - compact.overflowed;
348 let mut n_total = 0;
349 let mut counters = vec![];
350
351 let mut b = 0;
352 for lab in compact.labels(&pact_ps).unwrap() {
353 let comp_count = compact.frequency(lab, &pact_ps).unwrap();
354 if comp_count == 0 {
355 counters.insert(b, comp_count as u16);
356 b += 1;
357 continue;
358 }
359 let comp_count = pressed_ps.compress_count(comp_count).unwrap();
360 counters.insert(b, comp_count);
361 n_total += pressed_ps.decompress_count(comp_count).unwrap();
362 b += 1;
363 }
364
365 Some(C2Histogram { counters, sub_range_averages: compact.sub_range_averages, orig_total,
366 total : n_total,
367 overflowed : compact.overflowed})
368 } else {
369 None
370 }
371 }
372}