rustics/
float_histogram.rs

1//
2//  Copyright 2024 Jonathan L Bertoni
3//
4//  This code is available under the Berkeley 2-Clause, Berkeley 3-clause,
5//  and MIT licenses.
6//
7
8//!
9//! ## Type
10//! * FloatHistogram
11//!   * FloatHistogram provides a very coarse log histogram that is similar to
12//!     the LogHistogram type with its pseudo-log function.
13//!
14//!   * Samples are divided into buckets based on their sign and exponent.
15//!     There is one bucket per 16 exponents, and separate buckets for positive
16//!     and negative samples with the same exponent.
17//!
18//!   * NaNs are counted separately, and otherwise are ignored.
19//!
20//!   * f64::INFINITY samples go into the largest bucket, and into a count of
21//!     infinite values.
22//!
23//!   * f64::NEG_INFINITY samples go into the smallest bucket, and into a count
24//!     of infinite values.
25//!
26//! ## Example
27//!```
28//!     use rustics::float_histogram::FloatHistogram;
29//!     use rustics::float_histogram::bucket_divisor;
30//!     use rustics::exponent_bias;
31//!     use rustics::PrintOpts;
32//!     use rustics::float_histogram::HistoOpts;
33//!     use rustics::stdout_printer;
34//!     use rustics::printer_mut;
35//!
36//!     // Create a HistoOpts for new().
37//!
38//!     let merge_min    = 0;  // not implemented yet
39//!     let merge_max    = 0;  // not implemented yet
40//!     let no_zero_rows = false;
41//!
42//!     let histo_opts   = HistoOpts { merge_min, merge_max, no_zero_rows };
43//!     let histo_opts   = Some(histo_opts);
44//!     let printer      = None;
45//!     let title        = None;
46//!     let units        = None;
47//!     let print_opts   = PrintOpts { printer, title, units, histo_opts };
48//!
49//!     // Create a histogram.
50//!
51//!     let mut histogram = FloatHistogram::new(&Some(print_opts));
52//!
53//!     let sample_count = 1000;
54//!
55//!     for i in 0..sample_count {
56//!          histogram.record(-(i as f64));
57//!     }
58//!
59//!     // Create a Printer instance for output.
60//!
61//!     let printer = stdout_printer();
62//!     let printer = printer_mut!(printer);
63//!
64//!     histogram.print(printer);
65//!
66//!     assert!(histogram.samples     == sample_count as usize);
67//!     assert!(histogram.nans        == 0);
68//!     assert!(histogram.infinities  == 0);
69//!
70//!     // Values -0.0 and -1.0 should be in the same bucket.
71//!
72//!     let zero_bucket = exponent_bias() / bucket_divisor();
73//!     let zero_bucket = zero_bucket as usize;
74//!
75//!     assert!(histogram.negative[zero_bucket    ] == 2);
76//!     assert!(histogram.negative[zero_bucket + 1] == sample_count - 2);
77//!
78//!     // Now test some non-finite values.  NaN values do not
79//!     // go into the sample count.
80//!
81//!     histogram.record(f64::INFINITY);
82//!     histogram.record(f64::NEG_INFINITY);
83//!     histogram.record(f64::NAN);
84//!
85//!     assert!(histogram.nans       == 1);
86//!     assert!(histogram.infinities == 2);
87//!     assert!(histogram.samples    == sample_count as usize + 2);
88//!
//!     histogram.print(printer);
//!```
90
91use super::Histogram;
92use super::Printable;
93use super::FloatHistogramBox;
94use super::PrintOption;
95use super::LogHistogramBox;
96use super::Printer;
97use super::biased_exponent;
98use super::max_biased_exponent;
99use super::exponent_bias;
100use super::sign;
101use super::parse_histo_opts;
102
/// Options that control how a histogram is printed.
///
/// The derived `Default` yields zero for both merge bounds and `false`
/// for `no_zero_rows`.
#[derive(Clone, Copy, Default)]
pub struct HistoOpts {
    /// Not yet implemented.
    pub merge_min: isize,
    /// Not yet implemented.
    pub merge_max: isize,
    /// Suppress any rows that are all zeros.
    pub no_zero_rows: bool,
}
122
123/// FloatHistogram records a log-like histogram of f64 samples.
124/// The numbers are recorded into buckets based on the exponent,
125/// broken into groups of 16.  For example, exponents 2^1 through
126/// 2^16 form one bucket.
127
128pub struct FloatHistogram {
129    pub negative:   Vec<u64>,
130    pub positive:   Vec<u64>,
131    pub buckets:    usize,
132    pub nans:       usize,
133    pub infinities: usize,
134    pub samples:    usize,
135    pub print_opts: PrintOption,
136    pub histo_opts: HistoOpts,
137}
138
/// Returns the number of consecutive exponent values that are merged
/// into a single histogram bucket.
pub fn bucket_divisor() -> isize {
    const EXPONENTS_PER_BUCKET: isize = 16;

    EXPONENTS_PER_BUCKET
}
144
// Returns the number of buckets printed on each output row.  The format
// strings in the print helpers are written for exactly this many
// columns, so the two must be changed together.
fn print_roundup() -> usize {
    const BUCKETS_PER_ROW: usize = 4;

    BUCKETS_PER_ROW
}
151
152// Compute the number of buckets for the negative and positive
153// arrays.
154
155fn buckets() -> isize {
156    max_biased_exponent() / bucket_divisor()
157}
158
// Rounds "value" up to the nearest multiple of "multiple".  A value
// that already is a multiple is returned unchanged.
fn roundup(value: usize, multiple: usize) -> usize {
    // Ceiling division:  count how many groups of size "multiple" are
    // needed to cover "value".
    let groups = (value + multiple - 1) / multiple;

    groups * multiple
}
164
165impl FloatHistogram {
166    /// Creates a new histogram.  The histo_opts option currently is
167    /// only partially implemented.
168
169    pub fn new(print_opts: &PrintOption) -> FloatHistogram {
170        let buckets    = buckets() as usize;
171        let buckets    = roundup(buckets, print_roundup());
172        let negative   = vec![0; buckets];
173        let positive   = vec![0; buckets];
174        let samples    = 0;
175        let nans       = 0;
176        let infinities = 0;
177        let histo_opts = parse_histo_opts(print_opts);
178        let print_opts = print_opts.clone();
179
180        FloatHistogram {
181            negative, positive, buckets, samples, nans, infinities, print_opts, histo_opts
182        }
183    }
184
185    /// Records one f64 sample into its bucket.
186
187    pub fn record(&mut self, sample: f64) {
188        // NaN values are counted but otherwise ignored.
189
190        if sample.is_nan() {
191            self.nans += 1;
192            return;
193        }
194
195        // Get the index into the histogram.
196        //
197        // We have two separate arrays for positive and negative
198        // values, so keep track of the sign.
199
200        let index =
201            if sample.is_infinite() {
202                self.infinities += 1;
203
204                let index = max_biased_exponent() / bucket_divisor();
205
206                index as usize
207            } else if sample == 0.0 {
208                let index = exponent_bias() / bucket_divisor();
209
210                index as usize
211            } else {
212                let index = biased_exponent(sample) / bucket_divisor();
213
214                index as usize
215            };
216
217        let sign = sign(sample);
218
219        // Now index into the appropriate array.
220
221        if sign < 0 {
222            self.negative[index] += 1;
223        } else {
224            self.positive[index] += 1;
225        }
226
227        self.samples += 1;
228    }
229
230    /// This function returns the unbiased IEEE binary64
231    /// exponent, with the sign of the sample value used
232    /// as a sign for the result.
233
234    pub fn convert_log_mode(&self) -> (isize, isize) {
235        let mut mode = 0;
236        let mut sign = -1;
237        let mut max  = self.negative[0];
238
239        for i in 1..self.negative.len() {
240            if self.negative[i] > max {
241                max  = self.negative[i];
242                mode = i as isize;
243            }
244        }
245
246        for i in 0..self.positive.len() {
247            if self.positive[i] > max {
248                max  = self.positive[i];
249                mode = i as isize;
250                sign = 1;
251            }
252        }
253
254        let biased_exponent = mode * bucket_divisor();
255        let biased_exponent = biased_exponent + bucket_divisor() / 2;
256
257        (sign, biased_exponent - exponent_bias())
258    }
259
260    pub fn mode_value(&self) -> f64 {
261        let (sign, exponent) = self.convert_log_mode();
262
263        let result   = 2.0_f64;
264        let result   = result.powi(exponent as i32);
265        let result   = result - result / 4.0;
266
267        sign as f64 * result
268    }
269
270    // This helper method prints the negative buckets.
271
272    fn print_negative(&self, printer: &mut dyn Printer, histo_opts: &HistoOpts) {
273        // Skip printing buckets that would appear before the first non-zero bucket.
274        // So find the non-zero bucket with the highest index in the array.
275
276        let mut scan = self.negative.len() - 1;
277
278        while scan > 0 && self.negative[scan] == 0 {
279            scan -= 1;
280        }
281
282        // If there's nothing to print, just return.
283
284        if scan == 0 && self.negative[0] == 0 {
285            return;
286        }
287
288        // Start printing from the lowest-index row.
289
290        let     start_row   = scan / print_roundup();
291        let mut rows        = start_row + 1;
292        let mut index       = start_row * print_roundup();
293
294        while rows > 0 {
295            if
296                histo_opts.no_zero_rows
297            ||  self.negative[index    ] != 0
298            ||  self.negative[index + 1] != 0
299            ||  self.negative[index + 2] != 0
300            ||  self.negative[index + 3] != 0 {
301
302                let exponent = (index as isize) * bucket_divisor();
303                let exponent = exponent - exponent_bias();
304
305                assert!(print_roundup() == 4);    // This format assumes a
306
307                let output =
308                    format!("    -2^{:>5}:    {:>10}    {:>10}    {:>10}    {:>10}",
309                        exponent,
310                        Printable::commas_u64(self.negative[index    ]),
311                        Printable::commas_u64(self.negative[index + 1]),
312                        Printable::commas_u64(self.negative[index + 2]),
313                        Printable::commas_u64(self.negative[index + 3])
314                    );
315
316                printer.print(&output);
317            }
318
319            rows -= 1;
320
321            if index >= print_roundup() {
322                index -= 4;
323            }
324        }
325    }
326
327    // This helper method prints the positive buckets.
328
329    fn print_positive(&self, printer: &mut dyn Printer, histo_opts: &HistoOpts) {
330        if self.samples == 0 {
331            return;
332        }
333
334        let mut last = self.buckets - 1;
335
336        while last > 0 && self.positive[last] == 0 {
337            last -= 1;
338        }
339
340        let     stop_index = last;
341        let mut i          = 0;
342
343        assert!(print_roundup() == 4);    // This code assumes len() % 4 == 0
344
345        // Print the rows that have non-zero entries.  Each row has
346        // the sample counts for 4 buckets.
347
348        while i <= stop_index {
349            assert!(i <= self.positive.len() - 4);
350
351            if
352                histo_opts.no_zero_rows
353            ||  self.positive[i    ] != 0
354            ||  self.positive[i + 1] != 0
355            ||  self.positive[i + 2] != 0
356            ||  self.positive[i + 3] != 0 {
357
358                let exponent = i as isize * bucket_divisor();
359                let exponent = exponent - exponent_bias();
360
361                let output =
362                    format!("    2^{:>5}:    {:>10}    {:>10}    {:>10}    {:>10}",
363                        exponent,
364                        Printable::commas_u64(self.positive[i]    ),
365                        Printable::commas_u64(self.positive[i + 1]),
366                        Printable::commas_u64(self.positive[i + 2]),
367                        Printable::commas_u64(self.positive[i + 3])
368                    );
369
370                printer.print(&output);
371            }
372
373            i += 4;
374        }
375    }
376
377    /// Prints the histogram.
378
379    pub fn print(&self, printer: &mut dyn Printer) {
380        self.print_opts(printer, &self.histo_opts);
381    }
382
383    /// Prints the histogram with the options specified.  The HistoOpts struct
384    /// is not fully implemented.
385
386    pub fn print_opts(&self, printer: &mut dyn Printer, histo_opts: &HistoOpts) {
387        let header =
388            format!("  Float Histogram:  ({} NaN, {} infinite, {} samples)",
389                self.nans, self.infinities, self.samples);
390
391        printer.print(&header);
392        self.print_negative(printer, histo_opts);
393        printer.print("  -----------------------");
394        self.print_positive(printer, histo_opts);
395    }
396
397    /// Resets the histogram to its initial state.
398
399    pub fn clear(&mut self) {
400        self.negative   = vec![0; self.buckets];
401        self.positive   = vec![0; self.buckets];
402        self.samples    = 0;
403        self.nans       = 0;
404        self.infinities = 0;
405    }
406
407    /// Returns the number of samples that were NaN and the number that
408    /// were non-finite.
409
410    pub fn non_finites(&self) -> (usize, usize) {
411        (self.nans, self.infinities)
412    }
413
414    pub fn histo_opts(&self) -> HistoOpts {
415        self.histo_opts
416    }
417
418    pub fn equals(&self, other: &FloatHistogram) -> bool {
419        for i in 0..other.negative.len() {
420            if self.negative[i] != other.negative[i] {
421                return false;
422            }
423        }
424
425        for i in 0..other.positive.len() {
426            if self.positive[i] != other.positive[i] {
427                return false;
428            }
429        }
430
431        if self.samples != other.samples {
432            return false;
433        }
434
435        if self.nans != other.nans {
436            return false;
437        }
438
439        if self.infinities != other.infinities {
440            return false;
441        }
442
443        true
444    }
445}
446
447impl Histogram for FloatHistogram {
448    fn print_histogram(&self, printer: &mut dyn Printer) {
449        self.print_opts(printer, &self.histo_opts);
450    }
451
452    /// Clears the histogram data.
453
454    fn clear_histogram(&mut self) {
455        self.clear()
456    }
457
458    /// Returns None since this histogram is not in a box.
459
460    fn to_log_histogram  (&self) -> Option<LogHistogramBox> {
461        None
462    }
463
464    /// Returns None since this histogram is not in a box.
465
466    fn to_float_histogram(&self) -> Option<FloatHistogramBox> {
467        None
468    }
469}
470
471#[cfg(test)]
472mod tests {
473    use crate::stdout_printer;
474    use crate::min_exponent;
475    use crate::PrintOpts;
476    use crate::printer_mut;
477    use super::*;
478
479    fn simple_test() {
480        let     merge_min    = min_exponent();
481        let     merge_max    = min_exponent();
482        let     no_zero_rows = true;
483        let     printer      = None;
484        let     title        = None;
485        let     units        = None;
486        let     histo_opts   = HistoOpts { merge_min, merge_max, no_zero_rows };
487        let     histo_opts   = Some(histo_opts);
488        let     print_opts   = PrintOpts { printer, title, units, histo_opts };
489        let mut histogram    = FloatHistogram::new(&Some(print_opts));
490        let     max_index    = max_biased_exponent() / bucket_divisor();
491
492        for i in 0..= max_index {
493            histogram.negative[i as usize] = i as u64;
494        }
495
496        for i in 0..= max_index {
497            histogram.positive[i as usize] = i as u64;
498        }
499
500        let printer_box = stdout_printer();
501        let printer     = printer_mut!(printer_box);
502
503        histogram.print(printer);
504
505        histogram.clear();
506
507        for data in &histogram.negative {
508            assert!(*data == 0);
509        }
510
511        for data in &histogram.positive {
512            assert!(*data == 0);
513        }
514
515        assert!(histogram.samples    == 0);
516        assert!(histogram.nans       == 0);
517        assert!(histogram.infinities == 0);
518
519        histogram.print(printer);
520
521        let sample_count = 1000;
522
523        for i in 0..sample_count {
524            histogram.record(-(i as f64));
525        }
526
527        histogram.print(printer);
528
529        assert!(histogram.samples     == sample_count as usize);
530        assert!(histogram.nans        == 0);
531        assert!(histogram.infinities  == 0);
532
533        // Values -0.0 and -1.0 should be in the same bucket.
534
535        let zero_bucket = exponent_bias() / bucket_divisor();
536        let zero_bucket = zero_bucket as usize;
537
538        assert!(histogram.negative[zero_bucket    ] == 2);
539        assert!(histogram.negative[zero_bucket + 1] == sample_count - 2);
540
541        for i in 0..sample_count {
542            histogram.record(i as f64);
543        }
544
545        histogram.print(printer);
546
547        assert!(histogram.samples     == 2 * sample_count as usize);
548        assert!(histogram.nans        == 0);
549        assert!(histogram.infinities  == 0);
550
551        assert!(histogram.positive[zero_bucket    ] == 2);
552        assert!(histogram.positive[zero_bucket + 1] == sample_count - 2);
553
554        histogram.record(f64::INFINITY);
555        histogram.record(f64::NEG_INFINITY);
556        histogram.record(f64::NAN);
557
558        histogram.print(printer);
559
560        let index = max_biased_exponent() / bucket_divisor();
561        let index = index as usize;
562
563        assert!(histogram.positive[index] == 1);
564        assert!(histogram.positive[index] == 1);
565
566        assert!(histogram.samples == (2 * sample_count + 2) as usize);
567
568        assert!(histogram.nans       == 1);
569        assert!(histogram.infinities == 2);
570    }
571
572    fn test_documentation() {
573        // Create a HistoOpts for new().
574
575        let merge_min    = 10;  // not implemented yet
576        let merge_max    = 11;  // not implemented yet
577        let no_zero_rows = false;
578        let histo_opts   = HistoOpts { merge_min, merge_max, no_zero_rows };
579        let printer      = None;
580        let title        = None;
581        let units        = None;
582        let histo_opts   = Some(histo_opts);
583        let print_opts   = PrintOpts { printer, title, units, histo_opts };
584
585        // Create a histogram and accept the default output format.
586
587        let mut histogram    = FloatHistogram::new(&Some(print_opts));
588        let     sample_count = 1000;
589
590        for i in 0..sample_count {
591             histogram.record(-(i as f64));
592        }
593
594        // Create a Printer instance for output.
595
596        let printer_box = stdout_printer();
597        let printer     = printer_mut!(printer_box);
598
599        histogram.print(printer);
600
601        assert!(histogram.samples     == sample_count as usize);
602        assert!(histogram.nans        == 0);
603        assert!(histogram.infinities  == 0);
604
605        // Values -0.0 and -1.0 should be in the same bucket.
606
607        let zero_bucket = exponent_bias() / bucket_divisor();
608        let zero_bucket = zero_bucket as usize;
609
610        assert!(histogram.negative[zero_bucket    ] == 2);
611        assert!(histogram.negative[zero_bucket + 1] == sample_count - 2);
612
613        // Now test some non-finite values.  NaN values do not
614        // go into the sample count.
615
616        histogram.record(f64::INFINITY);
617        histogram.record(f64::NEG_INFINITY);
618        histogram.record(f64::NAN);
619
620        assert!(histogram.nans       == 1);
621        assert!(histogram.infinities == 2);
622        assert!(histogram.samples    == sample_count as usize + 2);
623
624        // Check the official interface.
625
626        let (nans, infinities) = histogram.non_finites();
627
628        assert!(nans       == 1);
629        assert!(infinities == 2);
630
631        // Check the Histogram trait.
632
633        histogram.print_histogram(printer);
634        histogram.clear_histogram();
635
636        let (nans, infinities) = histogram.non_finites();
637
638        assert!(nans       == 0);
639        assert!(infinities == 0);
640
641        // Check histo_opts().
642
643        let histo_opts = histogram.histo_opts();
644
645        assert!(histo_opts.merge_min    == merge_min);
646        assert!(histo_opts.merge_max    == merge_max);
647        assert!(histo_opts.no_zero_rows == no_zero_rows);
648    }
649
650    fn test_log_mode() {
651        // Create a HistoOpts for new().
652
653        let merge_min    = 0;  // not implemented yet
654        let merge_max    = 0;  // not implemented yet
655        let no_zero_rows = false;
656        let histo_opts   = HistoOpts { merge_min, merge_max, no_zero_rows };
657
658        let printer      = None;
659        let title        = None;
660        let units        = None;
661        let histo_opts   = Some(histo_opts);
662        let print_opts   = PrintOpts { printer, title, units, histo_opts };
663
664        // Create a histogram and accept the default output format.
665
666        let mut histogram = FloatHistogram::new(&Some(print_opts));
667
668        let sample_count = 1000;
669
670        for i in 0..sample_count {
671             histogram.record(-(i as f64));
672        }
673
674        let (sign, exponent) = histogram.convert_log_mode();
675
676        let sign     = sign as f64;
677        let value    = 2_f64.powi(exponent as i32);
678        let value    = value - value / 4.0;
679        let expected = sign * value;
680
681        let log_mode = histogram.mode_value();
682
683        println!("test_log_mode:  got {}, expected {}", log_mode, expected);
684        assert!(log_mode == expected);
685    }
686
687    fn test_float_equals() {
688        let mut histo_1 = FloatHistogram::new(&None);
689        let mut histo_2 = FloatHistogram::new(&None);
690
691        for i in 0..1000 {
692            let sample = i as f64;
693
694            histo_1.record( sample);
695            histo_1.record(-sample);
696            histo_2.record( sample);
697            histo_2.record(-sample);
698        }
699
700        assert!(histo_1.equals(&histo_2));
701
702        histo_1.positive[1] += 1;
703        assert!(! histo_1.equals(&histo_2));
704        histo_1.positive[1] -= 1;
705        assert!(histo_1.equals(&histo_2));
706
707        histo_1.negative[1] += 1;
708        assert!(! histo_1.equals(&histo_2));
709        histo_1.negative[1] -= 1;
710        assert!(histo_1.equals(&histo_2));
711
712        histo_1.samples += 1;
713        assert!(! histo_1.equals(&histo_2));
714        histo_1.samples -= 1;
715        assert!(histo_1.equals(&histo_2));
716
717        histo_1.nans += 1;
718        assert!(! histo_1.equals(&histo_2));
719        histo_1.nans -= 1;
720        assert!(histo_1.equals(&histo_2));
721
722        histo_1.infinities += 1;
723        assert!(! histo_1.equals(&histo_2));
724        histo_1.infinities -= 1;
725        assert!(histo_1.equals(&histo_2));
726    }
727
728    #[test]
729    #[should_panic]
730    fn test_to_log() {
731        let histogram = FloatHistogram::new(&None);
732        let _         = histogram.to_log_histogram().unwrap();
733    }
734
735    #[test]
736    #[should_panic]
737    fn test_to_float() {
738        let histogram = FloatHistogram::new(&None);
739        let _         = histogram.to_float_histogram().unwrap();
740    }
741
742    #[test]
743    fn run_tests() {
744        simple_test       ();
745        test_documentation();
746        test_log_mode     ();
747        test_float_equals ();
748    }
749}