inc_stats/lib.rs
1//! Module with incremental statistics functions
2//!
3//! This contains helper functions for computing statistics on iterators, as well as structs that
4//! support incremental addition of data.
5mod bytes;
6mod copy;
7mod utils;
8
9use bytes::ToBytes;
10pub use copy::DerefCopy;
11use num_traits::{Float, FromPrimitive};
12use std::cell::RefCell;
13use std::cmp::{self, Eq};
14use std::collections::{BTreeSet, HashMap};
15use std::f64;
16use std::hash::{Hash, Hasher};
17use std::iter::{self, FromIterator};
18use std::ops::AddAssign;
19pub use utils::StatsError;
20
21/// Summary statistics struct
22///
23/// This struct aggregates data to compute summary statistics using constant space overhead. It
24/// implements the FromIterator trait so it can be collected from an iterator of floats.
25///
26/// # Examples
27///
28/// ```
29/// let mut stats = inc_stats::SummStats::new();
30/// for &num in &[2.0, 4.0, 8.0] {
31/// stats.add(num);
32/// }
33/// assert_eq!(3, stats.count());
34/// ```
35///
36/// ```
37/// let stats: inc_stats::SummStats<f64> = [2.0, 4.0, 8.0].iter().collect();
38/// assert_eq!(3, stats.count());
39/// ```
40#[derive(Debug)]
41pub struct SummStats<T: Float + FromPrimitive + AddAssign> {
42 non_nan: bool,
43 count: u64,
44 mean: T,
45 ssd: T,
46 min: T,
47 max: T,
48}
49
50impl<T: Float + FromPrimitive + AddAssign> SummStats<T> {
51 /// Create a new SummStats struct with no data
52 pub fn new() -> Self {
53 SummStats {
54 non_nan: false, // any value is not nan
55 count: 0,
56 mean: T::zero(),
57 ssd: T::zero(),
58 min: T::infinity(),
59 max: T::neg_infinity(),
60 }
61 }
62
63 /// Add a number
64 ///
65 /// # Examples
66 ///
67 /// ```
68 /// let mut stats = inc_stats::SummStats::new();
69 /// stats.add(0.0);
70 /// stats.add(&1.2);
71 /// assert_eq!(2, stats.count());
72 /// ```
73 ///
74 /// # Panics
75 ///
76 /// when the internal count can't be converted into the float data type.
77 pub fn add(&mut self, bval: impl DerefCopy<Output = T>) {
78 self.checked_add(bval).unwrap();
79 }
80
81 /// Add a number
82 ///
83 /// Check for conversion errors, will only happen when the internal count can't be converted
84 /// into the float data type.
85 ///
86 /// # Examples
87 ///
88 /// ```
89 /// let mut stats = inc_stats::SummStats::new();
90 /// stats.checked_add(0.0).unwrap();
91 /// assert_eq!(1, stats.count());
92 /// ```
93 pub fn checked_add(&mut self, rval: impl DerefCopy<Output = T>) -> Result<(), StatsError> {
94 // NOTE need to exit early before mutating state
95 let count = T::from_u64(self.count + 1).ok_or("can't convert from count to float type")?;
96 let val = rval.deref_copy();
97 self.non_nan |= !val.is_nan();
98 self.count += 1;
99 let delta = val - self.mean;
100 self.mean += delta / count;
101 self.ssd += (val - self.mean) * delta;
102 if val < self.min {
103 self.min = val;
104 }
105 if self.max < val {
106 self.max = val;
107 }
108 Ok(())
109 }
110
111 /// Get the number of values added
112 pub fn count(&self) -> u64 {
113 self.count
114 }
115
116 fn tcount(&self) -> T {
117 // if we could add the last value, then we must have been able to convert this
118 T::from_u64(self.count).unwrap()
119 }
120
121 /// Get the minimum non nan value
122 ///
123 /// Constant time. If no non nan values have been added, this is None.
124 ///
125 /// # Examples
126 ///
127 /// ```
128 /// let stats: inc_stats::SummStats<_> = [2.0, 4.0, std::f64::NAN].iter().collect();
129 /// assert_eq!(2.0, stats.min().unwrap());
130 /// ```
131 ///
132 /// ```
133 /// let mut stats = inc_stats::SummStats::new();
134 /// stats.add(std::f64::NAN);
135 /// assert!(stats.min().is_none());
136 /// ```
137 pub fn min(&self) -> Option<T> {
138 if self.non_nan {
139 Some(self.min)
140 } else {
141 None
142 }
143 }
144
145 /// Get the maximum non nan value
146 ///
147 /// Constant time. If no non nan values have been added, this is None.
148 ///
149 /// # Examples
150 ///
151 /// ```
152 /// let stats: inc_stats::SummStats<_> = [2.0, 4.0, std::f64::NAN].iter().collect();
153 /// assert_eq!(4.0, stats.max().unwrap());
154 /// ```
155 pub fn max(&self) -> Option<T> {
156 if self.non_nan {
157 Some(self.max)
158 } else {
159 None
160 }
161 }
162
163 /// Get the mean
164 ///
165 /// Constant time.
166 ///
167 /// # Examples
168 ///
169 /// ```
170 /// let stats: inc_stats::SummStats<f64> = [2.0, 4.0].iter().collect();
171 /// assert!((3.0 - stats.mean().unwrap()).abs() < 1.0e-6);
172 /// ```
173 ///
174 /// ```
175 /// let stats = inc_stats::SummStats::<f64>::new();
176 /// assert!(stats.mean().is_none());
177 /// ```
178 pub fn mean(&self) -> Option<T> {
179 match self.count {
180 0 => None,
181 _ => Some(self.mean),
182 }
183 }
184
185 /// Get the sum
186 ///
187 /// Constant time.
188 ///
189 /// # Examples
190 ///
191 /// ```
192 /// let stats: inc_stats::SummStats<f64> = [2.0, 4.0].iter().collect();
193 /// assert!((6.0 - stats.sum()).abs() < 1.0e-6);
194 /// ```
195 pub fn sum(&self) -> T {
196 self.tcount() * self.mean
197 }
198
199 /// Get the sample standard deviation
200 ///
201 /// Constant time.
202 ///
203 /// # Examples
204 ///
205 /// ```
206 /// let stats: inc_stats::SummStats<f64> = [2.0, 4.0].iter().collect();
207 /// assert!((1.4142136 - stats.standard_deviation().unwrap()).abs() < 1.0e-6);
208 /// ```
209 pub fn standard_deviation(&self) -> Option<T> {
210 self.variance().map(T::sqrt)
211 }
212
213 /// Get the sample variance
214 ///
215 /// Constant time.
216 ///
217 /// # Examples
218 ///
219 /// ```
220 /// let stats: inc_stats::SummStats<f64> = [2.0, 4.0].iter().collect();
221 /// assert!((2.0 - stats.variance().unwrap()).abs() < 1.0e-6);
222 /// ```
223 ///
224 /// ```
225 /// let mut stats = inc_stats::SummStats::new();
226 /// stats.add(0.0);
227 /// assert!(stats.variance().is_none());
228 /// ```
229 pub fn variance(&self) -> Option<T> {
230 match self.count {
231 0 | 1 => None,
232 // if we could add to this, it must be possible
233 _ => Some(self.ssd / T::from_u64(self.count - 1).unwrap()),
234 }
235 }
236
237 /// Get the standard error
238 ///
239 /// Constant time.
240 ///
241 /// # Examples
242 ///
243 /// ```
244 /// let stats: inc_stats::SummStats<f64> = [2.0, 4.0].iter().collect();
245 /// assert!((1.0 - stats.standard_error().unwrap()).abs() < 1.0e-6);
246 /// ```
247 pub fn standard_error(&self) -> Option<T> {
248 self.standard_deviation().map(|d| d / self.tcount().sqrt())
249 }
250}
251
252impl<T: Float + FromPrimitive + AddAssign> Default for SummStats<T> {
253 fn default() -> Self {
254 SummStats::new()
255 }
256}
257
258impl<T: Float + FromPrimitive + AddAssign, V: DerefCopy<Output = T>> FromIterator<V>
259 for SummStats<T>
260{
261 fn from_iter<I>(iter: I) -> Self
262 where
263 I: IntoIterator<Item = V>,
264 {
265 let mut stats = SummStats::new();
266 for val in iter {
267 stats.add(val);
268 }
269 stats
270 }
271}
272
273/// Get the mean of a set of data
274///
275/// This method takes constant space and linear time.
276///
277/// # Examples:
278///
279/// ```
280/// let mean: f64 = inc_stats::mean(&[2.0, 4.0]).unwrap();
281/// assert!((3.0 - mean).abs() < 1.0e-6);
282/// ```
283pub fn mean<T, V, I>(data: I) -> Option<T>
284where
285 T: Float + FromPrimitive + AddAssign,
286 V: DerefCopy<Output = T>,
287 I: IntoIterator<Item = V>,
288{
289 data.into_iter().collect::<SummStats<_>>().mean()
290}
291
292/// The mutable data structure that caches ordered percentiles
293#[derive(Debug)]
294struct CachedOrdering<T: Float + FromPrimitive> {
295 data: Vec<T>,
296 in_order: BTreeSet<usize>,
297}
298
299impl<T: Float + FromPrimitive> CachedOrdering<T> {
300 /// Create a new Percentiles object with no data
301 fn new() -> Self {
302 CachedOrdering {
303 // all of the points aded so far
304 data: Vec::new(),
305 // indices in data that are known to be in sorted order
306 in_order: BTreeSet::new(),
307 }
308 }
309
310 /// Add a data point
311 fn add(&mut self, val: T) {
312 self.data.push(val);
313 self.in_order.clear();
314 }
315
316 /// assert index is in sorted order, and get value at that order
317 fn order_index(&mut self, index: usize) -> T {
318 if self.in_order.insert(index) {
319 let start = match self.in_order.range(..index).next_back() {
320 Some(ind) => ind + 1,
321 None => 0,
322 };
323 let end = match self.in_order.range(index + 1..).next() {
324 Some(&ind) => ind,
325 None => self.data.len(),
326 };
327 self.data[start..end].select_nth_unstable_by(index - start, |a, b| {
328 // we filter out nans
329 a.partial_cmp(b).unwrap()
330 });
331 }
332 self.data[index]
333 }
334
335 /// Get the amount of data
336 fn len(&self) -> usize {
337 self.data.len()
338 }
339}
340
341/// Data percentile struct
342///
343/// This struct stores data to allow efficient computation of percentiles. This struct takes linear
344/// space. It implements FromIterator to allow collection. This collection ignores NaNs.
345///
346/// The structure is designed for efficient computation of percentiles when data is added and then
347/// percentiles are computed. Adding data is constant time, querying percentiles is linear time,
348/// with some caching to make it faster for computing several percentiles. If you were going to
349/// query percentiles while adding data, then you probably want to use a different data structure.
350///
351/// # Examples
352///
353/// ```
354/// let mut percs = inc_stats::Percentiles::new();
355/// for &num in &[2.0, 4.0, 8.0] {
356/// percs.add(num);
357/// }
358/// assert_eq!(3, percs.count());
359/// ```
360///
361/// ```
362/// let percs: inc_stats::Percentiles<f64> = [2.0, 4.0, 8.0].iter().collect();
363/// assert_eq!(3, percs.count());
364/// ```
365#[derive(Debug)]
366pub struct Percentiles<T: Float + FromPrimitive> {
367 data: RefCell<CachedOrdering<T>>,
368 nan_count: usize,
369}
370
371impl<T: Float + FromPrimitive> Percentiles<T> {
372 /// Create a new Percentiles object with no data
373 pub fn new() -> Self {
374 Percentiles {
375 data: RefCell::new(CachedOrdering::new()),
376 nan_count: 0,
377 }
378 }
379
380 /// Add a data point
381 pub fn add(&mut self, rval: impl DerefCopy<Output = T>) {
382 let val = rval.deref_copy();
383 if val.is_nan() {
384 self.nan_count += 1;
385 } else {
386 self.data.borrow_mut().add(val);
387 }
388 }
389
390 /// Get the number of data points
391 pub fn count(&self) -> usize {
392 self.data.borrow().len() + self.nan_count
393 }
394
395 /// Get a number of percentiles
396 ///
397 /// This takes linear time in the number of added data points, and log linear in the number of
398 /// percentiles. This will be marginally more efficient than calling percentile repeatedly in a
399 /// bad order.
400 ///
401 /// # Examples:
402 ///
403 /// ```
404 /// let percs: inc_stats::Percentiles<f64> = [1.0, 3.0, 7.0].iter().collect();
405 /// let quarts = percs.percentiles(&[0.75, 0.25, 0.5]).unwrap().unwrap();
406 /// assert!((5.0 - quarts[0]).abs() < 1.0e-6);
407 /// assert!((2.0 - quarts[1]).abs() < 1.0e-6);
408 /// assert!((3.0 - quarts[2]).abs() < 1.0e-6);
409 /// ```
410 // NOTE inside out does not guarantee worst case linear complexity. Asking for percentiles that
411 // correspond to the 1st, 2nd, 3rd, index etc will still have `log p * n` complexity (versus `p
412 // * n` for the native way). If we instead picked the percentiles closest to the midpoint of
413 // the remaining space, the complexity would drop to `log p + n`, which is just n.
414 pub fn percentiles<P, I>(&self, percentiles: I) -> Result<Option<Vec<T>>, StatsError>
415 where
416 P: DerefCopy<Output = f64>,
417 I: IntoIterator<Item = P>,
418 {
419 let len = self.data.borrow().len();
420 match len {
421 0 => Ok(None),
422 _ => {
423 // need to output result in same order, but need this sorted for efficiency
424 let mut indexed: Vec<(usize, f64)> = percentiles
425 .into_iter()
426 .map(DerefCopy::deref_copy)
427 .enumerate()
428 .collect();
429 if indexed.iter().any(|(_, e)| e.is_nan()) {
430 Err(StatsError::from("percentiles can't be nan"))?
431 }
432 // we checked there were no nans
433 indexed.sort_unstable_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap());
434 // allocate result
435 let mut result: Vec<Option<T>> = iter::repeat(None).take(indexed.len()).collect();
436 for &(ind, perc) in utils::inside_out(&indexed)? {
437 // we checked that we had data
438 result[ind] = Some(self.percentile(perc)?.unwrap());
439 }
440 let checked_result: Option<Vec<_>> = result.iter().copied().collect();
441 // fails if there is a logic error in inside_out
442 Ok(Some(checked_result.unwrap()))
443 }
444 }
445 }
446
447 /// Get a percentile
448 ///
449 /// Linear time.
450 ///
451 /// # Examples:
452 ///
453 /// ```
454 /// let percs: inc_stats::Percentiles<f64> = [1.0, 5.0].iter().collect();
455 /// let quart = percs.percentile(0.25).unwrap().unwrap();
456 /// assert!((2.0 - quart).abs() < 1.0e-6);
457 /// ```
458 pub fn percentile(
459 &self,
460 percentile: impl DerefCopy<Output = f64>,
461 ) -> Result<Option<T>, StatsError> {
462 let perc = percentile.deref_copy();
463 if perc < 0.0 || 1.0 < perc {
464 Err(StatsError::new(format!(
465 "all percentiles must be between 0 and 1, but got: {}",
466 perc
467 )))
468 } else {
469 let mut ordering = self.data.borrow_mut();
470 match ordering.len() {
471 0 => Ok(None),
472 _ => {
473 let p_index = (ordering.len() - 1) as f64 * perc;
474 let low_index = p_index.floor() as usize;
475 let high_index = p_index.ceil() as usize;
476 let low = ordering.order_index(low_index);
477 let high = ordering.order_index(high_index);
478 let weight = p_index - low_index as f64;
479 let perc = utils::weighted_average(low, high, weight)
480 .ok_or("can't convert from weight to float")?;
481 Ok(Some(perc))
482 }
483 }
484 }
485 }
486
487 /// Get the median
488 ///
489 /// Linear time.
490 ///
491 /// # Examples:
492 ///
493 /// ```
494 /// let percs: inc_stats::Percentiles<f64> = [1.0, 5.0, 100.0].iter().collect();
495 /// let med = percs.median().unwrap();
496 /// assert_eq!(5.0, med);
497 /// ```
498 pub fn median(&self) -> Option<T> {
499 self.percentile(0.5).expect("0.5 is a valid percentile")
500 }
501}
502
503impl<T: Float + FromPrimitive> Default for Percentiles<T> {
504 fn default() -> Self {
505 Percentiles::new()
506 }
507}
508
509impl<T: Float + FromPrimitive, V: DerefCopy<Output = T>> FromIterator<V> for Percentiles<T> {
510 fn from_iter<I>(iter: I) -> Self
511 where
512 I: IntoIterator<Item = V>,
513 {
514 let mut percs = Percentiles::new();
515 for val in iter {
516 percs.add(val);
517 }
518 percs
519 }
520}
521
522/// Get the median of a set of data
523///
524/// This takes linear time and linear space.
525///
526/// # Examples
527///
528/// ```
529/// let med = inc_stats::median(&[3.0, 1.0, 2.0]).unwrap();
530/// assert_eq!(2.0, med);
531/// ```
532///
533/// ```
534/// let med = inc_stats::median(std::iter::empty::<f64>());
535/// assert!(med.is_none());
536/// ```
537pub fn median<T, V, I>(data: I) -> Option<T>
538where
539 T: Float + FromPrimitive,
540 V: DerefCopy<Output = T>,
541 I: IntoIterator<Item = V>,
542{
543 data.into_iter().collect::<Percentiles<T>>().median()
544}
545
546#[derive(Debug, PartialEq)]
547struct HashFloat<T: Float + ToBytes>(T);
548
549impl<T: Float + ToBytes> Eq for HashFloat<T> {}
550
551impl<T: Float + ToBytes> Hash for HashFloat<T> {
552 fn hash<H: Hasher>(&self, state: &mut H) {
553 self.0.to_bytes().hash(state);
554 }
555}
556
557/// Mode computation struct
558///
559/// This struct stores data to allow efficient computation of the mode. This struct takes linear
560/// space. It implements FromIterator to allow collection.
561///
562/// # Examples
563///
564/// ```
565/// let mut mode = inc_stats::Mode::new();
566/// for &num in &[2.0, 4.0, 8.0] {
567/// mode.add(num);
568/// }
569/// assert_eq!(3, mode.count());
570/// ```
571///
572/// ```
573/// let mode: inc_stats::Mode<f64> = [2.0, 4.0, 8.0].iter().collect();
574/// assert_eq!(3, mode.count());
575/// ```
576#[derive(Debug)]
577pub struct Mode<T: Float + ToBytes> {
578 counts: HashMap<HashFloat<T>, usize>,
579 count: usize,
580 nan_count: usize,
581 mode: Vec<T>,
582 mode_count: usize,
583}
584
585impl<T: Float + ToBytes> Mode<T> {
586 /// Create a new Mode object with no data
587 pub fn new() -> Self {
588 Mode {
589 counts: HashMap::new(),
590 count: 0,
591 nan_count: 0,
592 mode: Vec::new(),
593 mode_count: 0,
594 }
595 }
596
597 /// Add a data point
598 pub fn add(&mut self, rval: impl DerefCopy<Output = T>) {
599 let val = rval.deref_copy();
600 self.count += 1;
601 if val.is_nan() {
602 self.nan_count += 1;
603 } else {
604 let val_count = self.counts.entry(HashFloat(val)).or_insert(0);
605 *val_count += 1;
606 if *val_count > self.mode_count {
607 self.mode.clear();
608 self.mode.push(val);
609 self.mode_count += 1;
610 } else if *val_count == self.mode_count {
611 self.mode.push(val);
612 }
613 }
614 }
615
616 /// Get the number of data points
617 ///
618 /// # Examples
619 ///
620 /// ```
621 /// let num: inc_stats::Mode<_> = [1.0, 2.0, std::f64::NAN].iter().collect();
622 /// assert_eq!(3, num.count());
623 /// ```
624 pub fn count(&self) -> usize {
625 self.count
626 }
627
628 /// Count the number of distinct values
629 ///
630 /// Distinctness for floating points is very finicy. Values that may print the same may not be
631 /// same underlying value. Computations that yield the same value in "real" math may not yield
632 /// the same value in floating point math.
633 ///
634 /// This ignores nans
635 ///
636 /// # Examples
637 ///
638 /// ```
639 /// let num: inc_stats::Mode<_> = [1.0, 2.0, 2.0, std::f64::NAN].iter().collect();
640 /// assert_eq!(2, num.count_distinct());
641 /// ```
642 pub fn count_distinct(&self) -> usize {
643 self.counts.len()
644 }
645
646 /// Count the number of distinct values
647 ///
648 /// This treats all NaNs as different
649 ///
650 /// # Examples
651 ///
652 /// ```
653 /// let num: inc_stats::Mode<_> = [1.0, std::f64::NAN, std::f64::NAN].iter().collect();
654 /// assert_eq!(3, num.count_distinct_nan());
655 /// ```
656 ///
657 /// Treat all nans the same
658 /// ```
659 /// let num: inc_stats::Mode<_> = [1.0, std::f64::NAN, std::f64::NAN].iter().collect();
660 /// assert_eq!(2, std::cmp::min(num.count_distinct() + 1, num.count_distinct_nan()));
661 /// ```
662 pub fn count_distinct_nan(&self) -> usize {
663 self.counts.len() + self.nan_count
664 }
665
666 /// Return an iterator of all of the modes
667 ///
668 /// Multiple modes are retruned in the order they became a mode. NaNs are ignored.
669 ///
670 /// This iterator has read only reference to the mode data structure that must be dropped to
671 /// continue modifying the mode.
672 ///
673 /// Constant time.
674 ///
675 /// # Examples
676 ///
677 /// ```
678 /// let mut mode = inc_stats::Mode::new();
679 /// {
680 /// let mut it = mode.modes();
681 /// assert!(it.next().is_none());
682 /// }
683 ///
684 /// mode.add(5.0);
685 /// {
686 /// let mut it = mode.modes();
687 /// assert_eq!(Some(5.0), it.next());
688 /// assert!(it.next().is_none());
689 /// }
690 ///
691 /// mode.add(3.0);
692 /// {
693 /// let mut it = mode.modes();
694 /// assert_eq!(Some(5.0), it.next());
695 /// assert_eq!(Some(3.0), it.next());
696 /// assert!(it.next().is_none());
697 /// }
698 ///
699 /// mode.add(3.0);
700 /// {
701 /// let mut it = mode.modes();
702 /// assert_eq!(Some(3.0), it.next());
703 /// assert!(it.next().is_none());
704 /// }
705 /// ```
706 pub fn modes(&self) -> impl Iterator<Item = T> + '_ {
707 self.mode.iter().copied()
708 }
709
710 /// gets an option for if nan would be in the mode
711 fn nan_mode(&self) -> Option<T> {
712 if self.nan_count > 0 && self.nan_count >= self.mode_count {
713 Some(T::nan())
714 } else {
715 None
716 }
717 }
718
719 /// Return an iterator of all of the modes
720 ///
721 /// This iterator will include NaN if present as a mode. NaN will always be returned last
722 ///
723 /// Constant time.
724 ///
725 /// # Examples
726 ///
727 /// ```
728 /// let mode: inc_stats::Mode<_> = [std::f64::NAN, 5.0].iter().collect();
729 /// let mut it = mode.modes_nan();
730 /// assert_eq!(Some(5.0), it.next());
731 /// assert!(it.next().unwrap().is_nan());
732 /// assert!(it.next().is_none());
733 /// ```
734 pub fn modes_nan(&self) -> impl Iterator<Item = T> + '_ {
735 self.modes().chain(self.nan_mode())
736 }
737
738 /// Return the current mode
739 ///
740 /// If multiple modes exist, this returns the first element that reached the largest count.
741 /// NaNs are ignored when computing the mode.
742 ///
743 /// Constant time.
744 ///
745 /// # Examples
746 ///
747 /// ```
748 /// let mode: inc_stats::Mode<_> = [2.0, 4.0, std::f64::NAN, 4.0].iter().collect();
749 /// assert_eq!(4.0, mode.mode().unwrap());
750 /// ```
751 ///
752 /// ```
753 /// let mode = inc_stats::Mode::<f64>::new();
754 /// assert!(mode.mode().is_none());
755 /// ```
756 pub fn mode(&self) -> Option<T> {
757 self.modes().next()
758 }
759
760 /// Return the current mode
761 ///
762 /// If multiple modes exist, this returns the first element that reached the largest count that
763 /// wasn't NaN. NaN will be returned only if it is the unique mode.
764 ///
765 /// Constant time.
766 ///
767 /// # Examples
768 ///
769 /// ```
770 /// let mode: inc_stats::Mode<_> = [2.0, 4.0, std::f64::NAN, std::f64::NAN].iter().collect();
771 /// assert!(mode.mode_nan().unwrap().is_nan());
772 /// ```
773 pub fn mode_nan(&self) -> Option<T> {
774 if self.nan_count > self.mode_count {
775 Some(T::nan())
776 } else {
777 self.mode()
778 }
779 }
780
781 /// Return the number of times the mode occurred
782 ///
783 /// Constant time.
784 ///
785 /// # Examples
786 ///
787 /// ```
788 /// let mode: inc_stats::Mode<_> = [2.0, 4.0, std::f64::NAN, 4.0].iter().collect();
789 /// assert_eq!(2, mode.mode_count());
790 /// ```
791 pub fn mode_count(&self) -> usize {
792 self.mode_count
793 }
794
795 /// Return the number of times the mode occurred
796 ///
797 /// Counts NaNs as a possible mode.
798 ///
799 /// Constant time.
800 ///
801 /// # Examples
802 ///
803 /// ```
804 /// let mode: inc_stats::Mode<_> = [2.0, 4.0, std::f64::NAN, std::f64::NAN].iter().collect();
805 /// assert_eq!(2, mode.mode_count_nan());
806 /// ```
807 pub fn mode_count_nan(&self) -> usize {
808 cmp::max(self.mode_count, self.nan_count)
809 }
810}
811
812impl<T: Float + ToBytes, V: DerefCopy<Output = T>> FromIterator<V> for Mode<T> {
813 fn from_iter<I>(iter: I) -> Self
814 where
815 I: IntoIterator<Item = V>,
816 {
817 let mut mode = Mode::new();
818 for val in iter {
819 mode.add(val);
820 }
821 mode
822 }
823}
824
825/// Get the mode of a set of data
826///
827/// If multiple modes exist, this returns the first element that reached the largest count.
828/// NaNs are ignored when computing the mode.
829///
830/// # Examples:
831///
832/// ```
833/// let mode = inc_stats::mode(&[2.0, 4.0, 2.0]);
834/// assert_eq!(Some(2.0), mode);
835/// ```
836///
837/// ```
838/// let mode: Option<f64> = inc_stats::mode(&[]);
839/// assert!(mode.is_none());
840/// ```
841pub fn mode<T, V, I>(data: I) -> Option<T>
842where
843 T: Float + ToBytes,
844 V: DerefCopy<Output = T>,
845 I: IntoIterator<Item = V>,
846{
847 data.into_iter().collect::<Mode<T>>().mode()
848}
849
850#[cfg(test)]
851mod tests {
852 use super::*;
853
854 #[test]
855 fn f32_mean_test() {
856 let avg: f32 = mean(&[0.0, 1.0, 2.0]).unwrap();
857 assert!((avg - 1.0).abs() < 1e-6);
858 }
859
860 #[test]
861 fn f32_median_test() {
862 let avg: f32 = median(&[0.0, 1.0, 2.0, 3.0]).unwrap();
863 assert!((avg - 1.5).abs() < 1e-6);
864 }
865
866 #[test]
867 fn nan_percentile_test() {
868 let percs: Percentiles<_> = [f64::NAN].iter().collect();
869 // we know we put something in
870 assert_eq!(1, percs.count());
871 // but don't have enough data to get median
872 assert_eq!(None, percs.median());
873 }
874
875 #[test]
876 fn nan_mode_test() {
877 let avg: Mode<_> = [f64::NAN].iter().collect();
878 assert!(avg.mode_nan().unwrap().is_nan());
879 }
880
881 #[test]
882 fn cached_ordering_test() {
883 let mut ord = CachedOrdering::new();
884 ord.add(0.0);
885 ord.add(1.0);
886 ord.add(2.0);
887 // gets correct index
888 assert_eq!(1.0, ord.order_index(1));
889 // cached for later
890 assert!(ord.in_order.contains(&1));
891 }
892}