inc_stats/
lib.rs

1//! Module with incremental statistics functions
2//!
3//! This contains helper functions for computing statistics on iterators, as well as structs that
4//! support incremental addition of data.
5mod bytes;
6mod copy;
7mod utils;
8
9use bytes::ToBytes;
10pub use copy::DerefCopy;
11use num_traits::{Float, FromPrimitive};
12use std::cell::RefCell;
13use std::cmp::{self, Eq};
14use std::collections::{BTreeSet, HashMap};
15use std::f64;
16use std::hash::{Hash, Hasher};
17use std::iter::{self, FromIterator};
18use std::ops::AddAssign;
19pub use utils::StatsError;
20
21/// Summary statistics struct
22///
23/// This struct aggregates data to compute summary statistics using constant space overhead. It
24/// implements the FromIterator trait so it can be collected from an iterator of floats.
25///
26/// # Examples
27///
28/// ```
29/// let mut stats = inc_stats::SummStats::new();
30/// for &num in &[2.0, 4.0, 8.0] {
31///     stats.add(num);
32/// }
33/// assert_eq!(3, stats.count());
34/// ```
35///
36/// ```
37/// let stats: inc_stats::SummStats<f64> = [2.0, 4.0, 8.0].iter().collect();
38/// assert_eq!(3, stats.count());
39/// ```
40#[derive(Debug)]
41pub struct SummStats<T: Float + FromPrimitive + AddAssign> {
42    non_nan: bool,
43    count: u64,
44    mean: T,
45    ssd: T,
46    min: T,
47    max: T,
48}
49
50impl<T: Float + FromPrimitive + AddAssign> SummStats<T> {
51    /// Create a new SummStats struct with no data
52    pub fn new() -> Self {
53        SummStats {
54            non_nan: false, // any value is not nan
55            count: 0,
56            mean: T::zero(),
57            ssd: T::zero(),
58            min: T::infinity(),
59            max: T::neg_infinity(),
60        }
61    }
62
63    /// Add a number
64    ///
65    /// # Examples
66    ///
67    /// ```
68    /// let mut stats = inc_stats::SummStats::new();
69    /// stats.add(0.0);
70    /// stats.add(&1.2);
71    /// assert_eq!(2, stats.count());
72    /// ```
73    ///
74    /// # Panics
75    ///
76    /// when the internal count can't be converted into the float data type.
77    pub fn add(&mut self, bval: impl DerefCopy<Output = T>) {
78        self.checked_add(bval).unwrap();
79    }
80
81    /// Add a number
82    ///
83    /// Check for conversion errors, will only happen when the internal count can't be converted
84    /// into the float data type.
85    ///
86    /// # Examples
87    ///
88    /// ```
89    /// let mut stats = inc_stats::SummStats::new();
90    /// stats.checked_add(0.0).unwrap();
91    /// assert_eq!(1, stats.count());
92    /// ```
93    pub fn checked_add(&mut self, rval: impl DerefCopy<Output = T>) -> Result<(), StatsError> {
94        // NOTE need to exit early before mutating state
95        let count = T::from_u64(self.count + 1).ok_or("can't convert from count to float type")?;
96        let val = rval.deref_copy();
97        self.non_nan |= !val.is_nan();
98        self.count += 1;
99        let delta = val - self.mean;
100        self.mean += delta / count;
101        self.ssd += (val - self.mean) * delta;
102        if val < self.min {
103            self.min = val;
104        }
105        if self.max < val {
106            self.max = val;
107        }
108        Ok(())
109    }
110
111    /// Get the number of values added
112    pub fn count(&self) -> u64 {
113        self.count
114    }
115
116    fn tcount(&self) -> T {
117        // if we could add the last value, then we must have been able to convert this
118        T::from_u64(self.count).unwrap()
119    }
120
121    /// Get the minimum non nan value
122    ///
123    /// Constant time. If no non nan values have been added, this is None.
124    ///
125    /// # Examples
126    ///
127    /// ```
128    /// let stats: inc_stats::SummStats<_> = [2.0, 4.0, std::f64::NAN].iter().collect();
129    /// assert_eq!(2.0, stats.min().unwrap());
130    /// ```
131    ///
132    /// ```
133    /// let mut stats = inc_stats::SummStats::new();
134    /// stats.add(std::f64::NAN);
135    /// assert!(stats.min().is_none());
136    /// ```
137    pub fn min(&self) -> Option<T> {
138        if self.non_nan {
139            Some(self.min)
140        } else {
141            None
142        }
143    }
144
145    /// Get the maximum non nan value
146    ///
147    /// Constant time. If no non nan values have been added, this is None.
148    ///
149    /// # Examples
150    ///
151    /// ```
152    /// let stats: inc_stats::SummStats<_> = [2.0, 4.0, std::f64::NAN].iter().collect();
153    /// assert_eq!(4.0, stats.max().unwrap());
154    /// ```
155    pub fn max(&self) -> Option<T> {
156        if self.non_nan {
157            Some(self.max)
158        } else {
159            None
160        }
161    }
162
163    /// Get the mean
164    ///
165    /// Constant time.
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// let stats: inc_stats::SummStats<f64> = [2.0, 4.0].iter().collect();
171    /// assert!((3.0 - stats.mean().unwrap()).abs() < 1.0e-6);
172    /// ```
173    ///
174    /// ```
175    /// let stats = inc_stats::SummStats::<f64>::new();
176    /// assert!(stats.mean().is_none());
177    /// ```
178    pub fn mean(&self) -> Option<T> {
179        match self.count {
180            0 => None,
181            _ => Some(self.mean),
182        }
183    }
184
185    /// Get the sum
186    ///
187    /// Constant time.
188    ///
189    /// # Examples
190    ///
191    /// ```
192    /// let stats: inc_stats::SummStats<f64> = [2.0, 4.0].iter().collect();
193    /// assert!((6.0 - stats.sum()).abs() < 1.0e-6);
194    /// ```
195    pub fn sum(&self) -> T {
196        self.tcount() * self.mean
197    }
198
199    /// Get the sample standard deviation
200    ///
201    /// Constant time.
202    ///
203    /// # Examples
204    ///
205    /// ```
206    /// let stats: inc_stats::SummStats<f64> = [2.0, 4.0].iter().collect();
207    /// assert!((1.4142136 - stats.standard_deviation().unwrap()).abs() < 1.0e-6);
208    /// ```
209    pub fn standard_deviation(&self) -> Option<T> {
210        self.variance().map(T::sqrt)
211    }
212
213    /// Get the sample variance
214    ///
215    /// Constant time.
216    ///
217    /// # Examples
218    ///
219    /// ```
220    /// let stats: inc_stats::SummStats<f64> = [2.0, 4.0].iter().collect();
221    /// assert!((2.0 - stats.variance().unwrap()).abs() < 1.0e-6);
222    /// ```
223    ///
224    /// ```
225    /// let mut stats = inc_stats::SummStats::new();
226    /// stats.add(0.0);
227    /// assert!(stats.variance().is_none());
228    /// ```
229    pub fn variance(&self) -> Option<T> {
230        match self.count {
231            0 | 1 => None,
232            // if we could add to this, it must be possible
233            _ => Some(self.ssd / T::from_u64(self.count - 1).unwrap()),
234        }
235    }
236
237    /// Get the standard error
238    ///
239    /// Constant time.
240    ///
241    /// # Examples
242    ///
243    /// ```
244    /// let stats: inc_stats::SummStats<f64> = [2.0, 4.0].iter().collect();
245    /// assert!((1.0 - stats.standard_error().unwrap()).abs() < 1.0e-6);
246    /// ```
247    pub fn standard_error(&self) -> Option<T> {
248        self.standard_deviation().map(|d| d / self.tcount().sqrt())
249    }
250}
251
252impl<T: Float + FromPrimitive + AddAssign> Default for SummStats<T> {
253    fn default() -> Self {
254        SummStats::new()
255    }
256}
257
258impl<T: Float + FromPrimitive + AddAssign, V: DerefCopy<Output = T>> FromIterator<V>
259    for SummStats<T>
260{
261    fn from_iter<I>(iter: I) -> Self
262    where
263        I: IntoIterator<Item = V>,
264    {
265        let mut stats = SummStats::new();
266        for val in iter {
267            stats.add(val);
268        }
269        stats
270    }
271}
272
273/// Get the mean of a set of data
274///
275/// This method takes constant space and linear time.
276///
277/// # Examples:
278///
279/// ```
280/// let mean: f64 = inc_stats::mean(&[2.0, 4.0]).unwrap();
281/// assert!((3.0 - mean).abs() < 1.0e-6);
282/// ```
283pub fn mean<T, V, I>(data: I) -> Option<T>
284where
285    T: Float + FromPrimitive + AddAssign,
286    V: DerefCopy<Output = T>,
287    I: IntoIterator<Item = V>,
288{
289    data.into_iter().collect::<SummStats<_>>().mean()
290}
291
292/// The mutable data structure that caches ordered percentiles
293#[derive(Debug)]
294struct CachedOrdering<T: Float + FromPrimitive> {
295    data: Vec<T>,
296    in_order: BTreeSet<usize>,
297}
298
299impl<T: Float + FromPrimitive> CachedOrdering<T> {
300    /// Create a new Percentiles object with no data
301    fn new() -> Self {
302        CachedOrdering {
303            // all of the points aded so far
304            data: Vec::new(),
305            // indices in data that are known to be in sorted order
306            in_order: BTreeSet::new(),
307        }
308    }
309
310    /// Add a data point
311    fn add(&mut self, val: T) {
312        self.data.push(val);
313        self.in_order.clear();
314    }
315
316    /// assert index is in sorted order, and get value at that order
317    fn order_index(&mut self, index: usize) -> T {
318        if self.in_order.insert(index) {
319            let start = match self.in_order.range(..index).next_back() {
320                Some(ind) => ind + 1,
321                None => 0,
322            };
323            let end = match self.in_order.range(index + 1..).next() {
324                Some(&ind) => ind,
325                None => self.data.len(),
326            };
327            self.data[start..end].select_nth_unstable_by(index - start, |a, b| {
328                // we filter out nans
329                a.partial_cmp(b).unwrap()
330            });
331        }
332        self.data[index]
333    }
334
335    /// Get the amount of data
336    fn len(&self) -> usize {
337        self.data.len()
338    }
339}
340
341/// Data percentile struct
342///
343/// This struct stores data to allow efficient computation of percentiles. This struct takes linear
344/// space. It implements FromIterator to allow collection. This collection ignores NaNs.
345///
346/// The structure is designed for efficient computation of percentiles when data is added and then
347/// percentiles are computed. Adding data is constant time, querying percentiles is linear time,
348/// with some caching to make it faster for computing several percentiles. If you were going to
349/// query percentiles while adding data, then you probably want to use a different data structure.
350///
351/// # Examples
352///
353/// ```
354/// let mut percs = inc_stats::Percentiles::new();
355/// for &num in &[2.0, 4.0, 8.0] {
356///     percs.add(num);
357/// }
358/// assert_eq!(3, percs.count());
359/// ```
360///
361/// ```
362/// let percs: inc_stats::Percentiles<f64> = [2.0, 4.0, 8.0].iter().collect();
363/// assert_eq!(3, percs.count());
364/// ```
365#[derive(Debug)]
366pub struct Percentiles<T: Float + FromPrimitive> {
367    data: RefCell<CachedOrdering<T>>,
368    nan_count: usize,
369}
370
371impl<T: Float + FromPrimitive> Percentiles<T> {
372    /// Create a new Percentiles object with no data
373    pub fn new() -> Self {
374        Percentiles {
375            data: RefCell::new(CachedOrdering::new()),
376            nan_count: 0,
377        }
378    }
379
380    /// Add a data point
381    pub fn add(&mut self, rval: impl DerefCopy<Output = T>) {
382        let val = rval.deref_copy();
383        if val.is_nan() {
384            self.nan_count += 1;
385        } else {
386            self.data.borrow_mut().add(val);
387        }
388    }
389
390    /// Get the number of data points
391    pub fn count(&self) -> usize {
392        self.data.borrow().len() + self.nan_count
393    }
394
395    /// Get a number of percentiles
396    ///
397    /// This takes linear time in the number of added data points, and log linear in the number of
398    /// percentiles. This will be marginally more efficient than calling percentile repeatedly in a
399    /// bad order.
400    ///
401    /// # Examples:
402    ///
403    /// ```
404    /// let percs: inc_stats::Percentiles<f64> = [1.0, 3.0, 7.0].iter().collect();
405    /// let quarts = percs.percentiles(&[0.75, 0.25, 0.5]).unwrap().unwrap();
406    /// assert!((5.0 - quarts[0]).abs() < 1.0e-6);
407    /// assert!((2.0 - quarts[1]).abs() < 1.0e-6);
408    /// assert!((3.0 - quarts[2]).abs() < 1.0e-6);
409    /// ```
410    // NOTE inside out does not guarantee worst case linear complexity. Asking for percentiles that
411    // correspond to the 1st, 2nd, 3rd, index etc will still have `log p * n` complexity (versus `p
412    // * n` for the native way). If we instead picked the percentiles closest to the midpoint of
413    // the remaining space, the complexity would drop to `log p + n`, which is just n.
414    pub fn percentiles<P, I>(&self, percentiles: I) -> Result<Option<Vec<T>>, StatsError>
415    where
416        P: DerefCopy<Output = f64>,
417        I: IntoIterator<Item = P>,
418    {
419        let len = self.data.borrow().len();
420        match len {
421            0 => Ok(None),
422            _ => {
423                // need to output result in same order, but need this sorted for efficiency
424                let mut indexed: Vec<(usize, f64)> = percentiles
425                    .into_iter()
426                    .map(DerefCopy::deref_copy)
427                    .enumerate()
428                    .collect();
429                if indexed.iter().any(|(_, e)| e.is_nan()) {
430                    Err(StatsError::from("percentiles can't be nan"))?
431                }
432                // we checked there were no nans
433                indexed.sort_unstable_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap());
434                // allocate result
435                let mut result: Vec<Option<T>> = iter::repeat(None).take(indexed.len()).collect();
436                for &(ind, perc) in utils::inside_out(&indexed)? {
437                    // we checked that we had data
438                    result[ind] = Some(self.percentile(perc)?.unwrap());
439                }
440                let checked_result: Option<Vec<_>> = result.iter().copied().collect();
441                // fails if there is a logic error in inside_out
442                Ok(Some(checked_result.unwrap()))
443            }
444        }
445    }
446
447    /// Get a percentile
448    ///
449    /// Linear time.
450    ///
451    /// # Examples:
452    ///
453    /// ```
454    /// let percs: inc_stats::Percentiles<f64> = [1.0, 5.0].iter().collect();
455    /// let quart = percs.percentile(0.25).unwrap().unwrap();
456    /// assert!((2.0 - quart).abs() < 1.0e-6);
457    /// ```
458    pub fn percentile(
459        &self,
460        percentile: impl DerefCopy<Output = f64>,
461    ) -> Result<Option<T>, StatsError> {
462        let perc = percentile.deref_copy();
463        if perc < 0.0 || 1.0 < perc {
464            Err(StatsError::new(format!(
465                "all percentiles must be between 0 and 1, but got: {}",
466                perc
467            )))
468        } else {
469            let mut ordering = self.data.borrow_mut();
470            match ordering.len() {
471                0 => Ok(None),
472                _ => {
473                    let p_index = (ordering.len() - 1) as f64 * perc;
474                    let low_index = p_index.floor() as usize;
475                    let high_index = p_index.ceil() as usize;
476                    let low = ordering.order_index(low_index);
477                    let high = ordering.order_index(high_index);
478                    let weight = p_index - low_index as f64;
479                    let perc = utils::weighted_average(low, high, weight)
480                        .ok_or("can't convert from weight to float")?;
481                    Ok(Some(perc))
482                }
483            }
484        }
485    }
486
487    /// Get the median
488    ///
489    /// Linear time.
490    ///
491    /// # Examples:
492    ///
493    /// ```
494    /// let percs: inc_stats::Percentiles<f64> = [1.0, 5.0, 100.0].iter().collect();
495    /// let med = percs.median().unwrap();
496    /// assert_eq!(5.0, med);
497    /// ```
498    pub fn median(&self) -> Option<T> {
499        self.percentile(0.5).expect("0.5 is a valid percentile")
500    }
501}
502
503impl<T: Float + FromPrimitive> Default for Percentiles<T> {
504    fn default() -> Self {
505        Percentiles::new()
506    }
507}
508
509impl<T: Float + FromPrimitive, V: DerefCopy<Output = T>> FromIterator<V> for Percentiles<T> {
510    fn from_iter<I>(iter: I) -> Self
511    where
512        I: IntoIterator<Item = V>,
513    {
514        let mut percs = Percentiles::new();
515        for val in iter {
516            percs.add(val);
517        }
518        percs
519    }
520}
521
522/// Get the median of a set of data
523///
524/// This takes linear time and linear space.
525///
526/// # Examples
527///
528/// ```
529/// let med = inc_stats::median(&[3.0, 1.0, 2.0]).unwrap();
530/// assert_eq!(2.0, med);
531/// ```
532///
533/// ```
534/// let med = inc_stats::median(std::iter::empty::<f64>());
535/// assert!(med.is_none());
536/// ```
537pub fn median<T, V, I>(data: I) -> Option<T>
538where
539    T: Float + FromPrimitive,
540    V: DerefCopy<Output = T>,
541    I: IntoIterator<Item = V>,
542{
543    data.into_iter().collect::<Percentiles<T>>().median()
544}
545
546#[derive(Debug, PartialEq)]
547struct HashFloat<T: Float + ToBytes>(T);
548
549impl<T: Float + ToBytes> Eq for HashFloat<T> {}
550
551impl<T: Float + ToBytes> Hash for HashFloat<T> {
552    fn hash<H: Hasher>(&self, state: &mut H) {
553        self.0.to_bytes().hash(state);
554    }
555}
556
557/// Mode computation struct
558///
559/// This struct stores data to allow efficient computation of the mode. This struct takes linear
560/// space. It implements FromIterator to allow collection.
561///
562/// # Examples
563///
564/// ```
565/// let mut mode = inc_stats::Mode::new();
566/// for &num in &[2.0, 4.0, 8.0] {
567///     mode.add(num);
568/// }
569/// assert_eq!(3, mode.count());
570/// ```
571///
572/// ```
573/// let mode: inc_stats::Mode<f64> = [2.0, 4.0, 8.0].iter().collect();
574/// assert_eq!(3, mode.count());
575/// ```
576#[derive(Debug)]
577pub struct Mode<T: Float + ToBytes> {
578    counts: HashMap<HashFloat<T>, usize>,
579    count: usize,
580    nan_count: usize,
581    mode: Vec<T>,
582    mode_count: usize,
583}
584
585impl<T: Float + ToBytes> Mode<T> {
586    /// Create a new Mode object with no data
587    pub fn new() -> Self {
588        Mode {
589            counts: HashMap::new(),
590            count: 0,
591            nan_count: 0,
592            mode: Vec::new(),
593            mode_count: 0,
594        }
595    }
596
597    /// Add a data point
598    pub fn add(&mut self, rval: impl DerefCopy<Output = T>) {
599        let val = rval.deref_copy();
600        self.count += 1;
601        if val.is_nan() {
602            self.nan_count += 1;
603        } else {
604            let val_count = self.counts.entry(HashFloat(val)).or_insert(0);
605            *val_count += 1;
606            if *val_count > self.mode_count {
607                self.mode.clear();
608                self.mode.push(val);
609                self.mode_count += 1;
610            } else if *val_count == self.mode_count {
611                self.mode.push(val);
612            }
613        }
614    }
615
616    /// Get the number of data points
617    ///
618    /// # Examples
619    ///
620    /// ```
621    /// let num: inc_stats::Mode<_> = [1.0, 2.0, std::f64::NAN].iter().collect();
622    /// assert_eq!(3, num.count());
623    /// ```
624    pub fn count(&self) -> usize {
625        self.count
626    }
627
628    /// Count the number of distinct values
629    ///
630    /// Distinctness for floating points is very finicy. Values that may print the same may not be
631    /// same underlying value. Computations that yield the same value in "real" math may not yield
632    /// the same value in floating point math.
633    ///
634    /// This ignores nans
635    ///
636    /// # Examples
637    ///
638    /// ```
639    /// let num: inc_stats::Mode<_> = [1.0, 2.0, 2.0, std::f64::NAN].iter().collect();
640    /// assert_eq!(2, num.count_distinct());
641    /// ```
642    pub fn count_distinct(&self) -> usize {
643        self.counts.len()
644    }
645
646    /// Count the number of distinct values
647    ///
648    /// This treats all NaNs as different
649    ///
650    /// # Examples
651    ///
652    /// ```
653    /// let num: inc_stats::Mode<_> = [1.0, std::f64::NAN, std::f64::NAN].iter().collect();
654    /// assert_eq!(3, num.count_distinct_nan());
655    /// ```
656    ///
657    /// Treat all nans the same
658    /// ```
659    /// let num: inc_stats::Mode<_> = [1.0, std::f64::NAN, std::f64::NAN].iter().collect();
660    /// assert_eq!(2, std::cmp::min(num.count_distinct() + 1, num.count_distinct_nan()));
661    /// ```
662    pub fn count_distinct_nan(&self) -> usize {
663        self.counts.len() + self.nan_count
664    }
665
666    /// Return an iterator of all of the modes
667    ///
668    /// Multiple modes are retruned in the order they became a mode. NaNs are ignored.
669    ///
670    /// This iterator has read only reference to the mode data structure that must be dropped to
671    /// continue modifying the mode.
672    ///
673    /// Constant time.
674    ///
675    /// # Examples
676    ///
677    /// ```
678    /// let mut mode = inc_stats::Mode::new();
679    /// {
680    ///     let mut it = mode.modes();
681    ///     assert!(it.next().is_none());
682    /// }
683    ///
684    /// mode.add(5.0);
685    /// {
686    ///     let mut it = mode.modes();
687    ///     assert_eq!(Some(5.0), it.next());
688    ///     assert!(it.next().is_none());
689    /// }
690    ///
691    /// mode.add(3.0);
692    /// {
693    ///     let mut it = mode.modes();
694    ///     assert_eq!(Some(5.0), it.next());
695    ///     assert_eq!(Some(3.0), it.next());
696    ///     assert!(it.next().is_none());
697    /// }
698    ///
699    /// mode.add(3.0);
700    /// {
701    ///     let mut it = mode.modes();
702    ///     assert_eq!(Some(3.0), it.next());
703    ///     assert!(it.next().is_none());
704    /// }
705    /// ```
706    pub fn modes(&self) -> impl Iterator<Item = T> + '_ {
707        self.mode.iter().copied()
708    }
709
710    /// gets an option for if nan would be in the mode
711    fn nan_mode(&self) -> Option<T> {
712        if self.nan_count > 0 && self.nan_count >= self.mode_count {
713            Some(T::nan())
714        } else {
715            None
716        }
717    }
718
719    /// Return an iterator of all of the modes
720    ///
721    /// This iterator will include NaN if present as a mode. NaN will always be returned last
722    ///
723    /// Constant time.
724    ///
725    /// # Examples
726    ///
727    /// ```
728    /// let mode: inc_stats::Mode<_> = [std::f64::NAN, 5.0].iter().collect();
729    /// let mut it = mode.modes_nan();
730    /// assert_eq!(Some(5.0), it.next());
731    /// assert!(it.next().unwrap().is_nan());
732    /// assert!(it.next().is_none());
733    /// ```
734    pub fn modes_nan(&self) -> impl Iterator<Item = T> + '_ {
735        self.modes().chain(self.nan_mode())
736    }
737
738    /// Return the current mode
739    ///
740    /// If multiple modes exist, this returns the first element that reached the largest count.
741    /// NaNs are ignored when computing the mode.
742    ///
743    /// Constant time.
744    ///
745    /// # Examples
746    ///
747    /// ```
748    /// let mode: inc_stats::Mode<_> = [2.0, 4.0, std::f64::NAN, 4.0].iter().collect();
749    /// assert_eq!(4.0, mode.mode().unwrap());
750    /// ```
751    ///
752    /// ```
753    /// let mode = inc_stats::Mode::<f64>::new();
754    /// assert!(mode.mode().is_none());
755    /// ```
756    pub fn mode(&self) -> Option<T> {
757        self.modes().next()
758    }
759
760    /// Return the current mode
761    ///
762    /// If multiple modes exist, this returns the first element that reached the largest count that
763    /// wasn't NaN. NaN will be returned only if it is the unique mode.
764    ///
765    /// Constant time.
766    ///
767    /// # Examples
768    ///
769    /// ```
770    /// let mode: inc_stats::Mode<_> = [2.0, 4.0, std::f64::NAN, std::f64::NAN].iter().collect();
771    /// assert!(mode.mode_nan().unwrap().is_nan());
772    /// ```
773    pub fn mode_nan(&self) -> Option<T> {
774        if self.nan_count > self.mode_count {
775            Some(T::nan())
776        } else {
777            self.mode()
778        }
779    }
780
781    /// Return the number of times the mode occurred
782    ///
783    /// Constant time.
784    ///
785    /// # Examples
786    ///
787    /// ```
788    /// let mode: inc_stats::Mode<_> = [2.0, 4.0, std::f64::NAN, 4.0].iter().collect();
789    /// assert_eq!(2, mode.mode_count());
790    /// ```
791    pub fn mode_count(&self) -> usize {
792        self.mode_count
793    }
794
795    /// Return the number of times the mode occurred
796    ///
797    /// Counts NaNs as a possible mode.
798    ///
799    /// Constant time.
800    ///
801    /// # Examples
802    ///
803    /// ```
804    /// let mode: inc_stats::Mode<_> = [2.0, 4.0, std::f64::NAN, std::f64::NAN].iter().collect();
805    /// assert_eq!(2, mode.mode_count_nan());
806    /// ```
807    pub fn mode_count_nan(&self) -> usize {
808        cmp::max(self.mode_count, self.nan_count)
809    }
810}
811
812impl<T: Float + ToBytes, V: DerefCopy<Output = T>> FromIterator<V> for Mode<T> {
813    fn from_iter<I>(iter: I) -> Self
814    where
815        I: IntoIterator<Item = V>,
816    {
817        let mut mode = Mode::new();
818        for val in iter {
819            mode.add(val);
820        }
821        mode
822    }
823}
824
825/// Get the mode of a set of data
826///
827/// If multiple modes exist, this returns the first element that reached the largest count.
828/// NaNs are ignored when computing the mode.
829///
830/// # Examples:
831///
832/// ```
833/// let mode = inc_stats::mode(&[2.0, 4.0, 2.0]);
834/// assert_eq!(Some(2.0), mode);
835/// ```
836///
837/// ```
838/// let mode: Option<f64> = inc_stats::mode(&[]);
839/// assert!(mode.is_none());
840/// ```
841pub fn mode<T, V, I>(data: I) -> Option<T>
842where
843    T: Float + ToBytes,
844    V: DerefCopy<Output = T>,
845    I: IntoIterator<Item = V>,
846{
847    data.into_iter().collect::<Mode<T>>().mode()
848}
849
850#[cfg(test)]
851mod tests {
852    use super::*;
853
854    #[test]
855    fn f32_mean_test() {
856        let avg: f32 = mean(&[0.0, 1.0, 2.0]).unwrap();
857        assert!((avg - 1.0).abs() < 1e-6);
858    }
859
860    #[test]
861    fn f32_median_test() {
862        let avg: f32 = median(&[0.0, 1.0, 2.0, 3.0]).unwrap();
863        assert!((avg - 1.5).abs() < 1e-6);
864    }
865
866    #[test]
867    fn nan_percentile_test() {
868        let percs: Percentiles<_> = [f64::NAN].iter().collect();
869        // we know we put something in
870        assert_eq!(1, percs.count());
871        // but don't have enough data to get median
872        assert_eq!(None, percs.median());
873    }
874
875    #[test]
876    fn nan_mode_test() {
877        let avg: Mode<_> = [f64::NAN].iter().collect();
878        assert!(avg.mode_nan().unwrap().is_nan());
879    }
880
881    #[test]
882    fn cached_ordering_test() {
883        let mut ord = CachedOrdering::new();
884        ord.add(0.0);
885        ord.add(1.0);
886        ord.add(2.0);
887        // gets correct index
888        assert_eq!(1.0, ord.order_index(1));
889        // cached for later
890        assert!(ord.in_order.contains(&1));
891    }
892}
inc_stats/lib.rs

inc_stats/
lib.rs