iterstats 0.7.0

Statistics for Rust iterators.
Documentation
#![doc=include_str!("../README.md")]
#![deny(warnings, missing_docs, unsafe_code)]

pub mod argsort;
pub mod fill_nan;
pub mod mean;
pub mod median;
pub mod normalize;
pub mod range;
pub mod rank;
pub mod stddev;
pub mod sum_of_squares;
pub mod variance;
mod welford_online;
pub mod zscore;

use argsort::{ArgSort, ArgSortIter};
pub use fill_nan::FillNan;
use fill_nan::FillNanIter;
pub use mean::Mean;
pub use median::Median;
pub use normalize::Normalize;
use normalize::NormalizeIter;
use range::Range;
use rank::{Rank, RankIter};
pub use stddev::StdDev;
pub use sum_of_squares::SumOfSquares;
pub use variance::Variance;
pub use zscore::ZScore;
use zscore::ZScoreIter;

/// Extension trait exposing every [`iterstats`](crate) calculation as an iterator method.
///
/// The type parameter `A` is the item type of the iterator; each method requires
/// `Self: Iterator<Item = A>` and forwards to the associated function of the matching
/// per-item trait (`Mean`, `Variance`, `ZScore`, ...).
///
/// # Example
///
/// ```rust
/// use iterstats::Iterstats;
///
/// let data = [1f32, 2., 3., 4.];
/// let mean = data.iter().mean();
/// assert_eq!(mean, 2.5);
/// let variance = data.iter().variance();
/// assert_eq!(variance, 1.25);
/// let stddev = data.iter().stddev();
/// assert_eq!(stddev, 1.25f32.sqrt());
/// let zscores = data.iter().zscore().collect::<Vec<_>>();
/// assert_eq!(zscores, vec![-1.3416407, -0.4472136, 0.4472136, 1.3416407]);
/// ```
pub trait Iterstats<A>: Iterator {
    /// Compute the mean of the items.
    ///
    /// The iterator has to be `Clone`. Forwards to `Mean::mean`.
    fn mean(self) -> <A as Mean>::Output
    where
        Self: Sized + Iterator<Item = A> + Clone,
        A: Mean,
    {
        <A as Mean>::mean(self)
    }

    /// Compute the variance of the items.
    ///
    /// As the trait-level example shows (`1.25` for the values 1 to 4), this is the
    /// population variance. Forwards to `Variance::variance`.
    fn variance(self) -> <A as Variance>::Output
    where
        Self: Sized + Iterator<Item = A>,
        A: Variance,
    {
        <A as Variance>::variance(self)
    }

    /// Compute the population standard deviation of the items.
    ///
    /// Forwards to `StdDev::stddev`.
    fn stddev(self) -> <A as StdDev>::Output
    where
        Self: Sized + Iterator<Item = A>,
        A: StdDev,
    {
        <A as StdDev>::stddev(self)
    }

    /// Compute the Z-score of every item.
    ///
    /// The result is itself an iterator yielding one score per input item. The iterator has
    /// to be `Clone`. Forwards to `ZScore::zscore`.
    fn zscore(self) -> ZScoreIter<<A as ZScore>::Output>
    where
        Self: Sized + Iterator<Item = A> + Clone,
        A: ZScore,
    {
        <A as ZScore>::zscore(self)
    }

    /// Rescale every item into the range given by `min` and `max`.
    ///
    /// In the example below the smallest item maps to `min`, the largest to `max`, and the
    /// remaining items fall proportionally in between. The iterator has to be `Clone`.
    /// Forwards to `Normalize::normalize`.
    ///
    /// ```rust
    /// use iterstats::Iterstats;
    ///
    /// let data = [-1.0f64, -2., 3., 8.];
    /// let norm = data.iter().normalize(0., 100.).collect::<Vec<_>>();
    /// assert_eq!(vec![10., 0., 50., 100.], norm);
    /// ```
    fn normalize(
        self,
        min: <A as Normalize>::Output,
        max: <A as Normalize>::Output,
    ) -> NormalizeIter<<A as Normalize>::Output>
    where
        Self: Sized + Iterator<Item = A> + Clone,
        A: Normalize,
    {
        <A as Normalize>::normalize(self, min, max)
    }

    /// Compute the total sum of squares of the items.
    ///
    /// The crate's own test expects `8.75` for the values 1, 2, 3 and 5, i.e. the sum of the
    /// squared deviations from the mean. Forwards to `SumOfSquares::sum_of_squares`.
    fn sum_of_squares(self) -> <A as SumOfSquares>::Output
    where
        Self: Sized + Iterator<Item = A>,
        A: SumOfSquares,
    {
        <A as SumOfSquares>::sum_of_squares(self)
    }

    /// Compute the median of the items.
    ///
    /// With integer items the result may be inexact: when the number of elements is even and
    /// the true median is not an integer, an integer is returned nonetheless. Forwards to
    /// `Median::median`.
    fn median(self) -> <A as Median>::Output
    where
        Self: Sized + Iterator<Item = A>,
        A: Median,
    {
        <A as Median>::median(self)
    }

    /// Find the minimum and maximum of the items in a single pass.
    ///
    /// Returns `Some((min, max))`, or `None` when the iterator yields no items. Forwards to
    /// `Range::range`.
    fn range(self) -> Option<(<A as Range>::Output, <A as Range>::Output)>
    where
        Self: Sized + Iterator<Item = A>,
        A: Range,
    {
        <A as Range>::range(self)
    }

    /// Substitute `repl` for every NaN item, yielding the other items as they are.
    ///
    /// Forwards to `FillNan::fill_nan`.
    fn fill_nan(
        self,
        repl: <A as FillNan>::Output,
    ) -> FillNanIter<impl Iterator<Item = <A as FillNan>::Output>, <A as FillNan>::Output>
    where
        Self: Sized + Iterator<Item = A>,
        A: FillNan,
    {
        <A as FillNan>::fill_nan(self, repl)
    }

    /// Yield the indexes that would put the items in sorted order.
    ///
    /// Forwards to `ArgSort::argsort`.
    fn argsort(self) -> ArgSortIter<<A as ArgSort>::Output>
    where
        Self: Sized + Iterator<Item = A>,
        A: ArgSort,
    {
        <A as ArgSort>::argsort(self)
    }

    /// Yield the rank (position in sorted order) of every item.
    ///
    /// Forwards to `Rank::rank`.
    fn rank(self) -> RankIter
    where
        Self: Sized + Iterator<Item = A>,
        A: Rank,
    {
        <A as Rank>::rank(self)
    }
}

/// Blanket implementation: every iterator type, sized or not, implements `Iterstats<A>` for
/// every `A`. The individual methods narrow `A` down to the item type through their own
/// `Self: Iterator<Item = A>` bounds.
impl<I: Iterator + ?Sized, A> Iterstats<A> for I {}

#[cfg(test)]
mod tests {
    //! Each test runs the calculation twice: once over borrowed items (`iter()`) and once
    //! over owned items (`into_iter()`), expecting the same result from both.

    use super::*;

    /// Fixture shared by the ordering tests: a NaN and its negation, both infinities, both
    /// zeros and one ordinary value.
    fn special_floats() -> [f64; 7] {
        [
            f64::NAN,
            4.2,
            0.0,
            f64::NEG_INFINITY,
            -0.0,
            f64::INFINITY,
            -f64::NAN,
        ]
    }

    #[test]
    fn mean() {
        let samples = [1., 2., 3., 4.];
        let expected = 2.5;

        let from_refs = samples.iter().mean();
        assert_eq!(from_refs, expected);

        let from_values = samples.into_iter().mean();
        assert_eq!(from_values, expected);
    }

    #[test]
    fn variance() {
        let samples = [1., 2., 3., 4.];
        let expected = 1.25;

        let from_refs = samples.iter().variance();
        assert_eq!(from_refs, expected);

        let from_values = samples.into_iter().variance();
        assert_eq!(from_values, expected);
    }

    #[test]
    fn stddev() {
        let samples = [1., 2., 3., 4.];
        let expected = 1.25f64.sqrt();

        let from_refs = samples.iter().stddev();
        assert_eq!(from_refs, expected);

        let from_values = samples.into_iter().stddev();
        assert_eq!(from_values, expected);
    }

    #[test]
    fn zscore() {
        let samples = [1., 2., 3., 4.];
        let expected = vec![
            -1.3416407864998738,
            -0.4472135954999579,
            0.4472135954999579,
            1.3416407864998738,
        ];

        let from_refs = samples.iter().zscore().collect::<Vec<_>>();
        assert_eq!(from_refs, expected);

        let from_values = samples.into_iter().zscore().collect::<Vec<_>>();
        assert_eq!(from_values, expected);
    }

    #[test]
    fn normalize() {
        let samples = [1., 2., 3., 5.];
        let lo = 0.;
        let hi = 1.;
        let expected = vec![0., 0.25, 0.5, 1.];

        let from_refs = samples.iter().normalize(lo, hi).collect::<Vec<_>>();
        assert_eq!(from_refs, expected);

        let from_values = samples.into_iter().normalize(lo, hi).collect::<Vec<_>>();
        assert_eq!(from_values, expected);
    }

    #[test]
    fn sum_of_squares() {
        let samples = [1., 2., 3., 5.];
        let expected = 8.75;

        let from_refs = samples.iter().sum_of_squares();
        assert_eq!(from_refs, expected);

        let from_values = samples.into_iter().sum_of_squares();
        assert_eq!(from_values, expected);
    }

    #[test]
    fn median() {
        // Even number of integers: the true median (4.5) is not representable, 4 is expected.
        let samples = [2, 7, 198, 2, 2, 7];
        let expected = 4;

        let from_refs = samples.iter().median();
        assert_eq!(from_refs, expected);

        let from_values = samples.into_iter().median();
        assert_eq!(from_values, expected);
    }

    #[test]
    fn range() {
        let samples = [2, 7, -198, 2, 7];
        let expected = (-198, 7);

        let from_refs = samples.iter().range().unwrap();
        assert_eq!(from_refs, expected);

        let from_values = samples.into_iter().range().unwrap();
        assert_eq!(from_values, expected);
    }

    #[test]
    fn fillnan() {
        let samples = [1., 100., 2., f32::NAN, 3., 5.];
        let replacement = 100.;
        let expected = vec![1., 100., 2., 100., 3., 5.];

        let from_refs = samples.iter().fill_nan(replacement).collect::<Vec<_>>();
        assert_eq!(from_refs, expected);

        let from_values = samples
            .into_iter()
            .fill_nan(replacement)
            .collect::<Vec<_>>();
        assert_eq!(from_values, expected);
    }

    #[test]
    fn argsort() {
        let samples = special_floats();
        let expected = vec![6, 3, 4, 2, 1, 5, 0];

        let from_refs = samples.iter().argsort().collect::<Vec<_>>();
        assert_eq!(from_refs, expected);

        let from_values = samples.into_iter().argsort().collect::<Vec<_>>();
        assert_eq!(from_values, expected);
    }

    #[test]
    fn rank() {
        let samples = special_floats();
        let expected = vec![6, 4, 3, 1, 2, 5, 0];

        let from_refs = samples.iter().rank().collect::<Vec<_>>();
        assert_eq!(from_refs, expected);

        let from_values = samples.into_iter().rank().collect::<Vec<_>>();
        assert_eq!(from_values, expected);
    }
}