rstat/
lib.rs

1//! Probability distributions and statistics in Rust with integrated fitting
2//! routines, convolution support and mixtures.
3// #![warn(missing_docs)]
4extern crate failure;
5
6extern crate rand;
7extern crate rand_distr;
8
9extern crate num;
10extern crate special_fun;
11
12extern crate spaces;
13
14extern crate ndarray;
15#[cfg(feature = "ndarray-linalg")]
16extern crate ndarray_linalg;
17
18#[cfg_attr(feature = "serde", macro_use)]
19#[cfg(feature = "serde")]
20extern crate serde_crate;
21
22macro_rules! undefined {
23    () => (panic!("quantity undefined"));
24    ($($arg:tt)+) => (panic!("quantity undefined: {}", std::format_args!($($arg)+)));
25}
26
27mod consts;
28mod utils;
29
30pub mod linalg;
31
32mod probability;
33pub use self::probability::{
34    InvalidProbabilityError, Probability,
35    InvalidSimplexError, SimplexVector, UnitSimplex,
36};
37
38#[macro_use]
39pub mod params;
40
41/// Iterator for drawing random samples from a
42/// [distribution](trait.Distribution.html).
43pub struct Sampler<'a, D: ?Sized, R: ?Sized> {
44    pub(crate) distribution: &'a D,
45    pub(crate) rng: &'a mut R,
46}
47
48impl<'a, D, R> Iterator for Sampler<'a, D, R>
49where
50    D: Distribution + ?Sized,
51    R: rand::Rng + ?Sized,
52{
53    type Item = <D::Support as spaces::Space>::Value;
54
55    #[inline(always)]
56    fn next(&mut self) -> Option<Self::Item> { Some(self.distribution.sample(&mut self.rng)) }
57
58    fn size_hint(&self) -> (usize, Option<usize>) { (usize::max_value(), None) }
59}
60
61macro_rules! ln_variant {
62    ($(#[$attr:meta])* => $name:ident, $name_ln:ident, $x:ty) => {
63        $(#[$attr])*
64        fn $name_ln(&self, x: $x) -> f64 {
65            self.$name(x).ln()
66        }
67    }
68}
69
70/// Type alias for the sample type of a [distribution](trait.Distribution.html).
71pub type Sample<D> = <<D as Distribution>::Support as spaces::Space>::Value;
72
73/// Trait for probability distributions with a well-defined CDF.
74pub trait Distribution: From<<Self as Distribution>::Params> {
75    /// Support of sample elements.
76    type Support: spaces::Space;
77
78    /// Parameter set uniquely defining the instance.
79    type Params;
80
81    /// Returns an instance of the support `Space`, `Self::Support`.
82    ///
83    /// # Examples
84    /// ```
85    /// # use spaces::{Space, BoundedSpace, Dim};
86    /// # use rstat::{Distribution, univariate, params::Param};
87    /// let dist = univariate::beta::Beta::default();
88    /// let support = dist.support();
89    ///
90    /// assert_eq!(support.dim(), Dim::Finite(1));
91    /// assert_eq!(support.inf().unwrap(), 0.0);
92    /// assert_eq!(support.sup().unwrap(), 1.0);
93    /// ```
94    fn support(&self) -> Self::Support;
95
96    /// Converts `self` into an instance of `Self::Support`.
97    fn into_support(self) -> Self::Support { self.support() }
98
99    /// Returns an instance of the distribution parameters, `Self::Params`.
100    ///
101    /// # Examples
102    /// ```
103    /// # use rstat::{Distribution, univariate, params::Param};
104    /// let dist = univariate::normal::Normal::standard();
105    /// let params = dist.params();
106    ///
107    /// assert_eq!(params.mu.value(), &0.0);
108    /// assert_eq!(params.Sigma.value(), &1.0);
109    /// ```
110    fn params(&self) -> Self::Params;
111
112    /// Converts `self` into an instance of `Self::Params`.
113    fn into_params(self) -> Self::Params { self.params() }
114
115    /// Evaluates the cumulative distribution function (CDF) at \\(x\\).
116    ///
117    /// The CDF is defined as the probability that a random variable \\(X\\)
118    /// takes on a value less than or equal to \\(x\\), i.e. \\(F(x) = P(X
119    /// \leq x)\\).
120    ///
121    /// # Examples
122    /// ```
123    /// # use rstat::{Distribution, Probability, univariate};
124    /// # use std::f64;
125    /// let dist = univariate::normal::Normal::standard();
126    ///
127    /// assert_eq!(dist.cdf(&f64::NEG_INFINITY), Probability::zero());
128    /// assert_eq!(dist.cdf(&0.0), Probability::half());
129    /// assert_eq!(dist.cdf(&f64::INFINITY), Probability::one());
130    /// ```
131    fn cdf(&self, x: &Sample<Self>) -> Probability {
132        Probability::new_unchecked(self.log_cdf(x).exp())
133    }
134
135    /// Evaluates the complementary CDF at \\(x\\).
136    ///
137    /// The complementary CDF (also known as the survival function) is defined
138    /// as the probability that a random variable \\(X\\) takes on a value
139    /// strictly greater than \\(x\\), i.e. \\(\bar{F}(x) = P(X > x) = 1 -
140    /// F(x)\\).
141    fn ccdf(&self, x: &Sample<Self>) -> Probability { !self.cdf(x) }
142
143    ln_variant!(
144        /// Evaluates the log CDF at \\(x\\), i.e. \\(\ln{F(x)}\\).
145        => cdf, log_cdf, &Sample<Self>
146    );
147
148    ln_variant!(
149        /// Evaluates the log complementary CDF at \\(x\\), i.e. \\(\ln{(1 - F(x))}\\).
150        => ccdf, log_ccdf, &Sample<Self>
151    );
152
153    /// Draw a random value from the distribution support.
154    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> Sample<Self>;
155
156    /// Draw n random value from the distribution support.
157    fn sample_n<R: rand::Rng + ?Sized>(&self, rng: &mut R, n: usize) -> Vec<Sample<Self>> {
158        (0..n).into_iter().map(move |_| self.sample(rng)).collect()
159    }
160
161    /// Draw an indefinite number of random values from the distribution
162    /// support.
163    fn sample_iter<'a, R: rand::Rng + ?Sized>(&'a self, rng: &'a mut R) -> Sampler<'a, Self, R> {
164        Sampler {
165            distribution: self,
166            rng,
167        }
168    }
169}
170
171macro_rules! new_dist {
172    ($(#[$attr:meta])* $name:ident<$pt:ty>) => {
173        $(#[$attr])*
174        #[derive(Debug, Clone)]
175        pub struct $name(pub(crate) $pt);
176
177        impl From<$pt> for $name {
178            fn from(from: $pt) -> $name { $name(from) }
179        }
180    };
181}
182
183#[inline]
184pub fn params_of<D: Distribution>(dist: &D) -> D::Params { dist.params() }
185
186#[inline]
187pub fn support_of<D: Distribution>(dist: &D) -> D::Support { dist.support() }
188
189/// Trait for [distributions](trait.Distribution.html) over a countable
190/// `Support`.
191///
192/// The PMF is defined as the probability that a random variable \\(X\\) takes a
193/// value exactly equal to \\(x\\), i.e. \\(f(x) = P(X = x) = P(s \in S : X(s) =
194/// x)\\). We require that all sum of probabilities over all possible outcomes
195/// sums to 1.
196pub trait DiscreteDistribution: Distribution {
197    /// Evaluates the probability mass function (PMF) at \\(x\\).
198    ///
199    /// # Examples
200    /// ```
201    /// # use rstat::{DiscreteDistribution, Probability, univariate::binomial::Binomial};
202    /// let dist = Binomial::new_unchecked(5, Probability::new_unchecked(0.75));
203    ///
204    /// assert_eq!(dist.pmf(&0), Probability::new_unchecked(0.0009765625));
205    /// assert_eq!(dist.pmf(&1), Probability::new_unchecked(0.0146484375));
206    /// assert_eq!(dist.pmf(&2), Probability::new_unchecked(0.087890625));
207    /// assert_eq!(dist.pmf(&3), Probability::new_unchecked(0.263671875));
208    /// assert_eq!(dist.pmf(&4), Probability::new_unchecked(0.3955078125));
209    /// assert_eq!(dist.pmf(&5), Probability::new_unchecked(0.2373046875));
210    /// ```
211    fn pmf(&self, x: &Sample<Self>) -> Probability {
212        Probability::new_unchecked(self.log_pmf(x).exp())
213    }
214
215    ln_variant!(
216        /// Evaluates the log PMF at \\(x\\).
217        => pmf, log_pmf, &Sample<Self>
218    );
219}
220
221/// Trait for [distributions](trait.Distribution.html) with an absolutely
222/// continuous CDF.
223///
224/// The PDF can be interpreted as the relative likelihood that a random variable
225/// \\(X\\) takes on a value equal to \\(x\\). For absolutely continuous
226/// univariate distributions it is defined by the derivative of the CDF, i.e
227/// \\(f(x) = F'(x)\\). Intuitively, one may think of \\(f(x)\text{d}x\\) that
228/// as representing the probability that the random variable \\(X\\) lies in the
229/// infinitesimal interval \\([x, x + \text{d}x]\\). Alternatively, one can
230/// interpret the PDF, for infinitesimally small \\(\text{d}t\\), as:
231/// \\(f(t)\text{d}t = P(t < X < t + \text{d}t)\\). For a finite interval \\([a,
232/// b],\\) we have that: \\[P(a < X < b) = \int_a^b f(t)\text{d}t.\\]
233pub trait ContinuousDistribution: Distribution {
234    /// Evaluates the probability density function (PDF) at \\(x\\).
235    ///
236    /// # Examples
237    /// ```
238    /// # use rstat::{ContinuousDistribution, Probability, univariate::triangular::Triangular};
239    /// let dist = Triangular::new_unchecked(0.0, 0.5, 0.5);
240    ///
241    /// assert_eq!(dist.pdf(&0.0), 0.0);
242    /// assert_eq!(dist.pdf(&0.25), 1.0);
243    /// assert_eq!(dist.pdf(&0.5), 2.0);
244    /// assert_eq!(dist.pdf(&0.75), 1.0);
245    /// assert_eq!(dist.pdf(&1.0), 0.0);
246    /// ```
247    fn pdf(&self, x: &Sample<Self>) -> f64 { self.log_pdf(x).exp() }
248
249    ln_variant!(
250        /// Evaluates the log PDF at \\(x\\).
251        => pdf, log_pdf, &Sample<Self>
252    );
253}
254
255/// Trait for [distributions](trait.Distribution.html) that support the convolve
256/// operation.
257///
258/// The convolution of probability [distributions](trait.Distribution.html)
259/// amounts to taking linear combinations of independent random variables. For
260/// example, consider a set of \\(N\\) random variables \\(X_i \sim
261/// \text{Bernoulli}(p)\\), where \\(p \in (0, 1)\\) and \\(1 \leq i \leq N\\).
262/// We then have that the random variables \\(Y = \sum_{i=1}^N X_i\\) and \\(Z
263/// \sim \text{Binomial}(N, p)\\) are exactly equivalent, i.e. \\(Y
264/// \stackrel{\text{d}}{=} Z\\).
265pub trait Convolution<T: Distribution = Self> {
266    /// The resulting [Distribution](trait.Distribution.html) type.
267    type Output: Distribution;
268
269    /// Return the unweighted linear sum of `self` with another
270    /// [Distribution](trait.Distribution.html) of type `T`.
271    ///
272    /// # Examples
273    /// ```
274    /// # use rstat::{Distribution, Convolution, params::Param, univariate::normal::Normal};
275    /// let dist_a = Normal::new_unchecked(0.0, 1.0f64.powi(2));
276    /// let dist_b = Normal::new_unchecked(1.0, 2.0f64.powi(2));
277    ///
278    /// let dist_c = dist_a.convolve(dist_b).unwrap();
279    /// let params = dist_c.params();
280    ///
281    /// assert_eq!(params.mu.value(), &1.0);
282    /// assert_eq!(params.Sigma.value(), &5.0f64);
283    /// ```
284    fn convolve(self, rv: T) -> Result<Self::Output, failure::Error>;
285
286    /// Return the unweighted linear sum of `self` with a set of
287    /// [Distributions](trait.Distribution.html) of type `T`.
288    fn convolve_many(self, mut rvs: Vec<T>) -> Result<Self::Output, failure::Error>
289    where
290        Self::Output: Convolution<T, Output = Self::Output>,
291        Self: Sized,
292    {
293        let n = rvs.len();
294        let _ = assert_constraint!(n > 1).map_err(|e| e.with_target("n"))?;
295
296        let new_dist = self.convolve(rvs.pop().unwrap());
297
298        rvs.into_iter()
299            .fold(new_dist, |acc, rv| acc.and_then(|d| d.convolve(rv)))
300    }
301}
302
303pub mod metrics;
304pub mod statistics;
305pub mod fitting;
306
307pub mod normal;
308pub mod univariate;
309pub mod bivariate;
310pub mod multivariate;
311
312mod mixture;
313pub use self::mixture::Mixture;
314
315pub mod builder;