rstat/lib.rs
1//! Probability distributions and statistics in Rust with integrated fitting
2//! routines, convolution support and mixtures.
3// #![warn(missing_docs)]
4extern crate failure;
5
6extern crate rand;
7extern crate rand_distr;
8
9extern crate num;
10extern crate special_fun;
11
12extern crate spaces;
13
14extern crate ndarray;
15#[cfg(feature = "ndarray-linalg")]
16extern crate ndarray_linalg;
17
18#[cfg_attr(feature = "serde", macro_use)]
19#[cfg(feature = "serde")]
20extern crate serde_crate;
21
22macro_rules! undefined {
23 () => (panic!("quantity undefined"));
24 ($($arg:tt)+) => (panic!("quantity undefined: {}", std::format_args!($($arg)+)));
25}
26
27mod consts;
28mod utils;
29
30pub mod linalg;
31
32mod probability;
33pub use self::probability::{
34 InvalidProbabilityError, Probability,
35 InvalidSimplexError, SimplexVector, UnitSimplex,
36};
37
38#[macro_use]
39pub mod params;
40
41/// Iterator for drawing random samples from a
42/// [distribution](trait.Distribution.html).
43pub struct Sampler<'a, D: ?Sized, R: ?Sized> {
44 pub(crate) distribution: &'a D,
45 pub(crate) rng: &'a mut R,
46}
47
48impl<'a, D, R> Iterator for Sampler<'a, D, R>
49where
50 D: Distribution + ?Sized,
51 R: rand::Rng + ?Sized,
52{
53 type Item = <D::Support as spaces::Space>::Value;
54
55 #[inline(always)]
56 fn next(&mut self) -> Option<Self::Item> { Some(self.distribution.sample(&mut self.rng)) }
57
58 fn size_hint(&self) -> (usize, Option<usize>) { (usize::max_value(), None) }
59}
60
61macro_rules! ln_variant {
62 ($(#[$attr:meta])* => $name:ident, $name_ln:ident, $x:ty) => {
63 $(#[$attr])*
64 fn $name_ln(&self, x: $x) -> f64 {
65 self.$name(x).ln()
66 }
67 }
68}
69
70/// Type alias for the sample type of a [distribution](trait.Distribution.html).
71pub type Sample<D> = <<D as Distribution>::Support as spaces::Space>::Value;
72
73/// Trait for probability distributions with a well-defined CDF.
74pub trait Distribution: From<<Self as Distribution>::Params> {
75 /// Support of sample elements.
76 type Support: spaces::Space;
77
78 /// Parameter set uniquely defining the instance.
79 type Params;
80
81 /// Returns an instance of the support `Space`, `Self::Support`.
82 ///
83 /// # Examples
84 /// ```
85 /// # use spaces::{Space, BoundedSpace, Dim};
86 /// # use rstat::{Distribution, univariate, params::Param};
87 /// let dist = univariate::beta::Beta::default();
88 /// let support = dist.support();
89 ///
90 /// assert_eq!(support.dim(), Dim::Finite(1));
91 /// assert_eq!(support.inf().unwrap(), 0.0);
92 /// assert_eq!(support.sup().unwrap(), 1.0);
93 /// ```
94 fn support(&self) -> Self::Support;
95
96 /// Converts `self` into an instance of `Self::Support`.
97 fn into_support(self) -> Self::Support { self.support() }
98
99 /// Returns an instance of the distribution parameters, `Self::Params`.
100 ///
101 /// # Examples
102 /// ```
103 /// # use rstat::{Distribution, univariate, params::Param};
104 /// let dist = univariate::normal::Normal::standard();
105 /// let params = dist.params();
106 ///
107 /// assert_eq!(params.mu.value(), &0.0);
108 /// assert_eq!(params.Sigma.value(), &1.0);
109 /// ```
110 fn params(&self) -> Self::Params;
111
112 /// Converts `self` into an instance of `Self::Params`.
113 fn into_params(self) -> Self::Params { self.params() }
114
115 /// Evaluates the cumulative distribution function (CDF) at \\(x\\).
116 ///
117 /// The CDF is defined as the probability that a random variable \\(X\\)
118 /// takes on a value less than or equal to \\(x\\), i.e. \\(F(x) = P(X
119 /// \leq x)\\).
120 ///
121 /// # Examples
122 /// ```
123 /// # use rstat::{Distribution, Probability, univariate};
124 /// # use std::f64;
125 /// let dist = univariate::normal::Normal::standard();
126 ///
127 /// assert_eq!(dist.cdf(&f64::NEG_INFINITY), Probability::zero());
128 /// assert_eq!(dist.cdf(&0.0), Probability::half());
129 /// assert_eq!(dist.cdf(&f64::INFINITY), Probability::one());
130 /// ```
131 fn cdf(&self, x: &Sample<Self>) -> Probability {
132 Probability::new_unchecked(self.log_cdf(x).exp())
133 }
134
135 /// Evaluates the complementary CDF at \\(x\\).
136 ///
137 /// The complementary CDF (also known as the survival function) is defined
138 /// as the probability that a random variable \\(X\\) takes on a value
139 /// strictly greater than \\(x\\), i.e. \\(\bar{F}(x) = P(X > x) = 1 -
140 /// F(x)\\).
141 fn ccdf(&self, x: &Sample<Self>) -> Probability { !self.cdf(x) }
142
143 ln_variant!(
144 /// Evaluates the log CDF at \\(x\\), i.e. \\(\ln{F(x)}\\).
145 => cdf, log_cdf, &Sample<Self>
146 );
147
148 ln_variant!(
149 /// Evaluates the log complementary CDF at \\(x\\), i.e. \\(\ln{(1 - F(x))}\\).
150 => ccdf, log_ccdf, &Sample<Self>
151 );
152
153 /// Draw a random value from the distribution support.
154 fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> Sample<Self>;
155
156 /// Draw n random value from the distribution support.
157 fn sample_n<R: rand::Rng + ?Sized>(&self, rng: &mut R, n: usize) -> Vec<Sample<Self>> {
158 (0..n).into_iter().map(move |_| self.sample(rng)).collect()
159 }
160
161 /// Draw an indefinite number of random values from the distribution
162 /// support.
163 fn sample_iter<'a, R: rand::Rng + ?Sized>(&'a self, rng: &'a mut R) -> Sampler<'a, Self, R> {
164 Sampler {
165 distribution: self,
166 rng,
167 }
168 }
169}
170
171macro_rules! new_dist {
172 ($(#[$attr:meta])* $name:ident<$pt:ty>) => {
173 $(#[$attr])*
174 #[derive(Debug, Clone)]
175 pub struct $name(pub(crate) $pt);
176
177 impl From<$pt> for $name {
178 fn from(from: $pt) -> $name { $name(from) }
179 }
180 };
181}
182
183#[inline]
184pub fn params_of<D: Distribution>(dist: &D) -> D::Params { dist.params() }
185
186#[inline]
187pub fn support_of<D: Distribution>(dist: &D) -> D::Support { dist.support() }
188
189/// Trait for [distributions](trait.Distribution.html) over a countable
190/// `Support`.
191///
192/// The PMF is defined as the probability that a random variable \\(X\\) takes a
193/// value exactly equal to \\(x\\), i.e. \\(f(x) = P(X = x) = P(s \in S : X(s) =
194/// x)\\). We require that all sum of probabilities over all possible outcomes
195/// sums to 1.
196pub trait DiscreteDistribution: Distribution {
197 /// Evaluates the probability mass function (PMF) at \\(x\\).
198 ///
199 /// # Examples
200 /// ```
201 /// # use rstat::{DiscreteDistribution, Probability, univariate::binomial::Binomial};
202 /// let dist = Binomial::new_unchecked(5, Probability::new_unchecked(0.75));
203 ///
204 /// assert_eq!(dist.pmf(&0), Probability::new_unchecked(0.0009765625));
205 /// assert_eq!(dist.pmf(&1), Probability::new_unchecked(0.0146484375));
206 /// assert_eq!(dist.pmf(&2), Probability::new_unchecked(0.087890625));
207 /// assert_eq!(dist.pmf(&3), Probability::new_unchecked(0.263671875));
208 /// assert_eq!(dist.pmf(&4), Probability::new_unchecked(0.3955078125));
209 /// assert_eq!(dist.pmf(&5), Probability::new_unchecked(0.2373046875));
210 /// ```
211 fn pmf(&self, x: &Sample<Self>) -> Probability {
212 Probability::new_unchecked(self.log_pmf(x).exp())
213 }
214
215 ln_variant!(
216 /// Evaluates the log PMF at \\(x\\).
217 => pmf, log_pmf, &Sample<Self>
218 );
219}
220
221/// Trait for [distributions](trait.Distribution.html) with an absolutely
222/// continuous CDF.
223///
224/// The PDF can be interpreted as the relative likelihood that a random variable
225/// \\(X\\) takes on a value equal to \\(x\\). For absolutely continuous
226/// univariate distributions it is defined by the derivative of the CDF, i.e
227/// \\(f(x) = F'(x)\\). Intuitively, one may think of \\(f(x)\text{d}x\\) that
228/// as representing the probability that the random variable \\(X\\) lies in the
229/// infinitesimal interval \\([x, x + \text{d}x]\\). Alternatively, one can
230/// interpret the PDF, for infinitesimally small \\(\text{d}t\\), as:
231/// \\(f(t)\text{d}t = P(t < X < t + \text{d}t)\\). For a finite interval \\([a,
232/// b],\\) we have that: \\[P(a < X < b) = \int_a^b f(t)\text{d}t.\\]
233pub trait ContinuousDistribution: Distribution {
234 /// Evaluates the probability density function (PDF) at \\(x\\).
235 ///
236 /// # Examples
237 /// ```
238 /// # use rstat::{ContinuousDistribution, Probability, univariate::triangular::Triangular};
239 /// let dist = Triangular::new_unchecked(0.0, 0.5, 0.5);
240 ///
241 /// assert_eq!(dist.pdf(&0.0), 0.0);
242 /// assert_eq!(dist.pdf(&0.25), 1.0);
243 /// assert_eq!(dist.pdf(&0.5), 2.0);
244 /// assert_eq!(dist.pdf(&0.75), 1.0);
245 /// assert_eq!(dist.pdf(&1.0), 0.0);
246 /// ```
247 fn pdf(&self, x: &Sample<Self>) -> f64 { self.log_pdf(x).exp() }
248
249 ln_variant!(
250 /// Evaluates the log PDF at \\(x\\).
251 => pdf, log_pdf, &Sample<Self>
252 );
253}
254
255/// Trait for [distributions](trait.Distribution.html) that support the convolve
256/// operation.
257///
258/// The convolution of probability [distributions](trait.Distribution.html)
259/// amounts to taking linear combinations of independent random variables. For
260/// example, consider a set of \\(N\\) random variables \\(X_i \sim
261/// \text{Bernoulli}(p)\\), where \\(p \in (0, 1)\\) and \\(1 \leq i \leq N\\).
262/// We then have that the random variables \\(Y = \sum_{i=1}^N X_i\\) and \\(Z
263/// \sim \text{Binomial}(N, p)\\) are exactly equivalent, i.e. \\(Y
264/// \stackrel{\text{d}}{=} Z\\).
265pub trait Convolution<T: Distribution = Self> {
266 /// The resulting [Distribution](trait.Distribution.html) type.
267 type Output: Distribution;
268
269 /// Return the unweighted linear sum of `self` with another
270 /// [Distribution](trait.Distribution.html) of type `T`.
271 ///
272 /// # Examples
273 /// ```
274 /// # use rstat::{Distribution, Convolution, params::Param, univariate::normal::Normal};
275 /// let dist_a = Normal::new_unchecked(0.0, 1.0f64.powi(2));
276 /// let dist_b = Normal::new_unchecked(1.0, 2.0f64.powi(2));
277 ///
278 /// let dist_c = dist_a.convolve(dist_b).unwrap();
279 /// let params = dist_c.params();
280 ///
281 /// assert_eq!(params.mu.value(), &1.0);
282 /// assert_eq!(params.Sigma.value(), &5.0f64);
283 /// ```
284 fn convolve(self, rv: T) -> Result<Self::Output, failure::Error>;
285
286 /// Return the unweighted linear sum of `self` with a set of
287 /// [Distributions](trait.Distribution.html) of type `T`.
288 fn convolve_many(self, mut rvs: Vec<T>) -> Result<Self::Output, failure::Error>
289 where
290 Self::Output: Convolution<T, Output = Self::Output>,
291 Self: Sized,
292 {
293 let n = rvs.len();
294 let _ = assert_constraint!(n > 1).map_err(|e| e.with_target("n"))?;
295
296 let new_dist = self.convolve(rvs.pop().unwrap());
297
298 rvs.into_iter()
299 .fold(new_dist, |acc, rv| acc.and_then(|d| d.convolve(rv)))
300 }
301}
302
303pub mod metrics;
304pub mod statistics;
305pub mod fitting;
306
307pub mod normal;
308pub mod univariate;
309pub mod bivariate;
310pub mod multivariate;
311
312mod mixture;
313pub use self::mixture::Mixture;
314
315pub mod builder;