easy_ml/
distributions.rs

1/*!
2Models of distributions that samples can be drawn from.
3
4These structs and methods require numerical types that can be treated as real numbers, ie
5unsigned and signed integers cannot be used here.
6
7# Example of plotting a Gaussian
8
9```
10extern crate rand;
11extern crate rand_chacha;
12extern crate textplots;
13extern crate easy_ml;
14
15use rand::{Rng, SeedableRng};
16use rand::distr::{Iter, StandardUniform};
17use rand_chacha::ChaCha8Rng;
18use textplots::{Chart, Plot, Shape};
19use easy_ml::distributions::Gaussian;
20
21const SAMPLES: usize = 10000;
22
23// create a normal distribution, note that the mean and variance are
24// given in floating point notation as this will be a f64 Gaussian
25let normal_distribution = Gaussian::new(0.0, 1.0);
26
27// first create random numbers between 0 and 1
28// using a fixed seed random generator from the rand crate
29let mut random_generator = ChaCha8Rng::seed_from_u64(10);
30let mut random_numbers: Iter<StandardUniform, &mut ChaCha8Rng, f64> =
31    (&mut random_generator).sample_iter(StandardUniform);
32
33// draw samples from the normal distribution
34let samples: Vec<f64> = normal_distribution.draw(&mut random_numbers, SAMPLES)
35    // unwrap is perfectly safe if and only if we know we have supplied enough random numbers
36    .unwrap();
37
38// create a [(f32, f32)] list to plot a histogram of
39let histogram_points = {
40    let x = 0..SAMPLES;
41    let mut y = samples;
42    let mut points = Vec::with_capacity(SAMPLES);
43    for (x, y) in y.into_iter().zip(x).map(|(y, x)| (x as f32, y as f32)) {
44        points.push((x, y));
45    }
46    points
47};
48
49// Plot a histogram from -3 to 3 with 30 bins to check that this distribution
50// looks like a Gaussian. This will show a bell curve for large enough SAMPLES.
51let histogram = textplots::utils::histogram(&histogram_points, -3.0, 3.0, 30);
52Chart::new(180, 60, -3.0, 3.0)
53    .lineplot(&Shape::Bars(&histogram))
54    .nice();
55```
56
57# Getting an infinite iterator using the rand crate
58
59It may be convenient to create an infinite iterator for random numbers so you don't need
60to populate lists of random numbers when using these types.
61
62```
63use rand::{Rng, SeedableRng};
64use rand::distr::{Iter, StandardUniform};
65use rand_chacha::ChaCha8Rng;
66
67// using a fixed seed random generator from the rand crate
68let mut random_generator = ChaCha8Rng::seed_from_u64(16);
69// now pass this Iterator to Gaussian functions that accept a &mut Iterator
70let mut random_numbers: Iter<StandardUniform, &mut ChaCha8Rng, f64> =
71    (&mut random_generator).sample_iter(StandardUniform);
72```
73
74# Example of creating an infinite iterator
75
76The below example is for reference, don't actually do this if you're using rand because rand
77can give you an infinite iterator already (see above example).
78
79```
80use rand::{Rng, SeedableRng};
81
82// using a fixed seed random generator from the rand crate
83let mut random_generator = rand_chacha::ChaCha8Rng::seed_from_u64(16);
84
85struct EndlessRandomGenerator {
86    rng: rand_chacha::ChaCha8Rng
87}
88
89impl Iterator for EndlessRandomGenerator {
90    type Item = f64;
91
92    fn next(&mut self) -> Option<Self::Item> {
93        // always return Some, hence this iterator is infinite
94        Some(self.rng.random::<f64>())
95    }
96}
97
98// now pass this Iterator to Gaussian functions that accept a &mut Iterator
99let mut random_numbers = EndlessRandomGenerator { rng: random_generator };
100```
101
102# Example of creating an infinite iterator for web assembly targets
103[See web_assembly module for Example of creating an infinite iterator for web assembly targets](super::web_assembly)
104 */
105
106use crate::linear_algebra;
107use crate::matrices::Matrix;
108use crate::numeric::extra::{Real, RealRef};
109use crate::tensors::views::{TensorRef, TensorView};
110use crate::tensors::{Dimension, Tensor};
111
112use std::error::Error;
113use std::fmt;
114
115/**
116 * A Gaussian probability density function of a normally distributed
117 * random variable with expected value / mean μ, and variance σ<sup>2</sup>.
118 *
119 * See: [https://en.wikipedia.org/wiki/Gaussian_function](https://en.wikipedia.org/wiki/Gaussian_function)
120 */
121#[derive(Clone, Debug)]
122pub struct Gaussian<T: Real> {
123    /**
124     * The mean is the expected value of this gaussian.
125     */
126    pub mean: T,
127    /**
128     * The variance is a measure of the spread of values around the mean, high variance means
129     * one standard deviation encompasses a larger spread of values from the mean.
130     */
131    pub variance: T,
132}
133
134impl<T: Real> Gaussian<T> {
135    pub fn new(mean: T, variance: T) -> Gaussian<T> {
136        Gaussian { mean, variance }
137    }
138
139    /**
140     * Creates a Gaussian approximating the mean and variance in the provided
141     * data.
142     *
143     * Note that this will always be an approximation, if you generate some data
144     * according to some mean and variance then construct a Gaussian from
145     * the mean and variance of that generated data the approximated mean
146     * and variance is unlikely to be exactly the same as the parameters the
147     * data was generated with, though as the amout of data increases you
148     * can expect the approximation to be more close.
149     */
150    pub fn approximating<I>(data: I) -> Gaussian<T>
151    where
152        I: Iterator<Item = T>,
153    {
154        let mut copy: Vec<T> = data.collect();
155        Gaussian {
156            // duplicate the data to pass once each to mean and variance
157            // functions of linear_algebra
158            mean: linear_algebra::mean(copy.iter().cloned()),
159            variance: linear_algebra::variance(copy.drain(..)),
160        }
161    }
162}
163
164impl<T: Real> Gaussian<T>
165where
166    for<'a> &'a T: RealRef<T>,
167{
168    /**
169     * Computes g(x) for some x, the probability density of a normally
170     * distributed random variable x, or in other words how likely x is
171     * to be drawn from this normal distribution.
172     *
173     * g(x) is largest for x equal to this distribution's mean and
174     * g(x) will tend towards zero as x is further from this distribution's
175     * mean, at a rate corresponding to this distribution's variance.
176     */
177    pub fn probability(&self, x: &T) -> T {
178        // FIXME: &T sqrt doesn't seem to be picked up by the compiler here
179        let standard_deviation = self.variance.clone().sqrt();
180        let two = T::one() + T::one();
181        let two_pi = &two * T::pi();
182        let fraction = T::one() / (standard_deviation * (&two_pi.sqrt()));
183        let exponent = (-T::one() / &two) * ((x - &self.mean) / &self.variance).pow(&two);
184        fraction * exponent.exp()
185    }
186
187    /**
188     * Given a source of random variables in the uniformly distributed
189     * range [0, 1] inclusive, draws `max_samples` of independent
190     * random numbers according to this Gaussian distribution's mean and
191     * variance using the Box-Muller transform:
192     *
193     * [https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform)
194     *
195     * The source of random variables must provide at least as many values
196     * as `max_samples` if `max_samples` is even, and one more than `max_samples`
197     * if `max_samples` is odd. If fewer are provided None is returned.
198     *
199     * As all randomness is provided to this method, this code is deterministic
200     * and will always compute the same samples given the same random source
201     * of numbers.
202     *
203     * [Example of generating and feeding random numbers](self)
204     */
205    pub fn draw<I>(&self, source: &mut I, max_samples: usize) -> Option<Vec<T>>
206    where
207        I: Iterator<Item = T>,
208    {
209        let two = T::one() + T::one();
210        let minus_two = -&two;
211        let two_pi = &two * T::pi();
212        let mut samples = Vec::with_capacity(max_samples);
213        let standard_deviation = self.variance.clone().sqrt();
214        // keep drawing samples from this normal Gaussian distribution
215        // until either the iterator runs out or we reach the max_samples
216        // limit
217        while samples.len() < max_samples {
218            let (u, v) = self.generate_pair(source)?;
219            // these computations convert two samples from the inclusive 0 - 1
220            // range to two samples of a normal distribution with with
221            // μ = 0 and σ = 1.
222            let z1 = (&minus_two * u.clone().ln()).sqrt() * ((&two_pi * &v).cos());
223            let z2 = (&minus_two * u.clone().ln()).sqrt() * ((&two_pi * &v).sin());
224            // now we scale to the mean and variance for this Gaussian
225            let sample1 = (z1 * &standard_deviation) + &self.mean;
226            let sample2 = (z2 * &standard_deviation) + &self.mean;
227            samples.push(sample1);
228            samples.push(sample2);
229        }
230        // return the full list of samples, removing the final sample
231        // if adding 2 samples took us over the max
232        if samples.len() > max_samples {
233            samples.pop();
234            return Some(samples);
235        }
236        Some(samples)
237    }
238
239    fn generate_pair<I>(&self, source: &mut I) -> Option<(T, T)>
240    where
241        I: Iterator<Item = T>,
242    {
243        Some((source.next()?, source.next()?))
244    }
245
246    #[deprecated(since = "1.1.0", note = "renamed to `probability`")]
247    pub fn map(&self, x: &T) -> T {
248        self.probability(x)
249    }
250}
251
252/**
253 * A multivariate Gaussian distribution with mean vector μ, and covariance matrix Σ.
254 *
255 * See: [https://en.wikipedia.org/wiki/Multivariate_normal_distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution)
256 *
257 * # Invariants
258 *
259 * The mean [Matrix] must always be a column vector, and must be the same length as the
260 * covariance matrix.
261 */
262#[derive(Clone, Debug)]
263pub struct MultivariateGaussian<T: Real> {
264    mean: Matrix<T>,
265    covariance: Matrix<T>,
266}
267
268impl<T: Real> MultivariateGaussian<T> {
269    /**
270     * Constructs a new multivariate Gaussian distribution from
271     * a Nx1 column vector of means and a NxN covariance matrix
272     *
273     * This function does not check that the provided covariance matrix
274     * is actually a covariance matrix. If a square matrix that is not
275     * symmetric is supplied the Gaussian is not defined.
276     *
277     * # Panics
278     *
279     * Panics if the covariance matrix is not square, or the column vector
280     * is not the same length as the covariance matrix size. Does not currently
281     * panic if the covariance matrix is not symmetric, but this could be checked
282     * in the future.
283     */
284    pub fn new(mean: Matrix<T>, covariance: Matrix<T>) -> MultivariateGaussian<T> {
285        assert!(mean.columns() == 1, "Mean must be a column vector");
286        assert!(
287            covariance.rows() == covariance.columns(),
288            "Supplied 'covariance' matrix is not square"
289        );
290        assert!(
291            mean.rows() == covariance.rows(),
292            "Means must be same length as covariance matrix"
293        );
294        MultivariateGaussian { mean, covariance }
295    }
296
297    /**
298     * The mean is a column vector of expected values in each dimension
299     */
300    pub fn mean(&self) -> &Matrix<T> {
301        &self.mean
302    }
303
304    /**
305     * The covariance matrix is a measure of how much values from each dimension vary
306     * from their expected value with respect to each other.
307     *
308     * For a 2 dimensional multivariate Gaussian the covariance matrix could be the 2x2 identity
309     * matrix:
310     *
311     * ```ignore
312     * [
313     *   1.0, 0.0
314     *   0.0, 1.0
315     * ]
316     * ```
317     *
318     * In which case the two dimensions are completely uncorrelated as `C[0,1] = C[1,0] = 0`.
319     */
320    pub fn covariance(&self) -> &Matrix<T> {
321        &self.covariance
322    }
323}
324
325impl<T: Real> MultivariateGaussian<T>
326where
327    for<'a> &'a T: RealRef<T>,
328{
329    /**
330     * Draws samples from this multivariate distribution, provided that the covariance
331     * matrix is positive definite.
332     *
333     * For max_samples of M, sufficient random numbers from the source iterator in the uniformly
334     * distributed range [0, 1] inclusive, and this Gaussian's dimensionality of N, returns an
335     * MxN matrix of drawn values.
336     *
337     * The source iterator must have at least MxN random values if N is even, and
338     * Mx(N+1) random values if N is odd, or `None` will be returned.
339     *
340     * [Example of generating and feeding random numbers](super::k_means)
341     *
342     * If the covariance matrix is only positive semi definite, `None` is returned. You
343     * can check if a given covariance matrix is positive definite instead of just positive semi
344     * definite with the [cholesky](linear_algebra::cholesky_decomposition) decomposition.
345     */
346    pub fn draw<I>(&self, source: &mut I, max_samples: usize) -> Option<Matrix<T>>
347    where
348        I: Iterator<Item = T>,
349    {
350        use crate::interop::{DimensionNames, RowAndColumn, TensorRefMatrix};
351        // Since we already validated our state on construction, we wouldn't expect these
352        // conversions to fail but if they do return None
353        // Convert the column vector to a 1 dimensional tensor by selecting the sole column
354        let mean = crate::tensors::views::TensorIndex::from(
355            TensorRefMatrix::from(&self.mean).ok()?,
356            [(RowAndColumn.names()[1], 0)],
357        );
358        let covariance = TensorRefMatrix::from(&self.covariance).ok()?;
359        let samples = draw_tensor_samples::<T, _, _, _>(
360            &mean,
361            &covariance,
362            source,
363            max_samples,
364            "samples",
365            "features",
366        );
367        samples.map(|tensor| tensor.into_matrix())
368    }
369}
370
371/**
372 * A multivariate Gaussian distribution with mean vector μ, and covariance matrix Σ.
373 *
374 * See: [https://en.wikipedia.org/wiki/Multivariate_normal_distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution)
375 */
376#[derive(Clone, Debug)]
377pub struct MultivariateGaussianTensor<T: Real> {
378    mean: Tensor<T, 1>,
379    covariance: Tensor<T, 2>,
380}
381
382#[non_exhaustive]
383#[derive(Clone, Debug, PartialEq)]
384pub enum MultivariateGaussianError<T> {
385    NotCovarianceMatrix {
386        mean: Tensor<T, 1>,
387        covariance: Tensor<T, 2>,
388    },
389    MeanVectorWrongLength {
390        mean: Tensor<T, 1>,
391        covariance: Tensor<T, 2>,
392    },
393}
394
395impl<T: fmt::Debug> fmt::Display for MultivariateGaussianError<T> {
396    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
397        match self {
398            MultivariateGaussianError::NotCovarianceMatrix {
399                mean: _,
400                covariance,
401            } => write!(f, "Covariance matrix is not square: {:?}", covariance,),
402            MultivariateGaussianError::MeanVectorWrongLength { mean, covariance } => write!(
403                f,
404                "Mean vector has a different length {:?} to the covariance matrix size: {:?}",
405                mean.shape(),
406                covariance.shape(),
407            ),
408        }
409    }
410}
411
412impl<T: fmt::Debug> Error for MultivariateGaussianError<T> {}
413
414impl<T: Real> MultivariateGaussianTensor<T> {
415    /**
416     * Constructs a new multivariate Gaussian distribution from
417     * a N length vector of means and a NxN covariance matrix
418     *
419     * This function does not check that the provided covariance matrix
420     * is actually a covariance matrix. If a square matrix that is not
421     * symmetric is supplied the Gaussian is not defined.
422     *
423     * Result::Err is returned if the covariance matrix is not square, or the mean
424     * vector is not the same length as the size of the covariance matrix. Does not currently
425     * panic if the covariance matrix is not symmetric, but this could be checked
426     * in the future.
427     *
428     * The dimension names of the mean and covariance matrix are not used, and do not need
429     * to match.
430     */
431    // Boxing error variant as per clippy lint that MultivariateGaussianError is 152 bytes which
432    // is kinda big
433    pub fn new(
434        mean: Tensor<T, 1>,
435        covariance: Tensor<T, 2>,
436    ) -> Result<MultivariateGaussianTensor<T>, Box<MultivariateGaussianError<T>>> {
437        let covariance_shape = covariance.shape();
438        if !crate::tensors::dimensions::is_square(&covariance_shape) {
439            return Err(Box::new(MultivariateGaussianError::NotCovarianceMatrix {
440                mean,
441                covariance,
442            }));
443        }
444        let length = covariance_shape[0].1;
445        if mean.shape()[0].1 != length {
446            return Err(Box::new(MultivariateGaussianError::MeanVectorWrongLength {
447                mean,
448                covariance,
449            }));
450        }
451        Ok(MultivariateGaussianTensor { mean, covariance })
452    }
453
454    /**
455     * The mean is a vector of expected values in each dimension
456     */
457    pub fn mean(&self) -> &Tensor<T, 1> {
458        &self.mean
459    }
460
461    /**
462     * The covariance matrix is a NxN matrix where N is the number of dimensions for
463     * this Gaussian. A covariance matrix must always be symmetric, that is `C[i,j] = C[j,i]`.
464     *
465     * The covariance matrix is a measure of how much values from each dimension vary
466     * from their expected value with respect to each other.
467     *
468     * For a 2 dimensional multivariate Gaussian the covariance matrix could be the 2x2 identity
469     * matrix:
470     *
471     * ```ignore
472     * [
473     *   1.0, 0.0
474     *   0.0, 1.0
475     * ]
476     * ```
477     *
478     * In which case the two dimensions are completely uncorrelated as `C[0,1] = C[1,0] = 0`.
479     */
480    pub fn covariance(&self) -> &Tensor<T, 2> {
481        &self.covariance
482    }
483}
484
485fn draw_tensor_samples<T, S1, S2, I>(
486    mean: S1,
487    covariance: S2,
488    source: &mut I,
489    max_samples: usize,
490    samples: Dimension,
491    features: Dimension,
492) -> Option<Tensor<T, 2>>
493where
494    T: Real,
495    for<'a> &'a T: RealRef<T>,
496    I: Iterator<Item = T>,
497    S1: TensorRef<T, 1>,
498    S2: TensorRef<T, 2>,
499{
500    if samples == features {
501        return None;
502    }
503    let mean = TensorView::from(mean);
504    let covariance = TensorView::from(covariance);
505    use linear_algebra::cholesky_decomposition_tensor as cholesky_decomposition;
506    // Follow the method outlined at
507    // https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Computational_methods
508    let normal_distribution = Gaussian::new(T::zero(), T::one());
509    let mut lower_triangular = cholesky_decomposition::<T, _, _>(&covariance)?;
510    lower_triangular.rename([samples, features]);
511
512    let number_of_samples = max_samples;
513    let number_of_features = mean.shape()[0].1;
514    let shape = [(samples, max_samples), (features, number_of_features)];
515    let mut drawn_samples = Tensor::empty(shape, T::zero());
516
517    // Construct a TensorView from the mean vector with a shape of
518    // [(samples, number_of_features), (features, 1)]
519    let column_vector_mean = mean.rename_view([samples]).expand_owned([(1, features)]);
520
521    let mut drawn_samples_iterator = drawn_samples.iter_reference_mut();
522    for _sample_row in 0..number_of_samples {
523        // use the box muller transform to get N independent values from
524        // a normal distribution (x)
525        let standard_normals = normal_distribution.draw(source, number_of_features)?;
526        let standard_normals = Tensor::from(
527            // Construct a column vector with as many rows as our N features
528            [(samples, standard_normals.len()), (features, 1)],
529            standard_normals,
530        );
531        // mean + (L * standard_normals) yields each m'th vector from the distribution
532        let random_vector = &column_vector_mean + (&lower_triangular * standard_normals);
533        // We now have an Nx1 matrix of samples which we can assign to this sample row vector
534        for x in random_vector.iter() {
535            // Since we'll assign a value exactly number_of_samples * number_of_features times
536            // this will always be the Some case
537            *drawn_samples_iterator.next()? = x;
538        }
539    }
540    Some(drawn_samples)
541}
542
543impl<T: Real> MultivariateGaussianTensor<T>
544where
545    for<'a> &'a T: RealRef<T>,
546{
547    /**
548     * Draws samples from this multivariate distribution, provided that the covariance
549     * matrix is positive definite.
550     *
551     * For max_samples of M, sufficient random numbers from the source iterator, in the uniformly
552     * distributed range [0, 1] inclusive and this Gaussian's dimensionality of N, returns an MxN
553     * matrix of drawn values with dimension names `samples` and `features` for M and N respectively.
554     *
555     * The source iterator must have at least MxN random values if N is even, and
556     * Mx(N+1) random values if N is odd, or `None` will be returned.
557     *
558     * [Example of generating and feeding random numbers](super::k_means)
559     *
560     * If the covariance matrix is only positive semi definite, `None` is returned. You
561     * can check if a given covariance matrix is positive definite instead of just positive semi
562     * definite with the [cholesky](linear_algebra::cholesky_decomposition_tensor) decomposition.
563     */
564    pub fn draw<I>(
565        &self,
566        source: &mut I,
567        max_samples: usize,
568        samples: Dimension,
569        features: Dimension,
570    ) -> Option<Tensor<T, 2>>
571    where
572        I: Iterator<Item = T>,
573    {
574        draw_tensor_samples::<T, _, _, _>(
575            &self.mean,
576            &self.covariance,
577            source,
578            max_samples,
579            samples,
580            features,
581        )
582    }
583}