easy_ml/distributions.rs
/*!
Models of distributions that samples can be drawn from.

These structs and methods require numerical types that can be treated as real numbers,
i.e. unsigned and signed integers cannot be used here.

# Example of plotting a Gaussian

```
extern crate rand;
extern crate rand_chacha;
extern crate textplots;
extern crate easy_ml;

use rand::{Rng, SeedableRng};
use rand::distr::{Iter, StandardUniform};
use rand_chacha::ChaCha8Rng;
use textplots::{Chart, Plot, Shape};
use easy_ml::distributions::Gaussian;

const SAMPLES: usize = 10000;

// create a normal distribution, note that the mean and variance are
// given in floating point notation as this will be an f64 Gaussian
let normal_distribution = Gaussian::new(0.0, 1.0);

// first create random numbers between 0 and 1
// using a fixed seed random generator from the rand crate
let mut random_generator = ChaCha8Rng::seed_from_u64(10);
let mut random_numbers: Iter<StandardUniform, &mut ChaCha8Rng, f64> =
    (&mut random_generator).sample_iter(StandardUniform);

// draw samples from the normal distribution
let samples: Vec<f64> = normal_distribution.draw(&mut random_numbers, SAMPLES)
    // unwrap is safe here because the source iterator is infinite, so draw
    // will always receive as many random numbers as it needs
    .unwrap();

// create a [(f32, f32)] list to plot a histogram of
let histogram_points: Vec<(f32, f32)> = samples
    .into_iter()
    .enumerate()
    .map(|(x, y)| (x as f32, y as f32))
    .collect();

// Plot a histogram from -3 to 3 with 30 bins to check that this distribution
// looks like a Gaussian. This will show a bell curve for large enough SAMPLES.
let histogram = textplots::utils::histogram(&histogram_points, -3.0, 3.0, 30);
Chart::new(180, 60, -3.0, 3.0)
    .lineplot(&Shape::Bars(&histogram))
    .nice();
```

# Getting an infinite iterator using the rand crate

It may be convenient to create an infinite iterator for random numbers so you don't need
to populate lists of random numbers when using these types.

```
use rand::{Rng, SeedableRng};
use rand::distr::{Iter, StandardUniform};
use rand_chacha::ChaCha8Rng;

// using a fixed seed random generator from the rand crate
let mut random_generator = ChaCha8Rng::seed_from_u64(16);
// now pass this Iterator to Gaussian functions that accept a &mut Iterator
let mut random_numbers: Iter<StandardUniform, &mut ChaCha8Rng, f64> =
    (&mut random_generator).sample_iter(StandardUniform);
```

# Example of creating an infinite iterator

The example below is for reference; don't actually do this if you're using rand,
because rand can already give you an infinite iterator (see the example above).

```
use rand::{Rng, SeedableRng};

// using a fixed seed random generator from the rand crate
let mut random_generator = rand_chacha::ChaCha8Rng::seed_from_u64(16);

struct EndlessRandomGenerator {
    rng: rand_chacha::ChaCha8Rng
}

impl Iterator for EndlessRandomGenerator {
    type Item = f64;

    fn next(&mut self) -> Option<Self::Item> {
        // always return Some, hence this iterator is infinite
        Some(self.rng.random::<f64>())
    }
}

// now pass this Iterator to Gaussian functions that accept a &mut Iterator
let mut random_numbers = EndlessRandomGenerator { rng: random_generator };
```

# Example of creating an infinite iterator for web assembly targets
[See the web_assembly module for an example of creating an infinite iterator for web assembly targets](super::web_assembly)
*/

use crate::linear_algebra;
use crate::matrices::Matrix;
use crate::numeric::extra::{Real, RealRef};
use crate::tensors::views::{TensorRef, TensorView};
use crate::tensors::{Dimension, Tensor};

use std::error::Error;
use std::fmt;

/**
 * A Gaussian probability density function of a normally distributed
 * random variable with expected value / mean μ, and variance σ<sup>2</sup>.
 *
 * See: [https://en.wikipedia.org/wiki/Gaussian_function](https://en.wikipedia.org/wiki/Gaussian_function)
 */
#[derive(Clone, Debug)]
pub struct Gaussian<T: Real> {
    /**
     * The mean is the expected value of this Gaussian.
     */
    pub mean: T,
    /**
     * The variance is a measure of the spread of values around the mean; high variance means
     * one standard deviation encompasses a larger spread of values from the mean.
     */
    pub variance: T,
}

impl<T: Real> Gaussian<T> {
    /**
     * Creates a Gaussian with the specified mean and variance.
     */
    pub fn new(mean: T, variance: T) -> Gaussian<T> {
        Gaussian { mean, variance }
    }

    /**
     * Creates a Gaussian approximating the mean and variance in the provided
     * data.
     *
     * Note that this will always be an approximation; if you generate some data
     * according to some mean and variance and then construct a Gaussian from
     * the mean and variance of that generated data, the approximated mean
     * and variance is unlikely to be exactly the same as the parameters the
     * data was generated with, though as the amount of data increases you
     * can expect the approximation to get closer.
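     *
     * # Example
     *
     * A minimal sketch of approximating a Gaussian from a small sample of data:
     *
     * ```
     * use easy_ml::distributions::Gaussian;
     *
     * let data = vec![1.0, 2.0, 3.0];
     * let gaussian = Gaussian::approximating(data.into_iter());
     * // the arithmetic mean of the data is recovered exactly here
     * assert_eq!(gaussian.mean, 2.0);
     * ```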
     */
    pub fn approximating<I>(data: I) -> Gaussian<T>
    where
        I: Iterator<Item = T>,
    {
        let mut copy: Vec<T> = data.collect();
        Gaussian {
            // collect the data into a Vec so we can pass it once each to the
            // mean and variance functions of linear_algebra
            mean: linear_algebra::mean(copy.iter().cloned()),
            variance: linear_algebra::variance(copy.drain(..)),
        }
    }
}

impl<T: Real> Gaussian<T>
where
    for<'a> &'a T: RealRef<T>,
{
    /**
     * Computes g(x) for some x, the probability density of a normally
     * distributed random variable x, or in other words how likely x is
     * to be drawn from this normal distribution.
     *
     * g(x) is largest for x equal to this distribution's mean and
     * g(x) will tend towards zero as x is further from this distribution's
     * mean, at a rate corresponding to this distribution's variance.
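     *
     * # Example
     *
     * A minimal sketch: the density of a standard normal distribution at its
     * mean is 1 / sqrt(2π) ≈ 0.399.
     *
     * ```
     * use easy_ml::distributions::Gaussian;
     *
     * let standard_normal = Gaussian::new(0.0, 1.0);
     * let density = standard_normal.probability(&0.0);
     * assert!((density - 0.3989).abs() < 0.001);
     * ```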
     */
    pub fn probability(&self, x: &T) -> T {
        // FIXME: &T sqrt doesn't seem to be picked up by the compiler here
        let standard_deviation = self.variance.clone().sqrt();
        let two = T::one() + T::one();
        let two_pi = &two * T::pi();
        // the exponent squares (x - μ) / σ, so it must divide by the standard
        // deviation, not the variance
        let exponent = (-T::one() / &two) * ((x - &self.mean) / &standard_deviation).pow(&two);
        let fraction = T::one() / (standard_deviation * (&two_pi.sqrt()));
        fraction * exponent.exp()
    }

    /**
     * Given a source of random variables in the uniformly distributed
     * range [0, 1] inclusive, draws `max_samples` independent
     * random numbers according to this Gaussian distribution's mean and
     * variance using the Box-Muller transform:
     *
     * [https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform)
     *
     * The source of random variables must provide at least as many values
     * as `max_samples` if `max_samples` is even, and one more than `max_samples`
     * if `max_samples` is odd. If fewer are provided None is returned.
     *
     * As all randomness is provided to this method, this code is deterministic
     * and will always compute the same samples given the same random source
     * of numbers.
     *
     * [Example of generating and feeding random numbers](self)
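     *
     * # Example
     *
     * A minimal sketch, using a short fixed list in place of a real source of
     * random numbers for illustration only:
     *
     * ```
     * use easy_ml::distributions::Gaussian;
     *
     * let gaussian = Gaussian::new(2.0, 4.0);
     * // 4 samples requires 4 uniformly distributed inputs
     * let mut source = [0.6, 0.3, 0.8, 0.1].iter().cloned();
     * let samples = gaussian.draw(&mut source, 4).unwrap();
     * assert_eq!(samples.len(), 4);
     * ```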
     */
    pub fn draw<I>(&self, source: &mut I, max_samples: usize) -> Option<Vec<T>>
    where
        I: Iterator<Item = T>,
    {
        let two = T::one() + T::one();
        let minus_two = -&two;
        let two_pi = &two * T::pi();
        let mut samples = Vec::with_capacity(max_samples);
        let standard_deviation = self.variance.clone().sqrt();
        // keep drawing samples from this normal Gaussian distribution
        // until either the iterator runs out or we reach the max_samples
        // limit
        while samples.len() < max_samples {
            let (u, v) = self.generate_pair(source)?;
            // these computations convert two samples from the inclusive 0 - 1
            // range to two samples of a normal distribution with
            // μ = 0 and σ = 1:
            // z1 = sqrt(-2 ln(u)) * cos(2π * v), z2 = sqrt(-2 ln(u)) * sin(2π * v)
            let radius = (&minus_two * u.ln()).sqrt();
            let z1 = &radius * ((&two_pi * &v).cos());
            let z2 = radius * ((&two_pi * &v).sin());
            // now we scale to the mean and variance for this Gaussian
            let sample1 = (z1 * &standard_deviation) + &self.mean;
            let sample2 = (z2 * &standard_deviation) + &self.mean;
            samples.push(sample1);
            samples.push(sample2);
        }
        // each iteration pushes 2 samples, so remove the final one if adding
        // a pair took us over the max
        if samples.len() > max_samples {
            samples.pop();
        }
        Some(samples)
    }

    fn generate_pair<I>(&self, source: &mut I) -> Option<(T, T)>
    where
        I: Iterator<Item = T>,
    {
        Some((source.next()?, source.next()?))
    }

    #[deprecated(since = "1.1.0", note = "renamed to `probability`")]
    pub fn map(&self, x: &T) -> T {
        self.probability(x)
    }
}

/**
 * A multivariate Gaussian distribution with mean vector μ, and covariance matrix Σ.
 *
 * See: [https://en.wikipedia.org/wiki/Multivariate_normal_distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution)
 *
 * # Invariants
 *
 * The mean [Matrix] must always be a column vector, and must be the same length as the
 * covariance matrix.
 */
#[derive(Clone, Debug)]
pub struct MultivariateGaussian<T: Real> {
    mean: Matrix<T>,
    covariance: Matrix<T>,
}

impl<T: Real> MultivariateGaussian<T> {
    /**
     * Constructs a new multivariate Gaussian distribution from
     * an Nx1 column vector of means and an NxN covariance matrix.
     *
     * This function does not check that the provided covariance matrix
     * is actually a covariance matrix. If a square matrix that is not
     * symmetric is supplied the Gaussian is not defined.
     *
     * # Panics
     *
     * Panics if the covariance matrix is not square, or the column vector
     * is not the same length as the covariance matrix size. Does not currently
     * panic if the covariance matrix is not symmetric, but this could be checked
     * in the future.
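     *
     * # Example
     *
     * A minimal sketch of constructing a 2 dimensional unit Gaussian:
     *
     * ```
     * use easy_ml::matrices::Matrix;
     * use easy_ml::distributions::MultivariateGaussian;
     *
     * let gaussian = MultivariateGaussian::new(
     *     Matrix::column(vec![0.0, 0.0]),
     *     Matrix::from(vec![
     *         vec![1.0, 0.0],
     *         vec![0.0, 1.0]
     *     ])
     * );
     * ```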
     */
    pub fn new(mean: Matrix<T>, covariance: Matrix<T>) -> MultivariateGaussian<T> {
        assert!(mean.columns() == 1, "Mean must be a column vector");
        assert!(
            covariance.rows() == covariance.columns(),
            "Supplied 'covariance' matrix is not square"
        );
        assert!(
            mean.rows() == covariance.rows(),
            "Means must be same length as covariance matrix"
        );
        MultivariateGaussian { mean, covariance }
    }

    /**
     * The mean is a column vector of expected values in each dimension
     */
    pub fn mean(&self) -> &Matrix<T> {
        &self.mean
    }

    /**
     * The covariance matrix is a measure of how much values from each dimension vary
     * from their expected value with respect to each other.
     *
     * For a 2 dimensional multivariate Gaussian the covariance matrix could be the 2x2 identity
     * matrix:
     *
     * ```ignore
     * [
     *   1.0, 0.0
     *   0.0, 1.0
     * ]
     * ```
     *
     * In which case the two dimensions are completely uncorrelated as `C[0,1] = C[1,0] = 0`.
     */
    pub fn covariance(&self) -> &Matrix<T> {
        &self.covariance
    }
}

impl<T: Real> MultivariateGaussian<T>
where
    for<'a> &'a T: RealRef<T>,
{
    /**
     * Draws samples from this multivariate distribution, provided that the covariance
     * matrix is positive definite.
     *
     * For max_samples of M, sufficient random numbers from the source iterator in the uniformly
     * distributed range [0, 1] inclusive, and this Gaussian's dimensionality of N, returns an
     * MxN matrix of drawn values.
     *
     * The source iterator must have at least MxN random values if N is even, and
     * Mx(N+1) random values if N is odd, or `None` will be returned.
     *
     * [Example of generating and feeding random numbers](super::k_means)
     *
     * If the covariance matrix is only positive semi-definite, `None` is returned. You
     * can check if a given covariance matrix is positive definite, instead of just positive
     * semi-definite, with the [cholesky](linear_algebra::cholesky_decomposition) decomposition.
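     *
     * # Example
     *
     * A minimal sketch, using a short fixed list in place of a real source of
     * random numbers for illustration only:
     *
     * ```
     * use easy_ml::matrices::Matrix;
     * use easy_ml::distributions::MultivariateGaussian;
     *
     * let gaussian = MultivariateGaussian::new(
     *     Matrix::column(vec![0.0, 0.0]),
     *     Matrix::from(vec![
     *         vec![1.0, 0.0],
     *         vec![0.0, 1.0]
     *     ])
     * );
     * // 2 samples of a 2 dimensional Gaussian requires 2x2 random numbers
     * let mut source = [0.6, 0.3, 0.8, 0.1].iter().cloned();
     * let samples = gaussian.draw(&mut source, 2).unwrap();
     * assert_eq!(samples.size(), (2, 2));
     * ```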
     */
    pub fn draw<I>(&self, source: &mut I, max_samples: usize) -> Option<Matrix<T>>
    where
        I: Iterator<Item = T>,
    {
        use crate::interop::{DimensionNames, RowAndColumn, TensorRefMatrix};
        // Since we already validated our state on construction, we wouldn't expect these
        // conversions to fail, but if they do, return None.
        // Convert the column vector to a 1 dimensional tensor by selecting the sole column.
        let mean = crate::tensors::views::TensorIndex::from(
            TensorRefMatrix::from(&self.mean).ok()?,
            [(RowAndColumn.names()[1], 0)],
        );
        let covariance = TensorRefMatrix::from(&self.covariance).ok()?;
        let samples = draw_tensor_samples::<T, _, _, _>(
            &mean,
            &covariance,
            source,
            max_samples,
            "samples",
            "features",
        );
        samples.map(|tensor| tensor.into_matrix())
    }
}

/**
 * A multivariate Gaussian distribution with mean vector μ, and covariance matrix Σ.
 *
 * See: [https://en.wikipedia.org/wiki/Multivariate_normal_distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution)
 */
#[derive(Clone, Debug)]
pub struct MultivariateGaussianTensor<T: Real> {
    mean: Tensor<T, 1>,
    covariance: Tensor<T, 2>,
}

/**
 * An error due to invalid inputs when constructing a [MultivariateGaussianTensor].
 */
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq)]
pub enum MultivariateGaussianError<T> {
    /**
     * The provided covariance matrix was not square.
     */
    NotCovarianceMatrix {
        mean: Tensor<T, 1>,
        covariance: Tensor<T, 2>,
    },
    /**
     * The provided mean vector was not the same length as the size of the covariance matrix.
     */
    MeanVectorWrongLength {
        mean: Tensor<T, 1>,
        covariance: Tensor<T, 2>,
    },
}

impl<T: fmt::Debug> fmt::Display for MultivariateGaussianError<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            MultivariateGaussianError::NotCovarianceMatrix {
                mean: _,
                covariance,
            } => write!(f, "Covariance matrix is not square: {:?}", covariance),
            MultivariateGaussianError::MeanVectorWrongLength { mean, covariance } => write!(
                f,
                "Mean vector has a different length {:?} to the covariance matrix size: {:?}",
                mean.shape(),
                covariance.shape(),
            ),
        }
    }
}

impl<T: fmt::Debug> Error for MultivariateGaussianError<T> {}

impl<T: Real> MultivariateGaussianTensor<T> {
    /**
     * Constructs a new multivariate Gaussian distribution from
     * an N length vector of means and an NxN covariance matrix.
     *
     * This function does not check that the provided covariance matrix
     * is actually a covariance matrix. If a square matrix that is not
     * symmetric is supplied the Gaussian is not defined.
     *
     * Result::Err is returned if the covariance matrix is not square, or the mean
     * vector is not the same length as the size of the covariance matrix. No error is
     * currently returned if the covariance matrix is not symmetric, but this could be
     * checked in the future.
     *
     * The dimension names of the mean and covariance matrix are not used, and do not need
     * to match.
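     *
     * # Example
     *
     * A minimal sketch of constructing a 2 dimensional unit Gaussian:
     *
     * ```
     * use easy_ml::tensors::Tensor;
     * use easy_ml::distributions::MultivariateGaussianTensor;
     *
     * let gaussian = MultivariateGaussianTensor::new(
     *     Tensor::from([("means", 2)], vec![0.0, 0.0]),
     *     Tensor::from([("rows", 2), ("columns", 2)], vec![
     *         1.0, 0.0,
     *         0.0, 1.0
     *     ])
     * ).unwrap();
     * ```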
     */
    // Boxing the error variant as per the clippy lint, since MultivariateGaussianError
    // is 152 bytes, which is rather large
    pub fn new(
        mean: Tensor<T, 1>,
        covariance: Tensor<T, 2>,
    ) -> Result<MultivariateGaussianTensor<T>, Box<MultivariateGaussianError<T>>> {
        let covariance_shape = covariance.shape();
        if !crate::tensors::dimensions::is_square(&covariance_shape) {
            return Err(Box::new(MultivariateGaussianError::NotCovarianceMatrix {
                mean,
                covariance,
            }));
        }
        let length = covariance_shape[0].1;
        if mean.shape()[0].1 != length {
            return Err(Box::new(MultivariateGaussianError::MeanVectorWrongLength {
                mean,
                covariance,
            }));
        }
        Ok(MultivariateGaussianTensor { mean, covariance })
    }

    /**
     * The mean is a vector of expected values in each dimension
     */
    pub fn mean(&self) -> &Tensor<T, 1> {
        &self.mean
    }

    /**
     * The covariance matrix is an NxN matrix where N is the number of dimensions for
     * this Gaussian. A covariance matrix must always be symmetric, that is `C[i,j] = C[j,i]`.
     *
     * The covariance matrix is a measure of how much values from each dimension vary
     * from their expected value with respect to each other.
     *
     * For a 2 dimensional multivariate Gaussian the covariance matrix could be the 2x2 identity
     * matrix:
     *
     * ```ignore
     * [
     *   1.0, 0.0
     *   0.0, 1.0
     * ]
     * ```
     *
     * In which case the two dimensions are completely uncorrelated as `C[0,1] = C[1,0] = 0`.
     */
    pub fn covariance(&self) -> &Tensor<T, 2> {
        &self.covariance
    }
}

fn draw_tensor_samples<T, S1, S2, I>(
    mean: S1,
    covariance: S2,
    source: &mut I,
    max_samples: usize,
    samples: Dimension,
    features: Dimension,
) -> Option<Tensor<T, 2>>
where
    T: Real,
    for<'a> &'a T: RealRef<T>,
    I: Iterator<Item = T>,
    S1: TensorRef<T, 1>,
    S2: TensorRef<T, 2>,
{
    // the samples and features dimension names must be distinct to form a
    // valid 2 dimensional tensor
    if samples == features {
        return None;
    }
    let mean = TensorView::from(mean);
    let covariance = TensorView::from(covariance);
    use linear_algebra::cholesky_decomposition_tensor as cholesky_decomposition;
    // Follow the method outlined at
    // https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Computational_methods
    let normal_distribution = Gaussian::new(T::zero(), T::one());
    let mut lower_triangular = cholesky_decomposition::<T, _, _>(&covariance)?;
    lower_triangular.rename([samples, features]);

    let number_of_samples = max_samples;
    let number_of_features = mean.shape()[0].1;
    let shape = [(samples, max_samples), (features, number_of_features)];
    let mut drawn_samples = Tensor::empty(shape, T::zero());

    // Construct a TensorView from the mean vector with a shape of
    // [(samples, number_of_features), (features, 1)]
    let column_vector_mean = mean.rename_view([samples]).expand_owned([(1, features)]);

    let mut drawn_samples_iterator = drawn_samples.iter_reference_mut();
    for _sample_row in 0..number_of_samples {
        // use the Box-Muller transform to get N independent values from
        // a normal distribution (x)
        let standard_normals = normal_distribution.draw(source, number_of_features)?;
        let standard_normals = Tensor::from(
            // Construct a column vector with as many rows as our N features
            [(samples, standard_normals.len()), (features, 1)],
            standard_normals,
        );
        // mean + (L * standard_normals) yields each m'th vector from the distribution
        let random_vector = &column_vector_mean + (&lower_triangular * standard_normals);
        // We now have an Nx1 matrix of samples which we can assign to this sample row vector
        for x in random_vector.iter() {
            // Since we'll assign a value exactly number_of_samples * number_of_features times
            // this will always be the Some case
            *drawn_samples_iterator.next()? = x;
        }
    }
    Some(drawn_samples)
}

impl<T: Real> MultivariateGaussianTensor<T>
where
    for<'a> &'a T: RealRef<T>,
{
    /**
     * Draws samples from this multivariate distribution, provided that the covariance
     * matrix is positive definite.
     *
     * For max_samples of M, sufficient random numbers from the source iterator in the uniformly
     * distributed range [0, 1] inclusive, and this Gaussian's dimensionality of N, returns an MxN
     * matrix of drawn values with dimension names `samples` and `features` for M and N
     * respectively.
     *
     * The source iterator must have at least MxN random values if N is even, and
     * Mx(N+1) random values if N is odd, or `None` will be returned.
     *
     * [Example of generating and feeding random numbers](super::k_means)
     *
     * If the covariance matrix is only positive semi-definite, `None` is returned. You
     * can check if a given covariance matrix is positive definite, instead of just positive
     * semi-definite, with the [cholesky](linear_algebra::cholesky_decomposition_tensor) decomposition.
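     *
     * # Example
     *
     * A minimal sketch, using a short fixed list in place of a real source of
     * random numbers for illustration only:
     *
     * ```
     * use easy_ml::tensors::Tensor;
     * use easy_ml::distributions::MultivariateGaussianTensor;
     *
     * let gaussian = MultivariateGaussianTensor::new(
     *     Tensor::from([("means", 2)], vec![0.0, 0.0]),
     *     Tensor::from([("rows", 2), ("columns", 2)], vec![
     *         1.0, 0.0,
     *         0.0, 1.0
     *     ])
     * ).unwrap();
     * // 2 samples of a 2 dimensional Gaussian requires 2x2 random numbers
     * let mut source = [0.6, 0.3, 0.8, 0.1].iter().cloned();
     * let samples = gaussian.draw(&mut source, 2, "samples", "features").unwrap();
     * assert_eq!(samples.shape(), [("samples", 2), ("features", 2)]);
     * ```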
     */
    pub fn draw<I>(
        &self,
        source: &mut I,
        max_samples: usize,
        samples: Dimension,
        features: Dimension,
    ) -> Option<Tensor<T, 2>>
    where
        I: Iterator<Item = T>,
    {
        draw_tensor_samples::<T, _, _, _>(
            &self.mean,
            &self.covariance,
            source,
            max_samples,
            samples,
            features,
        )
    }
}